{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9998215029452013, "eval_steps": 500, "global_step": 8402, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00023799607306479442, "grad_norm": 5.3633106806179, "learning_rate": 0.0, "loss": 0.7123, "step": 1 }, { "epoch": 0.00047599214612958885, "grad_norm": 3.8186829684019856, "learning_rate": 3.952569169960474e-08, "loss": 0.6005, "step": 2 }, { "epoch": 0.0007139882191943833, "grad_norm": 3.650677826595722, "learning_rate": 7.905138339920948e-08, "loss": 0.5807, "step": 3 }, { "epoch": 0.0009519842922591777, "grad_norm": 3.7298913667173106, "learning_rate": 1.1857707509881423e-07, "loss": 0.7382, "step": 4 }, { "epoch": 0.0011899803653239722, "grad_norm": 5.228820067839456, "learning_rate": 1.5810276679841897e-07, "loss": 0.6882, "step": 5 }, { "epoch": 0.0014279764383887665, "grad_norm": 3.4388897772378844, "learning_rate": 1.9762845849802374e-07, "loss": 0.5336, "step": 6 }, { "epoch": 0.001665972511453561, "grad_norm": 3.788288992632434, "learning_rate": 2.3715415019762845e-07, "loss": 0.6794, "step": 7 }, { "epoch": 0.0019039685845183554, "grad_norm": 4.769198995741846, "learning_rate": 2.766798418972332e-07, "loss": 0.6865, "step": 8 }, { "epoch": 0.00214196465758315, "grad_norm": 4.131645314597065, "learning_rate": 3.1620553359683794e-07, "loss": 0.6119, "step": 9 }, { "epoch": 0.0023799607306479445, "grad_norm": 3.453158794881233, "learning_rate": 3.5573122529644276e-07, "loss": 0.5578, "step": 10 }, { "epoch": 0.0026179568037127386, "grad_norm": 4.116364371771232, "learning_rate": 3.9525691699604747e-07, "loss": 0.6975, "step": 11 }, { "epoch": 0.002855952876777533, "grad_norm": 4.918270216385396, "learning_rate": 4.347826086956522e-07, "loss": 0.6732, "step": 12 }, { "epoch": 0.0030939489498423276, "grad_norm": 3.817650403686872, "learning_rate": 4.743083003952569e-07, "loss": 0.6094, "step": 13 }, { "epoch": 0.003331945022907122, "grad_norm": 3.369346282433572, "learning_rate": 5.138339920948617e-07, "loss": 0.6053, "step": 14 }, { "epoch": 0.0035699410959719163, "grad_norm": 3.988126056910898, "learning_rate": 5.533596837944664e-07, "loss": 0.7204, "step": 15 }, { "epoch": 0.003807937169036711, "grad_norm": 3.8112595042647035, "learning_rate": 5.928853754940712e-07, "loss": 0.5855, "step": 16 }, { "epoch": 0.004045933242101505, "grad_norm": 2.48224547819952, "learning_rate": 6.324110671936759e-07, "loss": 0.5257, "step": 17 }, { "epoch": 0.0042839293151663, "grad_norm": 2.733961457300926, "learning_rate": 6.719367588932807e-07, "loss": 0.6612, "step": 18 }, { "epoch": 0.004521925388231094, "grad_norm": 3.2790957516477235, "learning_rate": 7.114624505928855e-07, "loss": 0.6498, "step": 19 }, { "epoch": 0.004759921461295889, "grad_norm": 2.5124910550047352, "learning_rate": 7.509881422924902e-07, "loss": 0.5692, "step": 20 }, { "epoch": 0.0049979175343606835, "grad_norm": 2.659608784971935, "learning_rate": 7.905138339920949e-07, "loss": 0.5649, "step": 21 }, { "epoch": 0.005235913607425477, "grad_norm": 2.198967450226314, "learning_rate": 8.300395256916997e-07, "loss": 0.6861, "step": 22 }, { "epoch": 0.005473909680490272, "grad_norm": 1.8298179674027764, "learning_rate": 8.695652173913044e-07, "loss": 0.5675, "step": 23 }, { "epoch": 0.005711905753555066, "grad_norm": 1.696370720789889, "learning_rate": 9.090909090909091e-07, "loss": 0.5139, "step": 24 }, { "epoch": 0.005949901826619861, "grad_norm": 1.8785219368535397, "learning_rate": 9.486166007905138e-07, "loss": 0.6436, "step": 25 }, { "epoch": 0.006187897899684655, "grad_norm": 2.08357255427808, "learning_rate": 9.881422924901187e-07, "loss": 0.6934, "step": 26 }, { "epoch": 0.00642589397274945, "grad_norm": 1.6150355998820158, "learning_rate": 1.0276679841897233e-06, "loss": 0.5594, "step": 27 }, { "epoch": 0.006663890045814244, "grad_norm": 1.5788592814751126, "learning_rate": 1.067193675889328e-06, "loss": 0.5212, "step": 28 }, { "epoch": 0.006901886118879039, "grad_norm": 1.7395751013863352, "learning_rate": 1.1067193675889329e-06, "loss": 0.6369, "step": 29 }, { "epoch": 0.0071398821919438325, "grad_norm": 1.7730246621108292, "learning_rate": 1.1462450592885378e-06, "loss": 0.636, "step": 30 }, { "epoch": 0.007377878265008627, "grad_norm": 1.245992545689414, "learning_rate": 1.1857707509881424e-06, "loss": 0.5341, "step": 31 }, { "epoch": 0.007615874338073422, "grad_norm": 1.259904616724805, "learning_rate": 1.225296442687747e-06, "loss": 0.5601, "step": 32 }, { "epoch": 0.007853870411138216, "grad_norm": 1.427704060064298, "learning_rate": 1.2648221343873517e-06, "loss": 0.6261, "step": 33 }, { "epoch": 0.00809186648420301, "grad_norm": 1.4266849280066096, "learning_rate": 1.3043478260869566e-06, "loss": 0.5241, "step": 34 }, { "epoch": 0.008329862557267805, "grad_norm": 1.1492899123050067, "learning_rate": 1.3438735177865615e-06, "loss": 0.5351, "step": 35 }, { "epoch": 0.0085678586303326, "grad_norm": 1.3426107399500602, "learning_rate": 1.3833992094861662e-06, "loss": 0.6201, "step": 36 }, { "epoch": 0.008805854703397394, "grad_norm": 1.3943538319404358, "learning_rate": 1.422924901185771e-06, "loss": 0.5752, "step": 37 }, { "epoch": 0.009043850776462189, "grad_norm": 1.2295476918388797, "learning_rate": 1.4624505928853755e-06, "loss": 0.5134, "step": 38 }, { "epoch": 0.009281846849526983, "grad_norm": 1.0233852069436877, "learning_rate": 1.5019762845849804e-06, "loss": 0.5413, "step": 39 }, { "epoch": 0.009519842922591778, "grad_norm": 1.095723970715187, "learning_rate": 1.541501976284585e-06, "loss": 0.6027, "step": 40 }, { "epoch": 0.009757838995656572, "grad_norm": 0.9021710359927738, "learning_rate": 1.5810276679841899e-06, "loss": 0.5096, "step": 41 }, { "epoch": 0.009995835068721367, "grad_norm": 0.8271645037826616, "learning_rate": 1.6205533596837948e-06, "loss": 0.4744, "step": 42 }, { "epoch": 0.01023383114178616, "grad_norm": 0.9178185602286086, "learning_rate": 1.6600790513833994e-06, "loss": 0.5669, "step": 43 }, { "epoch": 0.010471827214850954, "grad_norm": 0.7642553443300366, "learning_rate": 1.699604743083004e-06, "loss": 0.5444, "step": 44 }, { "epoch": 0.010709823287915749, "grad_norm": 0.7527668145998719, "learning_rate": 1.7391304347826088e-06, "loss": 0.48, "step": 45 }, { "epoch": 0.010947819360980543, "grad_norm": 0.7022856285565289, "learning_rate": 1.7786561264822136e-06, "loss": 0.5277, "step": 46 }, { "epoch": 0.011185815434045338, "grad_norm": 0.7455577433162014, "learning_rate": 1.8181818181818183e-06, "loss": 0.6125, "step": 47 }, { "epoch": 0.011423811507110132, "grad_norm": 0.7225816964027125, "learning_rate": 1.8577075098814232e-06, "loss": 0.4642, "step": 48 }, { "epoch": 0.011661807580174927, "grad_norm": 0.6856376423837439, "learning_rate": 1.8972332015810276e-06, "loss": 0.4817, "step": 49 }, { "epoch": 0.011899803653239721, "grad_norm": 0.7776257134551714, "learning_rate": 1.9367588932806323e-06, "loss": 0.5714, "step": 50 }, { "epoch": 0.012137799726304516, "grad_norm": 0.6598276781972773, "learning_rate": 1.9762845849802374e-06, "loss": 0.5405, "step": 51 }, { "epoch": 0.01237579579936931, "grad_norm": 0.7519528179240484, "learning_rate": 2.015810276679842e-06, "loss": 0.4796, "step": 52 }, { "epoch": 0.012613791872434105, "grad_norm": 0.6641024419350181, "learning_rate": 2.0553359683794467e-06, "loss": 0.4447, "step": 53 }, { "epoch": 0.0128517879454989, "grad_norm": 0.7974261768047928, "learning_rate": 2.0948616600790518e-06, "loss": 0.5726, "step": 54 }, { "epoch": 0.013089784018563694, "grad_norm": 0.701806597200162, "learning_rate": 2.134387351778656e-06, "loss": 0.5425, "step": 55 }, { "epoch": 0.013327780091628489, "grad_norm": 0.6533475855597715, "learning_rate": 2.173913043478261e-06, "loss": 0.4617, "step": 56 }, { "epoch": 0.013565776164693283, "grad_norm": 0.6176303785724955, "learning_rate": 2.2134387351778658e-06, "loss": 0.525, "step": 57 }, { "epoch": 0.013803772237758078, "grad_norm": 0.6092588831474826, "learning_rate": 2.2529644268774704e-06, "loss": 0.5486, "step": 58 }, { "epoch": 0.014041768310822872, "grad_norm": 0.6409287405924915, "learning_rate": 2.2924901185770755e-06, "loss": 0.4525, "step": 59 }, { "epoch": 0.014279764383887665, "grad_norm": 0.5448182821305082, "learning_rate": 2.33201581027668e-06, "loss": 0.4333, "step": 60 }, { "epoch": 0.01451776045695246, "grad_norm": 0.5985899326268923, "learning_rate": 2.371541501976285e-06, "loss": 0.5382, "step": 61 }, { "epoch": 0.014755756530017254, "grad_norm": 0.5562460345435642, "learning_rate": 2.4110671936758895e-06, "loss": 0.5455, "step": 62 }, { "epoch": 0.014993752603082049, "grad_norm": 0.5762099087323772, "learning_rate": 2.450592885375494e-06, "loss": 0.4017, "step": 63 }, { "epoch": 0.015231748676146843, "grad_norm": 0.5743174512558051, "learning_rate": 2.4901185770750993e-06, "loss": 0.4655, "step": 64 }, { "epoch": 0.015469744749211638, "grad_norm": 0.5478944257204411, "learning_rate": 2.5296442687747035e-06, "loss": 0.5516, "step": 65 }, { "epoch": 0.015707740822276432, "grad_norm": 0.5348787537296499, "learning_rate": 2.5691699604743086e-06, "loss": 0.4859, "step": 66 }, { "epoch": 0.015945736895341227, "grad_norm": 0.513662426262249, "learning_rate": 2.6086956521739132e-06, "loss": 0.4405, "step": 67 }, { "epoch": 0.01618373296840602, "grad_norm": 0.5477522117902747, "learning_rate": 2.6482213438735183e-06, "loss": 0.5469, "step": 68 }, { "epoch": 0.016421729041470816, "grad_norm": 0.5353133875838187, "learning_rate": 2.687747035573123e-06, "loss": 0.5546, "step": 69 }, { "epoch": 0.01665972511453561, "grad_norm": 0.5247379742690176, "learning_rate": 2.7272727272727272e-06, "loss": 0.45, "step": 70 }, { "epoch": 0.016897721187600405, "grad_norm": 0.48632866471872, "learning_rate": 2.7667984189723323e-06, "loss": 0.4319, "step": 71 }, { "epoch": 0.0171357172606652, "grad_norm": 0.5339643875178105, "learning_rate": 2.806324110671937e-06, "loss": 0.5189, "step": 72 }, { "epoch": 0.017373713333729994, "grad_norm": 0.5148248669651565, "learning_rate": 2.845849802371542e-06, "loss": 0.5148, "step": 73 }, { "epoch": 0.01761170940679479, "grad_norm": 0.4906003639934845, "learning_rate": 2.8853754940711463e-06, "loss": 0.4181, "step": 74 }, { "epoch": 0.017849705479859583, "grad_norm": 0.5090667895636138, "learning_rate": 2.924901185770751e-06, "loss": 0.4989, "step": 75 }, { "epoch": 0.018087701552924378, "grad_norm": 0.47929822302895847, "learning_rate": 2.964426877470356e-06, "loss": 0.51, "step": 76 }, { "epoch": 0.018325697625989172, "grad_norm": 0.49337262495522655, "learning_rate": 3.0039525691699607e-06, "loss": 0.416, "step": 77 }, { "epoch": 0.018563693699053967, "grad_norm": 0.46721565809919324, "learning_rate": 3.043478260869566e-06, "loss": 0.4243, "step": 78 }, { "epoch": 0.01880168977211876, "grad_norm": 0.47787380347630104, "learning_rate": 3.08300395256917e-06, "loss": 0.5134, "step": 79 }, { "epoch": 0.019039685845183556, "grad_norm": 0.4888904145322131, "learning_rate": 3.1225296442687747e-06, "loss": 0.4965, "step": 80 }, { "epoch": 0.01927768191824835, "grad_norm": 0.474440273062018, "learning_rate": 3.1620553359683798e-06, "loss": 0.4231, "step": 81 }, { "epoch": 0.019515677991313145, "grad_norm": 0.4917931480050054, "learning_rate": 3.2015810276679844e-06, "loss": 0.5044, "step": 82 }, { "epoch": 0.01975367406437794, "grad_norm": 0.4850752880482312, "learning_rate": 3.2411067193675895e-06, "loss": 0.4966, "step": 83 }, { "epoch": 0.019991670137442734, "grad_norm": 0.504493166936379, "learning_rate": 3.2806324110671938e-06, "loss": 0.416, "step": 84 }, { "epoch": 0.020229666210507525, "grad_norm": 0.5145462573417079, "learning_rate": 3.320158102766799e-06, "loss": 0.405, "step": 85 }, { "epoch": 0.02046766228357232, "grad_norm": 0.4950180063312114, "learning_rate": 3.3596837944664035e-06, "loss": 0.5179, "step": 86 }, { "epoch": 0.020705658356637114, "grad_norm": 0.4499055758325075, "learning_rate": 3.399209486166008e-06, "loss": 0.5045, "step": 87 }, { "epoch": 0.02094365442970191, "grad_norm": 0.4886696541409708, "learning_rate": 3.4387351778656133e-06, "loss": 0.4223, "step": 88 }, { "epoch": 0.021181650502766703, "grad_norm": 0.47670535706005207, "learning_rate": 3.4782608695652175e-06, "loss": 0.4569, "step": 89 }, { "epoch": 0.021419646575831498, "grad_norm": 0.4382725012174603, "learning_rate": 3.5177865612648226e-06, "loss": 0.5505, "step": 90 }, { "epoch": 0.021657642648896292, "grad_norm": 0.47014527706939824, "learning_rate": 3.5573122529644273e-06, "loss": 0.4423, "step": 91 }, { "epoch": 0.021895638721961087, "grad_norm": 0.4776076310968795, "learning_rate": 3.5968379446640315e-06, "loss": 0.4014, "step": 92 }, { "epoch": 0.02213363479502588, "grad_norm": 0.48030073876619656, "learning_rate": 3.6363636363636366e-06, "loss": 0.483, "step": 93 }, { "epoch": 0.022371630868090676, "grad_norm": 0.4298351648691775, "learning_rate": 3.6758893280632412e-06, "loss": 0.5171, "step": 94 }, { "epoch": 0.02260962694115547, "grad_norm": 0.4368816040607989, "learning_rate": 3.7154150197628463e-06, "loss": 0.4107, "step": 95 }, { "epoch": 0.022847623014220265, "grad_norm": 0.47265191527306455, "learning_rate": 3.754940711462451e-06, "loss": 0.4379, "step": 96 }, { "epoch": 0.02308561908728506, "grad_norm": 0.44472115691541, "learning_rate": 3.7944664031620552e-06, "loss": 0.5029, "step": 97 }, { "epoch": 0.023323615160349854, "grad_norm": 0.47516731747233143, "learning_rate": 3.833992094861661e-06, "loss": 0.4948, "step": 98 }, { "epoch": 0.02356161123341465, "grad_norm": 0.4756183678548411, "learning_rate": 3.8735177865612646e-06, "loss": 0.4284, "step": 99 }, { "epoch": 0.023799607306479443, "grad_norm": 0.4648909223572498, "learning_rate": 3.91304347826087e-06, "loss": 0.4613, "step": 100 }, { "epoch": 0.024037603379544237, "grad_norm": 0.45494168373864374, "learning_rate": 3.952569169960475e-06, "loss": 0.5071, "step": 101 }, { "epoch": 0.024275599452609032, "grad_norm": 0.47165993720085936, "learning_rate": 3.992094861660079e-06, "loss": 0.4297, "step": 102 }, { "epoch": 0.024513595525673827, "grad_norm": 0.46600664566797045, "learning_rate": 4.031620553359684e-06, "loss": 0.4006, "step": 103 }, { "epoch": 0.02475159159873862, "grad_norm": 0.440136381866, "learning_rate": 4.071146245059289e-06, "loss": 0.5229, "step": 104 }, { "epoch": 0.024989587671803416, "grad_norm": 0.4874483108078875, "learning_rate": 4.110671936758893e-06, "loss": 0.4793, "step": 105 }, { "epoch": 0.02522758374486821, "grad_norm": 0.4412999202522836, "learning_rate": 4.150197628458498e-06, "loss": 0.3896, "step": 106 }, { "epoch": 0.025465579817933005, "grad_norm": 0.4843482283829763, "learning_rate": 4.1897233201581036e-06, "loss": 0.5004, "step": 107 }, { "epoch": 0.0257035758909978, "grad_norm": 0.5192505494175473, "learning_rate": 4.229249011857708e-06, "loss": 0.4848, "step": 108 }, { "epoch": 0.025941571964062594, "grad_norm": 0.4697882778878796, "learning_rate": 4.268774703557312e-06, "loss": 0.4183, "step": 109 }, { "epoch": 0.026179568037127388, "grad_norm": 0.47243383971128333, "learning_rate": 4.3083003952569175e-06, "loss": 0.4165, "step": 110 }, { "epoch": 0.026417564110192183, "grad_norm": 0.46058732243836664, "learning_rate": 4.347826086956522e-06, "loss": 0.4516, "step": 111 }, { "epoch": 0.026655560183256977, "grad_norm": 0.439182011456468, "learning_rate": 4.387351778656127e-06, "loss": 0.4817, "step": 112 }, { "epoch": 0.026893556256321772, "grad_norm": 0.43801194497466545, "learning_rate": 4.4268774703557315e-06, "loss": 0.4033, "step": 113 }, { "epoch": 0.027131552329386566, "grad_norm": 0.47578753276213764, "learning_rate": 4.466403162055336e-06, "loss": 0.4751, "step": 114 }, { "epoch": 0.02736954840245136, "grad_norm": 0.44222504076871677, "learning_rate": 4.505928853754941e-06, "loss": 0.5385, "step": 115 }, { "epoch": 0.027607544475516155, "grad_norm": 0.4619468918214023, "learning_rate": 4.5454545454545455e-06, "loss": 0.4023, "step": 116 }, { "epoch": 0.02784554054858095, "grad_norm": 0.4554362965127849, "learning_rate": 4.584980237154151e-06, "loss": 0.3981, "step": 117 }, { "epoch": 0.028083536621645745, "grad_norm": 0.42154073719188745, "learning_rate": 4.624505928853755e-06, "loss": 0.4805, "step": 118 }, { "epoch": 0.028321532694710536, "grad_norm": 0.4407938952654761, "learning_rate": 4.66403162055336e-06, "loss": 0.4883, "step": 119 }, { "epoch": 0.02855952876777533, "grad_norm": 0.4533865445704307, "learning_rate": 4.703557312252965e-06, "loss": 0.4043, "step": 120 }, { "epoch": 0.028797524840840125, "grad_norm": 0.4501769062330425, "learning_rate": 4.74308300395257e-06, "loss": 0.424, "step": 121 }, { "epoch": 0.02903552091390492, "grad_norm": 0.4964204618463292, "learning_rate": 4.782608695652174e-06, "loss": 0.5057, "step": 122 }, { "epoch": 0.029273516986969714, "grad_norm": 0.43608831339404674, "learning_rate": 4.822134387351779e-06, "loss": 0.4411, "step": 123 }, { "epoch": 0.029511513060034508, "grad_norm": 0.5134189283501325, "learning_rate": 4.861660079051384e-06, "loss": 0.3686, "step": 124 }, { "epoch": 0.029749509133099303, "grad_norm": 0.4454413745824674, "learning_rate": 4.901185770750988e-06, "loss": 0.4342, "step": 125 }, { "epoch": 0.029987505206164097, "grad_norm": 0.4206745271958203, "learning_rate": 4.940711462450593e-06, "loss": 0.5246, "step": 126 }, { "epoch": 0.030225501279228892, "grad_norm": 0.45531729242028907, "learning_rate": 4.9802371541501985e-06, "loss": 0.4269, "step": 127 }, { "epoch": 0.030463497352293686, "grad_norm": 0.43018413490903146, "learning_rate": 5.019762845849802e-06, "loss": 0.4158, "step": 128 }, { "epoch": 0.03070149342535848, "grad_norm": 0.4452267415803146, "learning_rate": 5.059288537549407e-06, "loss": 0.498, "step": 129 }, { "epoch": 0.030939489498423275, "grad_norm": 0.44751910045276433, "learning_rate": 5.0988142292490125e-06, "loss": 0.4583, "step": 130 }, { "epoch": 0.03117748557148807, "grad_norm": 0.4474828998483563, "learning_rate": 5.138339920948617e-06, "loss": 0.3873, "step": 131 }, { "epoch": 0.031415481644552865, "grad_norm": 0.46141860455017775, "learning_rate": 5.177865612648222e-06, "loss": 0.4635, "step": 132 }, { "epoch": 0.03165347771761766, "grad_norm": 0.4406433909527717, "learning_rate": 5.2173913043478265e-06, "loss": 0.5175, "step": 133 }, { "epoch": 0.031891473790682454, "grad_norm": 0.46817970001375386, "learning_rate": 5.256916996047431e-06, "loss": 0.4269, "step": 134 }, { "epoch": 0.03212946986374725, "grad_norm": 0.4582029027978222, "learning_rate": 5.296442687747037e-06, "loss": 0.3855, "step": 135 }, { "epoch": 0.03236746593681204, "grad_norm": 0.4483408124347049, "learning_rate": 5.335968379446641e-06, "loss": 0.5035, "step": 136 }, { "epoch": 0.03260546200987684, "grad_norm": 0.4521188947407404, "learning_rate": 5.375494071146246e-06, "loss": 0.4773, "step": 137 }, { "epoch": 0.03284345808294163, "grad_norm": 0.4584775312885165, "learning_rate": 5.41501976284585e-06, "loss": 0.4263, "step": 138 }, { "epoch": 0.033081454156006426, "grad_norm": 0.44388523673544455, "learning_rate": 5.4545454545454545e-06, "loss": 0.4206, "step": 139 }, { "epoch": 0.03331945022907122, "grad_norm": 0.4443963263357755, "learning_rate": 5.49407114624506e-06, "loss": 0.5354, "step": 140 }, { "epoch": 0.033557446302136015, "grad_norm": 0.4624952425984824, "learning_rate": 5.533596837944665e-06, "loss": 0.4237, "step": 141 }, { "epoch": 0.03379544237520081, "grad_norm": 0.4372307380751775, "learning_rate": 5.573122529644269e-06, "loss": 0.4082, "step": 142 }, { "epoch": 0.034033438448265604, "grad_norm": 0.4494964774973684, "learning_rate": 5.612648221343874e-06, "loss": 0.5033, "step": 143 }, { "epoch": 0.0342714345213304, "grad_norm": 0.4308495087586641, "learning_rate": 5.652173913043479e-06, "loss": 0.4855, "step": 144 }, { "epoch": 0.03450943059439519, "grad_norm": 0.45898397608033087, "learning_rate": 5.691699604743084e-06, "loss": 0.3943, "step": 145 }, { "epoch": 0.03474742666745999, "grad_norm": 0.444667683614168, "learning_rate": 5.731225296442689e-06, "loss": 0.3907, "step": 146 }, { "epoch": 0.03498542274052478, "grad_norm": 0.43190369082425845, "learning_rate": 5.770750988142293e-06, "loss": 0.5306, "step": 147 }, { "epoch": 0.03522341881358958, "grad_norm": 0.47772442543782057, "learning_rate": 5.810276679841897e-06, "loss": 0.4328, "step": 148 }, { "epoch": 0.03546141488665437, "grad_norm": 0.5149054927207557, "learning_rate": 5.849802371541502e-06, "loss": 0.4038, "step": 149 }, { "epoch": 0.035699410959719166, "grad_norm": 0.46489298468292706, "learning_rate": 5.8893280632411074e-06, "loss": 0.4641, "step": 150 }, { "epoch": 0.03593740703278396, "grad_norm": 0.43822016358847443, "learning_rate": 5.928853754940712e-06, "loss": 0.511, "step": 151 }, { "epoch": 0.036175403105848755, "grad_norm": 0.4780094956775413, "learning_rate": 5.968379446640317e-06, "loss": 0.4091, "step": 152 }, { "epoch": 0.03641339917891355, "grad_norm": 0.5193809604213568, "learning_rate": 6.007905138339921e-06, "loss": 0.3704, "step": 153 }, { "epoch": 0.036651395251978344, "grad_norm": 0.47203611226409087, "learning_rate": 6.047430830039526e-06, "loss": 0.5057, "step": 154 }, { "epoch": 0.03688939132504314, "grad_norm": 0.4255995467291753, "learning_rate": 6.086956521739132e-06, "loss": 0.4863, "step": 155 }, { "epoch": 0.03712738739810793, "grad_norm": 0.4317511844695257, "learning_rate": 6.126482213438736e-06, "loss": 0.4005, "step": 156 }, { "epoch": 0.03736538347117273, "grad_norm": 0.4463988044172272, "learning_rate": 6.16600790513834e-06, "loss": 0.4169, "step": 157 }, { "epoch": 0.03760337954423752, "grad_norm": 0.4301302395556984, "learning_rate": 6.205533596837945e-06, "loss": 0.4749, "step": 158 }, { "epoch": 0.03784137561730232, "grad_norm": 0.4885006908777435, "learning_rate": 6.245059288537549e-06, "loss": 0.4176, "step": 159 }, { "epoch": 0.03807937169036711, "grad_norm": 0.490807059909575, "learning_rate": 6.284584980237155e-06, "loss": 0.3878, "step": 160 }, { "epoch": 0.038317367763431906, "grad_norm": 0.43132409383451087, "learning_rate": 6.3241106719367596e-06, "loss": 0.4834, "step": 161 }, { "epoch": 0.0385553638364967, "grad_norm": 0.44865911014332444, "learning_rate": 6.363636363636364e-06, "loss": 0.4821, "step": 162 }, { "epoch": 0.038793359909561495, "grad_norm": 0.4260431090618812, "learning_rate": 6.403162055335969e-06, "loss": 0.4174, "step": 163 }, { "epoch": 0.03903135598262629, "grad_norm": 0.49901659393595554, "learning_rate": 6.442687747035574e-06, "loss": 0.4296, "step": 164 }, { "epoch": 0.039269352055691084, "grad_norm": 0.4897547343200327, "learning_rate": 6.482213438735179e-06, "loss": 0.512, "step": 165 }, { "epoch": 0.03950734812875588, "grad_norm": 0.4813236527289336, "learning_rate": 6.521739130434783e-06, "loss": 0.431, "step": 166 }, { "epoch": 0.03974534420182067, "grad_norm": 0.45927226014349076, "learning_rate": 6.5612648221343875e-06, "loss": 0.3718, "step": 167 }, { "epoch": 0.03998334027488547, "grad_norm": 0.4965136015728435, "learning_rate": 6.600790513833992e-06, "loss": 0.4695, "step": 168 }, { "epoch": 0.04022133634795026, "grad_norm": 0.4683557415256707, "learning_rate": 6.640316205533598e-06, "loss": 0.4778, "step": 169 }, { "epoch": 0.04045933242101505, "grad_norm": 0.4634555253102173, "learning_rate": 6.679841897233202e-06, "loss": 0.3995, "step": 170 }, { "epoch": 0.040697328494079844, "grad_norm": 0.48973289383270246, "learning_rate": 6.719367588932807e-06, "loss": 0.3976, "step": 171 }, { "epoch": 0.04093532456714464, "grad_norm": 0.42723303350236147, "learning_rate": 6.758893280632412e-06, "loss": 0.494, "step": 172 }, { "epoch": 0.04117332064020943, "grad_norm": 0.7587112192005009, "learning_rate": 6.798418972332016e-06, "loss": 0.3958, "step": 173 }, { "epoch": 0.04141131671327423, "grad_norm": 0.4915235015843637, "learning_rate": 6.837944664031622e-06, "loss": 0.3719, "step": 174 }, { "epoch": 0.04164931278633902, "grad_norm": 0.48028138363352874, "learning_rate": 6.8774703557312265e-06, "loss": 0.4144, "step": 175 }, { "epoch": 0.04188730885940382, "grad_norm": 0.43339371237253094, "learning_rate": 6.91699604743083e-06, "loss": 0.523, "step": 176 }, { "epoch": 0.04212530493246861, "grad_norm": 0.45447194682552483, "learning_rate": 6.956521739130435e-06, "loss": 0.3567, "step": 177 }, { "epoch": 0.042363301005533406, "grad_norm": 0.4552581773897187, "learning_rate": 6.99604743083004e-06, "loss": 0.3722, "step": 178 }, { "epoch": 0.0426012970785982, "grad_norm": 0.4796620850546896, "learning_rate": 7.035573122529645e-06, "loss": 0.4874, "step": 179 }, { "epoch": 0.042839293151662995, "grad_norm": 0.43749845312110847, "learning_rate": 7.07509881422925e-06, "loss": 0.4922, "step": 180 }, { "epoch": 0.04307728922472779, "grad_norm": 0.46789960493230326, "learning_rate": 7.1146245059288545e-06, "loss": 0.383, "step": 181 }, { "epoch": 0.043315285297792584, "grad_norm": 0.5224298682432643, "learning_rate": 7.154150197628459e-06, "loss": 0.4526, "step": 182 }, { "epoch": 0.04355328137085738, "grad_norm": 0.4746636793562093, "learning_rate": 7.193675889328063e-06, "loss": 0.4934, "step": 183 }, { "epoch": 0.04379127744392217, "grad_norm": 0.4701682318036117, "learning_rate": 7.233201581027669e-06, "loss": 0.4034, "step": 184 }, { "epoch": 0.04402927351698697, "grad_norm": 0.5015499630814017, "learning_rate": 7.272727272727273e-06, "loss": 0.3577, "step": 185 }, { "epoch": 0.04426726959005176, "grad_norm": 0.447979905112201, "learning_rate": 7.312252964426878e-06, "loss": 0.4454, "step": 186 }, { "epoch": 0.04450526566311656, "grad_norm": 0.4360535244392955, "learning_rate": 7.3517786561264825e-06, "loss": 0.4708, "step": 187 }, { "epoch": 0.04474326173618135, "grad_norm": 0.4906094221266247, "learning_rate": 7.391304347826087e-06, "loss": 0.3734, "step": 188 }, { "epoch": 0.044981257809246146, "grad_norm": 0.47012438815001395, "learning_rate": 7.430830039525693e-06, "loss": 0.4194, "step": 189 }, { "epoch": 0.04521925388231094, "grad_norm": 0.4668496482235658, "learning_rate": 7.470355731225297e-06, "loss": 0.4804, "step": 190 }, { "epoch": 0.045457249955375735, "grad_norm": 0.43523540977913394, "learning_rate": 7.509881422924902e-06, "loss": 0.4067, "step": 191 }, { "epoch": 0.04569524602844053, "grad_norm": 0.4388184201414626, "learning_rate": 7.549407114624507e-06, "loss": 0.405, "step": 192 }, { "epoch": 0.045933242101505324, "grad_norm": 0.4710177222648498, "learning_rate": 7.5889328063241105e-06, "loss": 0.4461, "step": 193 }, { "epoch": 0.04617123817457012, "grad_norm": 0.45506913551818734, "learning_rate": 7.628458498023717e-06, "loss": 0.5013, "step": 194 }, { "epoch": 0.04640923424763491, "grad_norm": 0.4503230582686603, "learning_rate": 7.667984189723321e-06, "loss": 0.3812, "step": 195 }, { "epoch": 0.04664723032069971, "grad_norm": 0.4900687410351956, "learning_rate": 7.707509881422925e-06, "loss": 0.3929, "step": 196 }, { "epoch": 0.0468852263937645, "grad_norm": 0.4422937878413219, "learning_rate": 7.747035573122529e-06, "loss": 0.4826, "step": 197 }, { "epoch": 0.0471232224668293, "grad_norm": 0.4696233806991265, "learning_rate": 7.786561264822135e-06, "loss": 0.4321, "step": 198 }, { "epoch": 0.04736121853989409, "grad_norm": 0.5036764448381432, "learning_rate": 7.82608695652174e-06, "loss": 0.3737, "step": 199 }, { "epoch": 0.047599214612958886, "grad_norm": 0.4460954367231951, "learning_rate": 7.865612648221344e-06, "loss": 0.4502, "step": 200 }, { "epoch": 0.04783721068602368, "grad_norm": 0.4121996924774416, "learning_rate": 7.90513833992095e-06, "loss": 0.4872, "step": 201 }, { "epoch": 0.048075206759088475, "grad_norm": 0.5377437642053337, "learning_rate": 7.944664031620553e-06, "loss": 0.3756, "step": 202 }, { "epoch": 0.04831320283215327, "grad_norm": 0.4869568221691017, "learning_rate": 7.984189723320159e-06, "loss": 0.3994, "step": 203 }, { "epoch": 0.048551198905218064, "grad_norm": 0.45051544052407894, "learning_rate": 8.023715415019764e-06, "loss": 0.4758, "step": 204 }, { "epoch": 0.04878919497828286, "grad_norm": 0.46693597119879243, "learning_rate": 8.063241106719368e-06, "loss": 0.4724, "step": 205 }, { "epoch": 0.04902719105134765, "grad_norm": 0.4434729096123986, "learning_rate": 8.102766798418974e-06, "loss": 0.3654, "step": 206 }, { "epoch": 0.04926518712441245, "grad_norm": 0.4516410800019458, "learning_rate": 8.142292490118577e-06, "loss": 0.4328, "step": 207 }, { "epoch": 0.04950318319747724, "grad_norm": 0.4092851533761814, "learning_rate": 8.181818181818183e-06, "loss": 0.4973, "step": 208 }, { "epoch": 0.04974117927054204, "grad_norm": 0.4458979348243189, "learning_rate": 8.221343873517787e-06, "loss": 0.3875, "step": 209 }, { "epoch": 0.04997917534360683, "grad_norm": 0.4575088617794585, "learning_rate": 8.260869565217392e-06, "loss": 0.3812, "step": 210 }, { "epoch": 0.050217171416671626, "grad_norm": 0.42998279360778746, "learning_rate": 8.300395256916996e-06, "loss": 0.4689, "step": 211 }, { "epoch": 0.05045516748973642, "grad_norm": 0.4664154763779984, "learning_rate": 8.339920948616602e-06, "loss": 0.4628, "step": 212 }, { "epoch": 0.050693163562801215, "grad_norm": 0.48372522149295644, "learning_rate": 8.379446640316207e-06, "loss": 0.3713, "step": 213 }, { "epoch": 0.05093115963586601, "grad_norm": 0.45614467932799935, "learning_rate": 8.418972332015811e-06, "loss": 0.4565, "step": 214 }, { "epoch": 0.051169155708930804, "grad_norm": 0.4521533500555388, "learning_rate": 8.458498023715416e-06, "loss": 0.4939, "step": 215 }, { "epoch": 0.0514071517819956, "grad_norm": 0.44133282573284666, "learning_rate": 8.49802371541502e-06, "loss": 0.4448, "step": 216 }, { "epoch": 0.05164514785506039, "grad_norm": 0.4461748694701973, "learning_rate": 8.537549407114624e-06, "loss": 0.3685, "step": 217 }, { "epoch": 0.05188314392812519, "grad_norm": 0.43361066733503273, "learning_rate": 8.57707509881423e-06, "loss": 0.4443, "step": 218 }, { "epoch": 0.05212114000118998, "grad_norm": 0.44192635288128085, "learning_rate": 8.616600790513835e-06, "loss": 0.4858, "step": 219 }, { "epoch": 0.052359136074254777, "grad_norm": 0.4765904637426461, "learning_rate": 8.656126482213439e-06, "loss": 0.37, "step": 220 }, { "epoch": 0.05259713214731957, "grad_norm": 0.42950301730198376, "learning_rate": 8.695652173913044e-06, "loss": 0.4087, "step": 221 }, { "epoch": 0.052835128220384366, "grad_norm": 0.4493355585707809, "learning_rate": 8.735177865612648e-06, "loss": 0.4808, "step": 222 }, { "epoch": 0.05307312429344916, "grad_norm": 0.4473843798566356, "learning_rate": 8.774703557312254e-06, "loss": 0.4421, "step": 223 }, { "epoch": 0.053311120366513955, "grad_norm": 0.4662717413273855, "learning_rate": 8.81422924901186e-06, "loss": 0.3826, "step": 224 }, { "epoch": 0.05354911643957875, "grad_norm": 0.4753294327238269, "learning_rate": 8.853754940711463e-06, "loss": 0.435, "step": 225 }, { "epoch": 0.053787112512643544, "grad_norm": 0.43479966227192995, "learning_rate": 8.893280632411067e-06, "loss": 0.4577, "step": 226 }, { "epoch": 0.05402510858570834, "grad_norm": 0.4540970891602704, "learning_rate": 8.932806324110672e-06, "loss": 0.3734, "step": 227 }, { "epoch": 0.05426310465877313, "grad_norm": 0.47014652353944864, "learning_rate": 8.972332015810278e-06, "loss": 0.4018, "step": 228 }, { "epoch": 0.05450110073183793, "grad_norm": 0.4588196915897715, "learning_rate": 9.011857707509882e-06, "loss": 0.4821, "step": 229 }, { "epoch": 0.05473909680490272, "grad_norm": 0.4290397451443549, "learning_rate": 9.051383399209487e-06, "loss": 0.4443, "step": 230 }, { "epoch": 0.054977092877967516, "grad_norm": 0.4634653667823266, "learning_rate": 9.090909090909091e-06, "loss": 0.3622, "step": 231 }, { "epoch": 0.05521508895103231, "grad_norm": 0.4799790289966676, "learning_rate": 9.130434782608697e-06, "loss": 0.4462, "step": 232 }, { "epoch": 0.055453085024097105, "grad_norm": 0.4480981798171716, "learning_rate": 9.169960474308302e-06, "loss": 0.4328, "step": 233 }, { "epoch": 0.0556910810971619, "grad_norm": 0.46767604607383606, "learning_rate": 9.209486166007906e-06, "loss": 0.412, "step": 234 }, { "epoch": 0.055929077170226695, "grad_norm": 0.5333947256403828, "learning_rate": 9.24901185770751e-06, "loss": 0.3851, "step": 235 }, { "epoch": 0.05616707324329149, "grad_norm": 0.44207186013928046, "learning_rate": 9.288537549407115e-06, "loss": 0.4775, "step": 236 }, { "epoch": 0.056405069316356284, "grad_norm": 0.4579089731862815, "learning_rate": 9.32806324110672e-06, "loss": 0.4796, "step": 237 }, { "epoch": 0.05664306538942107, "grad_norm": 0.4916045682404132, "learning_rate": 9.367588932806325e-06, "loss": 0.3495, "step": 238 }, { "epoch": 0.056881061462485866, "grad_norm": 0.48592356270000286, "learning_rate": 9.40711462450593e-06, "loss": 0.3762, "step": 239 }, { "epoch": 0.05711905753555066, "grad_norm": 0.40760679787076876, "learning_rate": 9.446640316205534e-06, "loss": 0.4946, "step": 240 }, { "epoch": 0.057357053608615455, "grad_norm": 0.5404446805301054, "learning_rate": 9.48616600790514e-06, "loss": 0.3878, "step": 241 }, { "epoch": 0.05759504968168025, "grad_norm": 0.5023864741183144, "learning_rate": 9.525691699604745e-06, "loss": 0.3769, "step": 242 }, { "epoch": 0.057833045754745044, "grad_norm": 0.4291166201410665, "learning_rate": 9.565217391304349e-06, "loss": 0.4405, "step": 243 }, { "epoch": 0.05807104182780984, "grad_norm": 0.4370205569354994, "learning_rate": 9.604743083003954e-06, "loss": 0.4543, "step": 244 }, { "epoch": 0.05830903790087463, "grad_norm": 0.4726343079593376, "learning_rate": 9.644268774703558e-06, "loss": 0.3977, "step": 245 }, { "epoch": 0.05854703397393943, "grad_norm": 0.48561129532487857, "learning_rate": 9.683794466403162e-06, "loss": 0.3915, "step": 246 }, { "epoch": 0.05878503004700422, "grad_norm": 0.4757606568781063, "learning_rate": 9.723320158102767e-06, "loss": 0.4472, "step": 247 }, { "epoch": 0.059023026120069016, "grad_norm": 0.515327431255371, "learning_rate": 9.762845849802373e-06, "loss": 0.4027, "step": 248 }, { "epoch": 0.05926102219313381, "grad_norm": 0.4762224245393142, "learning_rate": 9.802371541501977e-06, "loss": 0.3695, "step": 249 }, { "epoch": 0.059499018266198606, "grad_norm": 0.4459966492940902, "learning_rate": 9.841897233201582e-06, "loss": 0.4651, "step": 250 }, { "epoch": 0.0597370143392634, "grad_norm": 0.4460885254332536, "learning_rate": 9.881422924901186e-06, "loss": 0.4789, "step": 251 }, { "epoch": 0.059975010412328195, "grad_norm": 0.5301749599674928, "learning_rate": 9.920948616600791e-06, "loss": 0.3894, "step": 252 }, { "epoch": 0.06021300648539299, "grad_norm": 0.5074601139708311, "learning_rate": 9.960474308300397e-06, "loss": 0.379, "step": 253 }, { "epoch": 0.060451002558457784, "grad_norm": 0.46943388848534723, "learning_rate": 1e-05, "loss": 0.4968, "step": 254 }, { "epoch": 0.06068899863152258, "grad_norm": 0.4849058663631589, "learning_rate": 9.999999628438155e-06, "loss": 0.4239, "step": 255 }, { "epoch": 0.06092699470458737, "grad_norm": 0.5159279619948409, "learning_rate": 9.999998513752668e-06, "loss": 0.3897, "step": 256 }, { "epoch": 0.06116499077765217, "grad_norm": 0.4913799031325082, "learning_rate": 9.999996655943708e-06, "loss": 0.4329, "step": 257 }, { "epoch": 0.06140298685071696, "grad_norm": 0.4609134555633486, "learning_rate": 9.999994055011552e-06, "loss": 0.4635, "step": 258 }, { "epoch": 0.061640982923781756, "grad_norm": 0.4600665037690504, "learning_rate": 9.999990710956586e-06, "loss": 0.361, "step": 259 }, { "epoch": 0.06187897899684655, "grad_norm": 0.4893892245206209, "learning_rate": 9.999986623779307e-06, "loss": 0.3929, "step": 260 }, { "epoch": 0.062116975069911345, "grad_norm": 0.47317754883053104, "learning_rate": 9.99998179348032e-06, "loss": 0.4411, "step": 261 }, { "epoch": 0.06235497114297614, "grad_norm": 0.46027884774340916, "learning_rate": 9.999976220060347e-06, "loss": 0.4357, "step": 262 }, { "epoch": 0.06259296721604093, "grad_norm": 0.5110985377219065, "learning_rate": 9.999969903520212e-06, "loss": 0.368, "step": 263 }, { "epoch": 0.06283096328910573, "grad_norm": 0.5424172208623367, "learning_rate": 9.999962843860858e-06, "loss": 0.3945, "step": 264 }, { "epoch": 0.06306895936217052, "grad_norm": 0.4455661724192156, "learning_rate": 9.999955041083332e-06, "loss": 0.4574, "step": 265 }, { "epoch": 0.06330695543523532, "grad_norm": 0.4967774114157658, "learning_rate": 9.999946495188793e-06, "loss": 0.4139, "step": 266 }, { "epoch": 0.06354495150830011, "grad_norm": 0.48548245385801053, "learning_rate": 9.999937206178512e-06, "loss": 0.369, "step": 267 }, { "epoch": 0.06378294758136491, "grad_norm": 0.44764818752534175, "learning_rate": 9.999927174053872e-06, "loss": 0.4449, "step": 268 }, { "epoch": 0.0640209436544297, "grad_norm": 0.47571255336778956, "learning_rate": 9.999916398816359e-06, "loss": 0.4252, "step": 269 }, { "epoch": 0.0642589397274945, "grad_norm": 0.5176454688899305, "learning_rate": 9.999904880467579e-06, "loss": 0.4172, "step": 270 }, { "epoch": 0.06449693580055929, "grad_norm": 0.47797256428291063, "learning_rate": 9.99989261900924e-06, "loss": 0.4234, "step": 271 }, { "epoch": 0.06473493187362409, "grad_norm": 0.45140623928149853, "learning_rate": 9.999879614443168e-06, "loss": 0.4936, "step": 272 }, { "epoch": 0.06497292794668888, "grad_norm": 0.4619192022175719, "learning_rate": 9.999865866771295e-06, "loss": 0.472, "step": 273 }, { "epoch": 0.06521092401975367, "grad_norm": 0.5262244410688721, "learning_rate": 9.999851375995662e-06, "loss": 0.3445, "step": 274 }, { "epoch": 0.06544892009281847, "grad_norm": 0.4520208956154366, "learning_rate": 9.999836142118424e-06, "loss": 0.4155, "step": 275 }, { "epoch": 0.06568691616588326, "grad_norm": 0.46440432038472335, "learning_rate": 9.999820165141845e-06, "loss": 0.4586, "step": 276 }, { "epoch": 0.06592491223894806, "grad_norm": 0.4573062726758063, "learning_rate": 9.9998034450683e-06, "loss": 0.3835, "step": 277 }, { "epoch": 0.06616290831201285, "grad_norm": 0.4475402421191899, "learning_rate": 9.999785981900277e-06, "loss": 0.3686, "step": 278 }, { "epoch": 0.06640090438507765, "grad_norm": 0.4696018693236434, "learning_rate": 9.999767775640364e-06, "loss": 0.4623, "step": 279 }, { "epoch": 0.06663890045814244, "grad_norm": 0.4618737104284479, "learning_rate": 9.999748826291273e-06, "loss": 0.4177, "step": 280 }, { "epoch": 0.06687689653120724, "grad_norm": 0.4799653750371601, "learning_rate": 9.99972913385582e-06, "loss": 0.3504, "step": 281 }, { "epoch": 0.06711489260427203, "grad_norm": 0.4497108981102174, "learning_rate": 9.999708698336929e-06, "loss": 0.4563, "step": 282 }, { "epoch": 0.06735288867733683, "grad_norm": 0.44395236286884626, "learning_rate": 9.999687519737639e-06, "loss": 0.4782, "step": 283 }, { "epoch": 0.06759088475040162, "grad_norm": 0.6091422041147246, "learning_rate": 9.999665598061097e-06, "loss": 0.3892, "step": 284 }, { "epoch": 0.06782888082346641, "grad_norm": 0.49705585154406684, "learning_rate": 9.999642933310561e-06, "loss": 0.3826, "step": 285 }, { "epoch": 0.06806687689653121, "grad_norm": 0.4568270789179068, "learning_rate": 9.9996195254894e-06, "loss": 0.4423, "step": 286 }, { "epoch": 0.068304872969596, "grad_norm": 0.45526888232059454, "learning_rate": 9.999595374601093e-06, "loss": 0.4749, "step": 287 }, { "epoch": 0.0685428690426608, "grad_norm": 0.46039335829876826, "learning_rate": 9.99957048064923e-06, "loss": 0.3574, "step": 288 }, { "epoch": 0.06878086511572559, "grad_norm": 0.4484215237280295, "learning_rate": 9.999544843637509e-06, "loss": 0.4244, "step": 289 }, { "epoch": 0.06901886118879039, "grad_norm": 0.4156896188113592, "learning_rate": 9.999518463569742e-06, "loss": 0.4771, "step": 290 }, { "epoch": 0.06925685726185518, "grad_norm": 0.4207781838747178, "learning_rate": 9.99949134044985e-06, "loss": 0.3879, "step": 291 }, { "epoch": 0.06949485333491998, "grad_norm": 0.45221148207375844, "learning_rate": 9.999463474281862e-06, "loss": 0.403, "step": 292 }, { "epoch": 0.06973284940798477, "grad_norm": 0.4377876809771397, "learning_rate": 9.999434865069922e-06, "loss": 0.4193, "step": 293 }, { "epoch": 0.06997084548104957, "grad_norm": 0.4024216767935093, "learning_rate": 9.99940551281828e-06, "loss": 0.4875, "step": 294 }, { "epoch": 0.07020884155411436, "grad_norm": 0.43322473693066027, "learning_rate": 9.999375417531301e-06, "loss": 0.3583, "step": 295 }, { "epoch": 0.07044683762717915, "grad_norm": 0.471599149075093, "learning_rate": 9.999344579213455e-06, "loss": 0.42, "step": 296 }, { "epoch": 0.07068483370024395, "grad_norm": 0.44747663908303237, "learning_rate": 9.999312997869326e-06, "loss": 0.503, "step": 297 }, { "epoch": 0.07092282977330874, "grad_norm": 0.5069193101593006, "learning_rate": 9.99928067350361e-06, "loss": 0.4093, "step": 298 }, { "epoch": 0.07116082584637354, "grad_norm": 0.4859190395369954, "learning_rate": 9.99924760612111e-06, "loss": 0.3736, "step": 299 }, { "epoch": 0.07139882191943833, "grad_norm": 0.4497898536509786, "learning_rate": 9.999213795726738e-06, "loss": 0.4518, "step": 300 }, { "epoch": 0.07163681799250313, "grad_norm": 0.4399132919727615, "learning_rate": 9.999179242325523e-06, "loss": 0.5143, "step": 301 }, { "epoch": 0.07187481406556792, "grad_norm": 0.40980962676569144, "learning_rate": 9.999143945922599e-06, "loss": 0.3701, "step": 302 }, { "epoch": 0.07211281013863272, "grad_norm": 0.4266818493401362, "learning_rate": 9.999107906523212e-06, "loss": 0.3847, "step": 303 }, { "epoch": 0.07235080621169751, "grad_norm": 0.4120201125570514, "learning_rate": 9.999071124132717e-06, "loss": 0.4753, "step": 304 }, { "epoch": 0.0725888022847623, "grad_norm": 0.5036108776564726, "learning_rate": 9.999033598756583e-06, "loss": 0.4513, "step": 305 }, { "epoch": 0.0728267983578271, "grad_norm": 0.45433796497230666, "learning_rate": 9.998995330400385e-06, "loss": 0.38, "step": 306 }, { "epoch": 0.0730647944308919, "grad_norm": 0.4604572613372604, "learning_rate": 9.998956319069813e-06, "loss": 0.4294, "step": 307 }, { "epoch": 0.07330279050395669, "grad_norm": 0.4239655605570626, "learning_rate": 9.998916564770662e-06, "loss": 0.5023, "step": 308 }, { "epoch": 0.07354078657702148, "grad_norm": 0.49639603018946726, "learning_rate": 9.998876067508846e-06, "loss": 0.3821, "step": 309 }, { "epoch": 0.07377878265008628, "grad_norm": 0.4498046706633747, "learning_rate": 9.998834827290376e-06, "loss": 0.3897, "step": 310 }, { "epoch": 0.07401677872315107, "grad_norm": 0.4847626369166094, "learning_rate": 9.998792844121386e-06, "loss": 0.4587, "step": 311 }, { "epoch": 0.07425477479621587, "grad_norm": 0.4754430506699406, "learning_rate": 9.998750118008117e-06, "loss": 0.4725, "step": 312 }, { "epoch": 0.07449277086928066, "grad_norm": 0.48598157686223126, "learning_rate": 9.998706648956916e-06, "loss": 0.3919, "step": 313 }, { "epoch": 0.07473076694234546, "grad_norm": 0.4660284468209422, "learning_rate": 9.998662436974246e-06, "loss": 0.422, "step": 314 }, { "epoch": 0.07496876301541025, "grad_norm": 0.4755957103806313, "learning_rate": 9.998617482066677e-06, "loss": 0.4716, "step": 315 }, { "epoch": 0.07520675908847504, "grad_norm": 0.49198072627395434, "learning_rate": 9.998571784240889e-06, "loss": 0.3805, "step": 316 }, { "epoch": 0.07544475516153984, "grad_norm": 0.45039434089900343, "learning_rate": 9.998525343503676e-06, "loss": 0.3578, "step": 317 }, { "epoch": 0.07568275123460463, "grad_norm": 0.4567179079014157, "learning_rate": 9.998478159861938e-06, "loss": 0.4384, "step": 318 }, { "epoch": 0.07592074730766943, "grad_norm": 0.44281871668390654, "learning_rate": 9.99843023332269e-06, "loss": 0.4717, "step": 319 }, { "epoch": 0.07615874338073422, "grad_norm": 0.5238647769899825, "learning_rate": 9.998381563893056e-06, "loss": 0.4097, "step": 320 }, { "epoch": 0.07639673945379902, "grad_norm": 0.4477956066826517, "learning_rate": 9.998332151580266e-06, "loss": 0.3769, "step": 321 }, { "epoch": 0.07663473552686381, "grad_norm": 0.44274206034195474, "learning_rate": 9.998281996391665e-06, "loss": 0.4546, "step": 322 }, { "epoch": 0.0768727315999286, "grad_norm": 0.43869730085122627, "learning_rate": 9.998231098334708e-06, "loss": 0.4046, "step": 323 }, { "epoch": 0.0771107276729934, "grad_norm": 0.5146875829687024, "learning_rate": 9.99817945741696e-06, "loss": 0.3706, "step": 324 }, { "epoch": 0.0773487237460582, "grad_norm": 0.4036470374218696, "learning_rate": 9.998127073646095e-06, "loss": 0.4352, "step": 325 }, { "epoch": 0.07758671981912299, "grad_norm": 0.4630902945017874, "learning_rate": 9.998073947029899e-06, "loss": 0.4768, "step": 326 }, { "epoch": 0.07782471589218778, "grad_norm": 0.5458520408430351, "learning_rate": 9.99802007757627e-06, "loss": 0.3639, "step": 327 }, { "epoch": 0.07806271196525258, "grad_norm": 0.42241640395474855, "learning_rate": 9.997965465293208e-06, "loss": 0.3646, "step": 328 }, { "epoch": 0.07830070803831737, "grad_norm": 0.4473593312203704, "learning_rate": 9.99791011018884e-06, "loss": 0.4494, "step": 329 }, { "epoch": 0.07853870411138217, "grad_norm": 0.4617723155081359, "learning_rate": 9.997854012271383e-06, "loss": 0.4168, "step": 330 }, { "epoch": 0.07877670018444696, "grad_norm": 0.4796048928242152, "learning_rate": 9.99779717154918e-06, "loss": 0.3781, "step": 331 }, { "epoch": 0.07901469625751176, "grad_norm": 0.4409776472086194, "learning_rate": 9.99773958803068e-06, "loss": 0.4194, "step": 332 }, { "epoch": 0.07925269233057655, "grad_norm": 0.49694630682051916, "learning_rate": 9.997681261724436e-06, "loss": 0.4955, "step": 333 }, { "epoch": 0.07949068840364135, "grad_norm": 0.5521495917382526, "learning_rate": 9.99762219263912e-06, "loss": 0.3829, "step": 334 }, { "epoch": 0.07972868447670614, "grad_norm": 0.4448039189655745, "learning_rate": 9.997562380783512e-06, "loss": 0.3625, "step": 335 }, { "epoch": 0.07996668054977094, "grad_norm": 0.4233213810496793, "learning_rate": 9.997501826166502e-06, "loss": 0.4431, "step": 336 }, { "epoch": 0.08020467662283573, "grad_norm": 0.4511409424625309, "learning_rate": 9.997440528797087e-06, "loss": 0.4574, "step": 337 }, { "epoch": 0.08044267269590052, "grad_norm": 0.5076129237328914, "learning_rate": 9.997378488684376e-06, "loss": 0.3615, "step": 338 }, { "epoch": 0.0806806687689653, "grad_norm": 0.41838318534134256, "learning_rate": 9.997315705837596e-06, "loss": 0.3779, "step": 339 }, { "epoch": 0.0809186648420301, "grad_norm": 0.44373668851344966, "learning_rate": 9.997252180266074e-06, "loss": 0.4479, "step": 340 }, { "epoch": 0.0811566609150949, "grad_norm": 0.5204118151673891, "learning_rate": 9.997187911979252e-06, "loss": 0.4187, "step": 341 }, { "epoch": 0.08139465698815969, "grad_norm": 0.5043252971282625, "learning_rate": 9.99712290098668e-06, "loss": 0.3434, "step": 342 }, { "epoch": 0.08163265306122448, "grad_norm": 0.44526891013124525, "learning_rate": 9.997057147298024e-06, "loss": 0.4474, "step": 343 }, { "epoch": 0.08187064913428928, "grad_norm": 0.477769396408338, "learning_rate": 9.996990650923053e-06, "loss": 0.4611, "step": 344 }, { "epoch": 0.08210864520735407, "grad_norm": 0.5121141417084246, "learning_rate": 9.996923411871653e-06, "loss": 0.3596, "step": 345 }, { "epoch": 0.08234664128041887, "grad_norm": 0.4424669784191923, "learning_rate": 9.996855430153816e-06, "loss": 0.3949, "step": 346 }, { "epoch": 0.08258463735348366, "grad_norm": 0.4508136073799909, "learning_rate": 9.996786705779645e-06, "loss": 0.4567, "step": 347 }, { "epoch": 0.08282263342654846, "grad_norm": 0.5106951134749175, "learning_rate": 9.996717238759355e-06, "loss": 0.3982, "step": 348 }, { "epoch": 0.08306062949961325, "grad_norm": 0.4710547751602934, "learning_rate": 9.996647029103271e-06, "loss": 0.334, "step": 349 }, { "epoch": 0.08329862557267805, "grad_norm": 0.45527398227968663, "learning_rate": 9.996576076821827e-06, "loss": 0.4494, "step": 350 }, { "epoch": 0.08353662164574284, "grad_norm": 0.43961786486243276, "learning_rate": 9.99650438192557e-06, "loss": 0.4702, "step": 351 }, { "epoch": 0.08377461771880763, "grad_norm": 0.4920860251140083, "learning_rate": 9.996431944425154e-06, "loss": 0.3757, "step": 352 }, { "epoch": 0.08401261379187243, "grad_norm": 0.4544455547851751, "learning_rate": 9.996358764331344e-06, "loss": 0.3715, "step": 353 }, { "epoch": 0.08425060986493722, "grad_norm": 0.4383129033433832, "learning_rate": 9.996284841655017e-06, "loss": 0.4829, "step": 354 }, { "epoch": 0.08448860593800202, "grad_norm": 0.4911519487114972, "learning_rate": 9.996210176407163e-06, "loss": 0.4288, "step": 355 }, { "epoch": 0.08472660201106681, "grad_norm": 0.4165648910362421, "learning_rate": 9.996134768598874e-06, "loss": 0.3398, "step": 356 }, { "epoch": 0.0849645980841316, "grad_norm": 0.4583575292505661, "learning_rate": 9.996058618241362e-06, "loss": 0.4005, "step": 357 }, { "epoch": 0.0852025941571964, "grad_norm": 0.41965836884164454, "learning_rate": 9.995981725345941e-06, "loss": 0.4569, "step": 358 }, { "epoch": 0.0854405902302612, "grad_norm": 0.45798270095341687, "learning_rate": 9.995904089924043e-06, "loss": 0.4057, "step": 359 }, { "epoch": 0.08567858630332599, "grad_norm": 0.46062392413472286, "learning_rate": 9.995825711987202e-06, "loss": 0.3556, "step": 360 }, { "epoch": 0.08591658237639078, "grad_norm": 0.4533838682476381, "learning_rate": 9.99574659154707e-06, "loss": 0.4538, "step": 361 }, { "epoch": 0.08615457844945558, "grad_norm": 0.46749855571344134, "learning_rate": 9.995666728615407e-06, "loss": 0.4573, "step": 362 }, { "epoch": 0.08639257452252037, "grad_norm": 0.41387520189963245, "learning_rate": 9.995586123204083e-06, "loss": 0.3657, "step": 363 }, { "epoch": 0.08663057059558517, "grad_norm": 0.4516587880902474, "learning_rate": 9.995504775325073e-06, "loss": 0.4202, "step": 364 }, { "epoch": 0.08686856666864996, "grad_norm": 0.42225242395315815, "learning_rate": 9.99542268499047e-06, "loss": 0.4667, "step": 365 }, { "epoch": 0.08710656274171476, "grad_norm": 0.44765554289425097, "learning_rate": 9.995339852212478e-06, "loss": 0.4053, "step": 366 }, { "epoch": 0.08734455881477955, "grad_norm": 0.40737919321911104, "learning_rate": 9.995256277003403e-06, "loss": 0.3616, "step": 367 }, { "epoch": 0.08758255488784435, "grad_norm": 0.47373973714542855, "learning_rate": 9.99517195937567e-06, "loss": 0.4084, "step": 368 }, { "epoch": 0.08782055096090914, "grad_norm": 0.4530682065428116, "learning_rate": 9.995086899341808e-06, "loss": 0.4571, "step": 369 }, { "epoch": 0.08805854703397394, "grad_norm": 1.3913274762621268, "learning_rate": 9.995001096914462e-06, "loss": 0.3454, "step": 370 }, { "epoch": 0.08829654310703873, "grad_norm": 0.47478901107913046, "learning_rate": 9.99491455210638e-06, "loss": 0.3953, "step": 371 }, { "epoch": 0.08853453918010352, "grad_norm": 0.5018434335802849, "learning_rate": 9.994827264930432e-06, "loss": 0.4965, "step": 372 }, { "epoch": 0.08877253525316832, "grad_norm": 0.4450551192390846, "learning_rate": 9.994739235399581e-06, "loss": 0.4243, "step": 373 }, { "epoch": 0.08901053132623311, "grad_norm": 0.4515333750014817, "learning_rate": 9.994650463526918e-06, "loss": 0.3676, "step": 374 }, { "epoch": 0.08924852739929791, "grad_norm": 0.48851016231525185, "learning_rate": 9.994560949325636e-06, "loss": 0.467, "step": 375 }, { "epoch": 0.0894865234723627, "grad_norm": 0.4195656189924732, "learning_rate": 9.994470692809036e-06, "loss": 0.4756, "step": 376 }, { "epoch": 0.0897245195454275, "grad_norm": 0.43584449035060197, "learning_rate": 9.994379693990533e-06, "loss": 0.3715, "step": 377 }, { "epoch": 0.08996251561849229, "grad_norm": 0.4385497336895332, "learning_rate": 9.994287952883652e-06, "loss": 0.3578, "step": 378 }, { "epoch": 0.09020051169155709, "grad_norm": 0.47979109705837614, "learning_rate": 9.994195469502031e-06, "loss": 0.4422, "step": 379 }, { "epoch": 0.09043850776462188, "grad_norm": 0.4035725189989237, "learning_rate": 9.994102243859412e-06, "loss": 0.4285, "step": 380 }, { "epoch": 0.09067650383768668, "grad_norm": 0.40727590335667724, "learning_rate": 9.99400827596965e-06, "loss": 0.3412, "step": 381 }, { "epoch": 0.09091449991075147, "grad_norm": 0.4480428671921397, "learning_rate": 9.993913565846713e-06, "loss": 0.4233, "step": 382 }, { "epoch": 0.09115249598381626, "grad_norm": 0.45603055794812214, "learning_rate": 9.993818113504678e-06, "loss": 0.4748, "step": 383 }, { "epoch": 0.09139049205688106, "grad_norm": 0.4662947179384981, "learning_rate": 9.993721918957728e-06, "loss": 0.3761, "step": 384 }, { "epoch": 0.09162848812994585, "grad_norm": 0.4974648803891763, "learning_rate": 9.993624982220164e-06, "loss": 0.3555, "step": 385 }, { "epoch": 0.09186648420301065, "grad_norm": 0.4643066974713487, "learning_rate": 9.99352730330639e-06, "loss": 0.4187, "step": 386 }, { "epoch": 0.09210448027607544, "grad_norm": 0.43172365645329547, "learning_rate": 9.993428882230925e-06, "loss": 0.4664, "step": 387 }, { "epoch": 0.09234247634914024, "grad_norm": 0.4714894865640499, "learning_rate": 9.993329719008397e-06, "loss": 0.3685, "step": 388 }, { "epoch": 0.09258047242220503, "grad_norm": 0.41913422983030557, "learning_rate": 9.993229813653544e-06, "loss": 0.3675, "step": 389 }, { "epoch": 0.09281846849526983, "grad_norm": 0.43975663296852435, "learning_rate": 9.993129166181215e-06, "loss": 0.4556, "step": 390 }, { "epoch": 0.09305646456833462, "grad_norm": 0.4494552356684672, "learning_rate": 9.993027776606365e-06, "loss": 0.4227, "step": 391 }, { "epoch": 0.09329446064139942, "grad_norm": 0.4288268452457883, "learning_rate": 9.992925644944068e-06, "loss": 0.3461, "step": 392 }, { "epoch": 0.09353245671446421, "grad_norm": 0.4489798380818497, "learning_rate": 9.992822771209501e-06, "loss": 0.4179, "step": 393 }, { "epoch": 0.093770452787529, "grad_norm": 0.4509441961780933, "learning_rate": 9.992719155417954e-06, "loss": 0.4886, "step": 394 }, { "epoch": 0.0940084488605938, "grad_norm": 0.4449441936420067, "learning_rate": 9.992614797584825e-06, "loss": 0.3609, "step": 395 }, { "epoch": 0.0942464449336586, "grad_norm": 0.4517902195455245, "learning_rate": 9.992509697725627e-06, "loss": 0.3913, "step": 396 }, { "epoch": 0.09448444100672339, "grad_norm": 0.4272371117143219, "learning_rate": 9.992403855855979e-06, "loss": 0.4825, "step": 397 }, { "epoch": 0.09472243707978818, "grad_norm": 0.49237207377468334, "learning_rate": 9.992297271991611e-06, "loss": 0.4321, "step": 398 }, { "epoch": 0.09496043315285298, "grad_norm": 0.4742011509459983, "learning_rate": 9.992189946148366e-06, "loss": 0.3706, "step": 399 }, { "epoch": 0.09519842922591777, "grad_norm": 0.481623368264721, "learning_rate": 9.992081878342196e-06, "loss": 0.433, "step": 400 }, { "epoch": 0.09543642529898257, "grad_norm": 0.43969308523488704, "learning_rate": 9.991973068589157e-06, "loss": 0.497, "step": 401 }, { "epoch": 0.09567442137204736, "grad_norm": 0.45002640361694246, "learning_rate": 9.991863516905428e-06, "loss": 0.3763, "step": 402 }, { "epoch": 0.09591241744511216, "grad_norm": 0.4140916324473366, "learning_rate": 9.991753223307283e-06, "loss": 0.4141, "step": 403 }, { "epoch": 0.09615041351817695, "grad_norm": 0.4987232508457122, "learning_rate": 9.991642187811122e-06, "loss": 0.4597, "step": 404 }, { "epoch": 0.09638840959124174, "grad_norm": 0.502335430018636, "learning_rate": 9.991530410433446e-06, "loss": 0.4302, "step": 405 }, { "epoch": 0.09662640566430654, "grad_norm": 0.4984534349500167, "learning_rate": 9.991417891190864e-06, "loss": 0.3477, "step": 406 }, { "epoch": 0.09686440173737133, "grad_norm": 0.43148480208491835, "learning_rate": 9.991304630100103e-06, "loss": 0.4022, "step": 407 }, { "epoch": 0.09710239781043613, "grad_norm": 0.42523411463325067, "learning_rate": 9.991190627177993e-06, "loss": 0.4572, "step": 408 }, { "epoch": 0.09734039388350092, "grad_norm": 0.5251992385161286, "learning_rate": 9.991075882441482e-06, "loss": 0.3742, "step": 409 }, { "epoch": 0.09757838995656572, "grad_norm": 0.5071319884695096, "learning_rate": 9.990960395907621e-06, "loss": 0.3707, "step": 410 }, { "epoch": 0.09781638602963051, "grad_norm": 0.449266770757162, "learning_rate": 9.990844167593574e-06, "loss": 0.4296, "step": 411 }, { "epoch": 0.0980543821026953, "grad_norm": 0.48923465192436794, "learning_rate": 9.990727197516617e-06, "loss": 0.4722, "step": 412 }, { "epoch": 0.0982923781757601, "grad_norm": 0.525715202859848, "learning_rate": 9.990609485694133e-06, "loss": 0.3591, "step": 413 }, { "epoch": 0.0985303742488249, "grad_norm": 0.4460753119588959, "learning_rate": 9.990491032143619e-06, "loss": 0.3947, "step": 414 }, { "epoch": 0.09876837032188969, "grad_norm": 0.4540712349304704, "learning_rate": 9.990371836882678e-06, "loss": 0.4902, "step": 415 }, { "epoch": 0.09900636639495448, "grad_norm": 0.45967087446771393, "learning_rate": 9.990251899929026e-06, "loss": 0.3735, "step": 416 }, { "epoch": 0.09924436246801928, "grad_norm": 0.5344550556485966, "learning_rate": 9.990131221300489e-06, "loss": 0.3667, "step": 417 }, { "epoch": 0.09948235854108407, "grad_norm": 0.44602952588771577, "learning_rate": 9.990009801015003e-06, "loss": 0.4267, "step": 418 }, { "epoch": 0.09972035461414887, "grad_norm": 0.5195582033175876, "learning_rate": 9.989887639090614e-06, "loss": 0.4233, "step": 419 }, { "epoch": 0.09995835068721366, "grad_norm": 0.5142299290549335, "learning_rate": 9.989764735545477e-06, "loss": 0.378, "step": 420 }, { "epoch": 0.10019634676027846, "grad_norm": 0.4854121376426571, "learning_rate": 9.98964109039786e-06, "loss": 0.361, "step": 421 }, { "epoch": 0.10043434283334325, "grad_norm": 0.4550651737012537, "learning_rate": 9.98951670366614e-06, "loss": 0.4618, "step": 422 }, { "epoch": 0.10067233890640805, "grad_norm": 0.5234336286250443, "learning_rate": 9.989391575368802e-06, "loss": 0.3958, "step": 423 }, { "epoch": 0.10091033497947284, "grad_norm": 0.47063073761951685, "learning_rate": 9.989265705524444e-06, "loss": 0.3747, "step": 424 }, { "epoch": 0.10114833105253764, "grad_norm": 0.46952341342697657, "learning_rate": 9.989139094151773e-06, "loss": 0.4112, "step": 425 }, { "epoch": 0.10138632712560243, "grad_norm": 0.42526909585554723, "learning_rate": 9.98901174126961e-06, "loss": 0.4603, "step": 426 }, { "epoch": 0.10162432319866722, "grad_norm": 0.43907211057538165, "learning_rate": 9.988883646896877e-06, "loss": 0.3813, "step": 427 }, { "epoch": 0.10186231927173202, "grad_norm": 0.4940877209700441, "learning_rate": 9.988754811052616e-06, "loss": 0.355, "step": 428 }, { "epoch": 0.10210031534479681, "grad_norm": 0.4302285563808865, "learning_rate": 9.988625233755975e-06, "loss": 0.4783, "step": 429 }, { "epoch": 0.10233831141786161, "grad_norm": 0.4399773493860619, "learning_rate": 9.988494915026213e-06, "loss": 0.4771, "step": 430 }, { "epoch": 0.1025763074909264, "grad_norm": 0.5749592576382808, "learning_rate": 9.988363854882694e-06, "loss": 0.3388, "step": 431 }, { "epoch": 0.1028143035639912, "grad_norm": 0.4171737508333019, "learning_rate": 9.988232053344901e-06, "loss": 0.4032, "step": 432 }, { "epoch": 0.10305229963705599, "grad_norm": 0.44690528937324775, "learning_rate": 9.988099510432422e-06, "loss": 0.4354, "step": 433 }, { "epoch": 0.10329029571012079, "grad_norm": 0.7413219284083276, "learning_rate": 9.987966226164958e-06, "loss": 0.3871, "step": 434 }, { "epoch": 0.10352829178318558, "grad_norm": 0.572534433584837, "learning_rate": 9.987832200562315e-06, "loss": 0.3343, "step": 435 }, { "epoch": 0.10376628785625037, "grad_norm": 0.4024474823593315, "learning_rate": 9.987697433644414e-06, "loss": 0.4353, "step": 436 }, { "epoch": 0.10400428392931517, "grad_norm": 0.5053428492479292, "learning_rate": 9.987561925431283e-06, "loss": 0.456, "step": 437 }, { "epoch": 0.10424228000237996, "grad_norm": 0.5684801045695161, "learning_rate": 9.987425675943067e-06, "loss": 0.3426, "step": 438 }, { "epoch": 0.10448027607544476, "grad_norm": 0.5703263329939863, "learning_rate": 9.98728868520001e-06, "loss": 0.417, "step": 439 }, { "epoch": 0.10471827214850955, "grad_norm": 0.5736516226532222, "learning_rate": 9.987150953222476e-06, "loss": 0.4352, "step": 440 }, { "epoch": 0.10495626822157435, "grad_norm": 0.4287015110638932, "learning_rate": 9.987012480030934e-06, "loss": 0.4033, "step": 441 }, { "epoch": 0.10519426429463914, "grad_norm": 0.46545342863477973, "learning_rate": 9.986873265645965e-06, "loss": 0.3712, "step": 442 }, { "epoch": 0.10543226036770394, "grad_norm": 0.4709132222065424, "learning_rate": 9.98673331008826e-06, "loss": 0.4198, "step": 443 }, { "epoch": 0.10567025644076873, "grad_norm": 0.5082536813625272, "learning_rate": 9.986592613378616e-06, "loss": 0.4497, "step": 444 }, { "epoch": 0.10590825251383353, "grad_norm": 0.4722946115830572, "learning_rate": 9.98645117553795e-06, "loss": 0.3466, "step": 445 }, { "epoch": 0.10614624858689832, "grad_norm": 1.2081855072445622, "learning_rate": 9.98630899658728e-06, "loss": 0.3711, "step": 446 }, { "epoch": 0.10638424465996311, "grad_norm": 0.49995351880997874, "learning_rate": 9.98616607654774e-06, "loss": 0.4477, "step": 447 }, { "epoch": 0.10662224073302791, "grad_norm": 0.5032188297810777, "learning_rate": 9.986022415440564e-06, "loss": 0.4162, "step": 448 }, { "epoch": 0.1068602368060927, "grad_norm": 0.7647181678703971, "learning_rate": 9.985878013287113e-06, "loss": 0.3694, "step": 449 }, { "epoch": 0.1070982328791575, "grad_norm": 0.6789559238085723, "learning_rate": 9.985732870108843e-06, "loss": 0.4079, "step": 450 }, { "epoch": 0.10733622895222229, "grad_norm": 0.6758534206958869, "learning_rate": 9.985586985927328e-06, "loss": 0.4949, "step": 451 }, { "epoch": 0.10757422502528709, "grad_norm": 0.63305874198617, "learning_rate": 9.98544036076425e-06, "loss": 0.3443, "step": 452 }, { "epoch": 0.10781222109835188, "grad_norm": 0.5712028770173395, "learning_rate": 9.985292994641398e-06, "loss": 0.3686, "step": 453 }, { "epoch": 0.10805021717141668, "grad_norm": 0.42711184550985515, "learning_rate": 9.98514488758068e-06, "loss": 0.4819, "step": 454 }, { "epoch": 0.10828821324448147, "grad_norm": 0.45616716468152596, "learning_rate": 9.984996039604102e-06, "loss": 0.4337, "step": 455 }, { "epoch": 0.10852620931754627, "grad_norm": 0.44388087258594616, "learning_rate": 9.98484645073379e-06, "loss": 0.3341, "step": 456 }, { "epoch": 0.10876420539061106, "grad_norm": 0.5001916577790845, "learning_rate": 9.984696120991979e-06, "loss": 0.431, "step": 457 }, { "epoch": 0.10900220146367585, "grad_norm": 0.44705716190669925, "learning_rate": 9.984545050401007e-06, "loss": 0.4467, "step": 458 }, { "epoch": 0.10924019753674065, "grad_norm": 0.45150670551113553, "learning_rate": 9.98439323898333e-06, "loss": 0.3919, "step": 459 }, { "epoch": 0.10947819360980544, "grad_norm": 0.520796434754494, "learning_rate": 9.98424068676151e-06, "loss": 0.3459, "step": 460 }, { "epoch": 0.10971618968287024, "grad_norm": 0.4208531093058114, "learning_rate": 9.984087393758218e-06, "loss": 0.444, "step": 461 }, { "epoch": 0.10995418575593503, "grad_norm": 0.45915739817950607, "learning_rate": 9.983933359996241e-06, "loss": 0.4736, "step": 462 }, { "epoch": 0.11019218182899983, "grad_norm": 0.4461278219351183, "learning_rate": 9.983778585498468e-06, "loss": 0.3765, "step": 463 }, { "epoch": 0.11043017790206462, "grad_norm": 0.40738699092689734, "learning_rate": 9.983623070287905e-06, "loss": 0.3933, "step": 464 }, { "epoch": 0.11066817397512942, "grad_norm": 0.4913832741443862, "learning_rate": 9.983466814387666e-06, "loss": 0.4552, "step": 465 }, { "epoch": 0.11090617004819421, "grad_norm": 0.530231130629648, "learning_rate": 9.983309817820972e-06, "loss": 0.3586, "step": 466 }, { "epoch": 0.111144166121259, "grad_norm": 0.4241816209307345, "learning_rate": 9.983152080611158e-06, "loss": 0.326, "step": 467 }, { "epoch": 0.1113821621943238, "grad_norm": 0.43804873868170846, "learning_rate": 9.98299360278167e-06, "loss": 0.4251, "step": 468 }, { "epoch": 0.1116201582673886, "grad_norm": 0.4393810388350614, "learning_rate": 9.982834384356057e-06, "loss": 0.4675, "step": 469 }, { "epoch": 0.11185815434045339, "grad_norm": 0.46372765200679533, "learning_rate": 9.982674425357985e-06, "loss": 0.3493, "step": 470 }, { "epoch": 0.11209615041351818, "grad_norm": 0.4436475358645403, "learning_rate": 9.982513725811228e-06, "loss": 0.4243, "step": 471 }, { "epoch": 0.11233414648658298, "grad_norm": 0.4173010199129987, "learning_rate": 9.98235228573967e-06, "loss": 0.472, "step": 472 }, { "epoch": 0.11257214255964777, "grad_norm": 0.535729642414528, "learning_rate": 9.982190105167306e-06, "loss": 0.4164, "step": 473 }, { "epoch": 0.11281013863271257, "grad_norm": 0.4772334936227505, "learning_rate": 9.982027184118236e-06, "loss": 0.349, "step": 474 }, { "epoch": 0.11304813470577735, "grad_norm": 0.47313830652761185, "learning_rate": 9.981863522616681e-06, "loss": 0.4435, "step": 475 }, { "epoch": 0.11328613077884214, "grad_norm": 0.47425353304897183, "learning_rate": 9.981699120686959e-06, "loss": 0.4695, "step": 476 }, { "epoch": 0.11352412685190694, "grad_norm": 0.44292841465417715, "learning_rate": 9.981533978353508e-06, "loss": 0.3857, "step": 477 }, { "epoch": 0.11376212292497173, "grad_norm": 0.5239164407002358, "learning_rate": 9.981368095640868e-06, "loss": 0.3726, "step": 478 }, { "epoch": 0.11400011899803653, "grad_norm": 0.44488449687995085, "learning_rate": 9.981201472573698e-06, "loss": 0.4432, "step": 479 }, { "epoch": 0.11423811507110132, "grad_norm": 0.43967538016218216, "learning_rate": 9.98103410917676e-06, "loss": 0.4076, "step": 480 }, { "epoch": 0.11447611114416611, "grad_norm": 0.739424564589288, "learning_rate": 9.980866005474928e-06, "loss": 0.3675, "step": 481 }, { "epoch": 0.11471410721723091, "grad_norm": 0.4993736830245461, "learning_rate": 9.980697161493185e-06, "loss": 0.4131, "step": 482 }, { "epoch": 0.1149521032902957, "grad_norm": 1.3276469711665722, "learning_rate": 9.980527577256629e-06, "loss": 0.4678, "step": 483 }, { "epoch": 0.1151900993633605, "grad_norm": 0.46128218449640923, "learning_rate": 9.980357252790464e-06, "loss": 0.3607, "step": 484 }, { "epoch": 0.1154280954364253, "grad_norm": 0.41316023220047166, "learning_rate": 9.980186188120002e-06, "loss": 0.351, "step": 485 }, { "epoch": 0.11566609150949009, "grad_norm": 0.482093707108989, "learning_rate": 9.980014383270668e-06, "loss": 0.3955, "step": 486 }, { "epoch": 0.11590408758255488, "grad_norm": 0.5011032501000284, "learning_rate": 9.979841838267999e-06, "loss": 0.458, "step": 487 }, { "epoch": 0.11614208365561968, "grad_norm": 0.4135106798022244, "learning_rate": 9.979668553137635e-06, "loss": 0.3878, "step": 488 }, { "epoch": 0.11638007972868447, "grad_norm": 0.44904272180640614, "learning_rate": 9.979494527905334e-06, "loss": 0.3706, "step": 489 }, { "epoch": 0.11661807580174927, "grad_norm": 0.41254283183456614, "learning_rate": 9.979319762596959e-06, "loss": 0.4731, "step": 490 }, { "epoch": 0.11685607187481406, "grad_norm": 0.4326477250584368, "learning_rate": 9.979144257238484e-06, "loss": 0.3882, "step": 491 }, { "epoch": 0.11709406794787885, "grad_norm": 0.47385412746747546, "learning_rate": 9.978968011855996e-06, "loss": 0.3596, "step": 492 }, { "epoch": 0.11733206402094365, "grad_norm": 0.48119232908966797, "learning_rate": 9.978791026475689e-06, "loss": 0.4342, "step": 493 }, { "epoch": 0.11757006009400844, "grad_norm": 0.4441552118826538, "learning_rate": 9.978613301123864e-06, "loss": 0.4489, "step": 494 }, { "epoch": 0.11780805616707324, "grad_norm": 0.46690414644784983, "learning_rate": 9.978434835826937e-06, "loss": 0.3777, "step": 495 }, { "epoch": 0.11804605224013803, "grad_norm": 0.44970400252197734, "learning_rate": 9.978255630611432e-06, "loss": 0.3701, "step": 496 }, { "epoch": 0.11828404831320283, "grad_norm": 0.42146124042697336, "learning_rate": 9.978075685503988e-06, "loss": 0.4581, "step": 497 }, { "epoch": 0.11852204438626762, "grad_norm": 0.4287576458047421, "learning_rate": 9.977895000531343e-06, "loss": 0.4068, "step": 498 }, { "epoch": 0.11876004045933242, "grad_norm": 0.4661011135993105, "learning_rate": 9.977713575720354e-06, "loss": 0.3648, "step": 499 }, { "epoch": 0.11899803653239721, "grad_norm": 0.44336193791681305, "learning_rate": 9.977531411097985e-06, "loss": 0.3956, "step": 500 }, { "epoch": 0.119236032605462, "grad_norm": 0.40135410660420456, "learning_rate": 9.97734850669131e-06, "loss": 0.4393, "step": 501 }, { "epoch": 0.1194740286785268, "grad_norm": 0.4559807876897745, "learning_rate": 9.977164862527512e-06, "loss": 0.3646, "step": 502 }, { "epoch": 0.1197120247515916, "grad_norm": 0.4347794693076568, "learning_rate": 9.976980478633888e-06, "loss": 0.3785, "step": 503 }, { "epoch": 0.11995002082465639, "grad_norm": 0.46153771301045676, "learning_rate": 9.97679535503784e-06, "loss": 0.4069, "step": 504 }, { "epoch": 0.12018801689772118, "grad_norm": 0.424717397560516, "learning_rate": 9.976609491766883e-06, "loss": 0.4598, "step": 505 }, { "epoch": 0.12042601297078598, "grad_norm": 0.4434747636911381, "learning_rate": 9.97642288884864e-06, "loss": 0.356, "step": 506 }, { "epoch": 0.12066400904385077, "grad_norm": 0.4452496263517166, "learning_rate": 9.976235546310844e-06, "loss": 0.4129, "step": 507 }, { "epoch": 0.12090200511691557, "grad_norm": 0.4578774709736023, "learning_rate": 9.97604746418134e-06, "loss": 0.4812, "step": 508 }, { "epoch": 0.12114000118998036, "grad_norm": 0.41675783377655806, "learning_rate": 9.975858642488081e-06, "loss": 0.3771, "step": 509 }, { "epoch": 0.12137799726304516, "grad_norm": 0.40069287579572055, "learning_rate": 9.975669081259132e-06, "loss": 0.3734, "step": 510 }, { "epoch": 0.12161599333610995, "grad_norm": 0.4309476794229487, "learning_rate": 9.975478780522664e-06, "loss": 0.4206, "step": 511 }, { "epoch": 0.12185398940917475, "grad_norm": 0.46800262518826174, "learning_rate": 9.975287740306962e-06, "loss": 0.4462, "step": 512 }, { "epoch": 0.12209198548223954, "grad_norm": 0.42699693782693693, "learning_rate": 9.97509596064042e-06, "loss": 0.3419, "step": 513 }, { "epoch": 0.12232998155530433, "grad_norm": 0.4386357295282374, "learning_rate": 9.97490344155154e-06, "loss": 0.398, "step": 514 }, { "epoch": 0.12256797762836913, "grad_norm": 0.5979447179868536, "learning_rate": 9.974710183068935e-06, "loss": 0.4266, "step": 515 }, { "epoch": 0.12280597370143392, "grad_norm": 0.4391789380414681, "learning_rate": 9.97451618522133e-06, "loss": 0.3549, "step": 516 }, { "epoch": 0.12304396977449872, "grad_norm": 0.4254442498856795, "learning_rate": 9.974321448037553e-06, "loss": 0.372, "step": 517 }, { "epoch": 0.12328196584756351, "grad_norm": 0.4203710488596454, "learning_rate": 9.974125971546553e-06, "loss": 0.4029, "step": 518 }, { "epoch": 0.12351996192062831, "grad_norm": 0.4235704728114593, "learning_rate": 9.973929755777379e-06, "loss": 0.4569, "step": 519 }, { "epoch": 0.1237579579936931, "grad_norm": 0.43253576502031255, "learning_rate": 9.973732800759193e-06, "loss": 0.3386, "step": 520 }, { "epoch": 0.1239959540667579, "grad_norm": 0.4130102595578799, "learning_rate": 9.97353510652127e-06, "loss": 0.3849, "step": 521 }, { "epoch": 0.12423395013982269, "grad_norm": 0.43914912023331804, "learning_rate": 9.97333667309299e-06, "loss": 0.4497, "step": 522 }, { "epoch": 0.12447194621288749, "grad_norm": 0.3972463467100798, "learning_rate": 9.973137500503846e-06, "loss": 0.3995, "step": 523 }, { "epoch": 0.12470994228595228, "grad_norm": 0.4452000977643881, "learning_rate": 9.97293758878344e-06, "loss": 0.3296, "step": 524 }, { "epoch": 0.12494793835901707, "grad_norm": 0.4194230322974543, "learning_rate": 9.972736937961484e-06, "loss": 0.4134, "step": 525 }, { "epoch": 0.12518593443208187, "grad_norm": 0.4146732557797122, "learning_rate": 9.9725355480678e-06, "loss": 0.474, "step": 526 }, { "epoch": 0.12542393050514666, "grad_norm": 0.40700998263113747, "learning_rate": 9.972333419132319e-06, "loss": 0.383, "step": 527 }, { "epoch": 0.12566192657821146, "grad_norm": 0.4023415477618789, "learning_rate": 9.97213055118508e-06, "loss": 0.3658, "step": 528 }, { "epoch": 0.12589992265127625, "grad_norm": 0.44336944898631714, "learning_rate": 9.971926944256239e-06, "loss": 0.476, "step": 529 }, { "epoch": 0.12613791872434105, "grad_norm": 0.40291923826893716, "learning_rate": 9.971722598376054e-06, "loss": 0.4248, "step": 530 }, { "epoch": 0.12637591479740584, "grad_norm": 0.3986265875109195, "learning_rate": 9.971517513574896e-06, "loss": 0.3415, "step": 531 }, { "epoch": 0.12661391087047064, "grad_norm": 0.43032291172443293, "learning_rate": 9.971311689883247e-06, "loss": 0.4259, "step": 532 }, { "epoch": 0.12685190694353543, "grad_norm": 0.39866454624485, "learning_rate": 9.971105127331695e-06, "loss": 0.4444, "step": 533 }, { "epoch": 0.12708990301660023, "grad_norm": 0.43444250324758876, "learning_rate": 9.970897825950942e-06, "loss": 0.3788, "step": 534 }, { "epoch": 0.12732789908966502, "grad_norm": 0.419118191593301, "learning_rate": 9.970689785771798e-06, "loss": 0.3715, "step": 535 }, { "epoch": 0.12756589516272981, "grad_norm": 0.4333465547745647, "learning_rate": 9.970481006825185e-06, "loss": 0.4261, "step": 536 }, { "epoch": 0.1278038912357946, "grad_norm": 0.4353660970004805, "learning_rate": 9.970271489142127e-06, "loss": 0.4315, "step": 537 }, { "epoch": 0.1280418873088594, "grad_norm": 0.45342539645281116, "learning_rate": 9.97006123275377e-06, "loss": 0.4001, "step": 538 }, { "epoch": 0.1282798833819242, "grad_norm": 0.44126076267178, "learning_rate": 9.96985023769136e-06, "loss": 0.3999, "step": 539 }, { "epoch": 0.128517879454989, "grad_norm": 0.42194551508228195, "learning_rate": 9.969638503986256e-06, "loss": 0.4385, "step": 540 }, { "epoch": 0.1287558755280538, "grad_norm": 0.4069592336480195, "learning_rate": 9.969426031669928e-06, "loss": 0.3883, "step": 541 }, { "epoch": 0.12899387160111858, "grad_norm": 0.4178468875829833, "learning_rate": 9.969212820773952e-06, "loss": 0.3655, "step": 542 }, { "epoch": 0.12923186767418338, "grad_norm": 0.4570434100827094, "learning_rate": 9.968998871330021e-06, "loss": 0.4334, "step": 543 }, { "epoch": 0.12946986374724817, "grad_norm": 0.4166848553496299, "learning_rate": 9.968784183369929e-06, "loss": 0.4484, "step": 544 }, { "epoch": 0.12970785982031297, "grad_norm": 0.42519002468336536, "learning_rate": 9.968568756925588e-06, "loss": 0.3469, "step": 545 }, { "epoch": 0.12994585589337776, "grad_norm": 0.44962945270540455, "learning_rate": 9.968352592029011e-06, "loss": 0.3744, "step": 546 }, { "epoch": 0.13018385196644255, "grad_norm": 0.40876516260944795, "learning_rate": 9.968135688712328e-06, "loss": 0.4431, "step": 547 }, { "epoch": 0.13042184803950735, "grad_norm": 0.47209127285023517, "learning_rate": 9.967918047007775e-06, "loss": 0.3748, "step": 548 }, { "epoch": 0.13065984411257214, "grad_norm": 0.4089317804173208, "learning_rate": 9.967699666947702e-06, "loss": 0.3127, "step": 549 }, { "epoch": 0.13089784018563694, "grad_norm": 0.4235984982251744, "learning_rate": 9.96748054856456e-06, "loss": 0.4103, "step": 550 }, { "epoch": 0.13113583625870173, "grad_norm": 0.47319011240693865, "learning_rate": 9.967260691890924e-06, "loss": 0.4523, "step": 551 }, { "epoch": 0.13137383233176653, "grad_norm": 0.5145055531378172, "learning_rate": 9.967040096959462e-06, "loss": 0.3992, "step": 552 }, { "epoch": 0.13161182840483132, "grad_norm": 0.4195472052055503, "learning_rate": 9.966818763802963e-06, "loss": 0.344, "step": 553 }, { "epoch": 0.13184982447789612, "grad_norm": 0.43253895948369897, "learning_rate": 9.966596692454323e-06, "loss": 0.4628, "step": 554 }, { "epoch": 0.1320878205509609, "grad_norm": 0.418918991234706, "learning_rate": 9.966373882946546e-06, "loss": 0.3967, "step": 555 }, { "epoch": 0.1323258166240257, "grad_norm": 0.45069506384756325, "learning_rate": 9.966150335312747e-06, "loss": 0.3592, "step": 556 }, { "epoch": 0.1325638126970905, "grad_norm": 0.4112688079328426, "learning_rate": 9.965926049586154e-06, "loss": 0.4051, "step": 557 }, { "epoch": 0.1328018087701553, "grad_norm": 0.4306546680664368, "learning_rate": 9.965701025800098e-06, "loss": 0.4501, "step": 558 }, { "epoch": 0.1330398048432201, "grad_norm": 0.47742995855119286, "learning_rate": 9.965475263988024e-06, "loss": 0.3709, "step": 559 }, { "epoch": 0.13327780091628488, "grad_norm": 0.43252541349342494, "learning_rate": 9.965248764183486e-06, "loss": 0.3474, "step": 560 }, { "epoch": 0.13351579698934968, "grad_norm": 0.4244516806580105, "learning_rate": 9.965021526420146e-06, "loss": 0.4157, "step": 561 }, { "epoch": 0.13375379306241447, "grad_norm": 0.4501211384384804, "learning_rate": 9.96479355073178e-06, "loss": 0.4489, "step": 562 }, { "epoch": 0.13399178913547927, "grad_norm": 0.433684978151453, "learning_rate": 9.964564837152268e-06, "loss": 0.3325, "step": 563 }, { "epoch": 0.13422978520854406, "grad_norm": 0.4143350623973339, "learning_rate": 9.964335385715607e-06, "loss": 0.4103, "step": 564 }, { "epoch": 0.13446778128160886, "grad_norm": 0.4356764057938835, "learning_rate": 9.964105196455892e-06, "loss": 0.4426, "step": 565 }, { "epoch": 0.13470577735467365, "grad_norm": 0.43904123994773603, "learning_rate": 9.963874269407342e-06, "loss": 0.3823, "step": 566 }, { "epoch": 0.13494377342773844, "grad_norm": 0.4195562006737919, "learning_rate": 9.963642604604273e-06, "loss": 0.358, "step": 567 }, { "epoch": 0.13518176950080324, "grad_norm": 0.4134567373776786, "learning_rate": 9.963410202081118e-06, "loss": 0.3699, "step": 568 }, { "epoch": 0.13541976557386803, "grad_norm": 0.4677127657760346, "learning_rate": 9.96317706187242e-06, "loss": 0.4375, "step": 569 }, { "epoch": 0.13565776164693283, "grad_norm": 0.459487545289008, "learning_rate": 9.962943184012826e-06, "loss": 0.3227, "step": 570 }, { "epoch": 0.13589575771999762, "grad_norm": 0.46349644278250035, "learning_rate": 9.962708568537099e-06, "loss": 0.382, "step": 571 }, { "epoch": 0.13613375379306242, "grad_norm": 0.42793811916759666, "learning_rate": 9.962473215480106e-06, "loss": 0.4328, "step": 572 }, { "epoch": 0.1363717498661272, "grad_norm": 0.4132709478039074, "learning_rate": 9.962237124876828e-06, "loss": 0.4126, "step": 573 }, { "epoch": 0.136609745939192, "grad_norm": 0.4876228484575214, "learning_rate": 9.962000296762352e-06, "loss": 0.3429, "step": 574 }, { "epoch": 0.1368477420122568, "grad_norm": 0.3977015514378766, "learning_rate": 9.96176273117188e-06, "loss": 0.4365, "step": 575 }, { "epoch": 0.1370857380853216, "grad_norm": 0.4078608665053635, "learning_rate": 9.961524428140716e-06, "loss": 0.4907, "step": 576 }, { "epoch": 0.1373237341583864, "grad_norm": 0.43371567468393196, "learning_rate": 9.961285387704283e-06, "loss": 0.3612, "step": 577 }, { "epoch": 0.13756173023145118, "grad_norm": 0.4879930467640949, "learning_rate": 9.961045609898103e-06, "loss": 0.3276, "step": 578 }, { "epoch": 0.13779972630451598, "grad_norm": 0.40451489231885396, "learning_rate": 9.960805094757815e-06, "loss": 0.4356, "step": 579 }, { "epoch": 0.13803772237758077, "grad_norm": 0.44097468379761057, "learning_rate": 9.960563842319164e-06, "loss": 0.4218, "step": 580 }, { "epoch": 0.13827571845064557, "grad_norm": 0.5834242401504829, "learning_rate": 9.96032185261801e-06, "loss": 0.3877, "step": 581 }, { "epoch": 0.13851371452371036, "grad_norm": 0.45653361970962053, "learning_rate": 9.960079125690317e-06, "loss": 0.3879, "step": 582 }, { "epoch": 0.13875171059677516, "grad_norm": 0.4476140474436517, "learning_rate": 9.959835661572158e-06, "loss": 0.4525, "step": 583 }, { "epoch": 0.13898970666983995, "grad_norm": 0.4543506355962775, "learning_rate": 9.959591460299719e-06, "loss": 0.4012, "step": 584 }, { "epoch": 0.13922770274290475, "grad_norm": 0.4800146363435457, "learning_rate": 9.959346521909295e-06, "loss": 0.3524, "step": 585 }, { "epoch": 0.13946569881596954, "grad_norm": 0.4366124496205618, "learning_rate": 9.95910084643729e-06, "loss": 0.4189, "step": 586 }, { "epoch": 0.13970369488903434, "grad_norm": 0.44571391749959416, "learning_rate": 9.958854433920215e-06, "loss": 0.4703, "step": 587 }, { "epoch": 0.13994169096209913, "grad_norm": 0.49239121417665377, "learning_rate": 9.958607284394696e-06, "loss": 0.343, "step": 588 }, { "epoch": 0.14017968703516392, "grad_norm": 0.4317901276773715, "learning_rate": 9.958359397897465e-06, "loss": 0.3861, "step": 589 }, { "epoch": 0.14041768310822872, "grad_norm": 0.4147928486152337, "learning_rate": 9.958110774465364e-06, "loss": 0.4516, "step": 590 }, { "epoch": 0.1406556791812935, "grad_norm": 0.4611062418346612, "learning_rate": 9.957861414135343e-06, "loss": 0.3849, "step": 591 }, { "epoch": 0.1408936752543583, "grad_norm": 0.5052327412000163, "learning_rate": 9.957611316944465e-06, "loss": 0.4016, "step": 592 }, { "epoch": 0.1411316713274231, "grad_norm": 0.4732644464070684, "learning_rate": 9.957360482929898e-06, "loss": 0.3911, "step": 593 }, { "epoch": 0.1413696674004879, "grad_norm": 0.45055177739669783, "learning_rate": 9.957108912128927e-06, "loss": 0.4721, "step": 594 }, { "epoch": 0.1416076634735527, "grad_norm": 0.5207702754193928, "learning_rate": 9.956856604578937e-06, "loss": 0.3663, "step": 595 }, { "epoch": 0.1418456595466175, "grad_norm": 0.4532039740900448, "learning_rate": 9.95660356031743e-06, "loss": 0.3646, "step": 596 }, { "epoch": 0.14208365561968228, "grad_norm": 0.45255299216112244, "learning_rate": 9.956349779382014e-06, "loss": 0.4761, "step": 597 }, { "epoch": 0.14232165169274708, "grad_norm": 0.4294348039394876, "learning_rate": 9.956095261810404e-06, "loss": 0.4312, "step": 598 }, { "epoch": 0.14255964776581187, "grad_norm": 0.4145607881038628, "learning_rate": 9.955840007640432e-06, "loss": 0.3167, "step": 599 }, { "epoch": 0.14279764383887666, "grad_norm": 0.44934929004475394, "learning_rate": 9.955584016910033e-06, "loss": 0.3974, "step": 600 }, { "epoch": 0.14303563991194146, "grad_norm": 0.39995571799722107, "learning_rate": 9.955327289657253e-06, "loss": 0.4574, "step": 601 }, { "epoch": 0.14327363598500625, "grad_norm": 0.4365868901329556, "learning_rate": 9.955069825920249e-06, "loss": 0.3627, "step": 602 }, { "epoch": 0.14351163205807105, "grad_norm": 0.4542607300367256, "learning_rate": 9.954811625737289e-06, "loss": 0.3208, "step": 603 }, { "epoch": 0.14374962813113584, "grad_norm": 0.42854644501077194, "learning_rate": 9.954552689146743e-06, "loss": 0.4154, "step": 604 }, { "epoch": 0.14398762420420064, "grad_norm": 0.43253450331881993, "learning_rate": 9.954293016187098e-06, "loss": 0.4209, "step": 605 }, { "epoch": 0.14422562027726543, "grad_norm": 0.4283765316302236, "learning_rate": 9.954032606896946e-06, "loss": 0.3312, "step": 606 }, { "epoch": 0.14446361635033023, "grad_norm": 0.4341091606907487, "learning_rate": 9.953771461314994e-06, "loss": 0.4196, "step": 607 }, { "epoch": 0.14470161242339502, "grad_norm": 0.41550595013594926, "learning_rate": 9.953509579480052e-06, "loss": 0.4728, "step": 608 }, { "epoch": 0.14493960849645982, "grad_norm": 0.41104915967555633, "learning_rate": 9.953246961431043e-06, "loss": 0.3532, "step": 609 }, { "epoch": 0.1451776045695246, "grad_norm": 0.44491387505754176, "learning_rate": 9.952983607206996e-06, "loss": 0.3567, "step": 610 }, { "epoch": 0.1454156006425894, "grad_norm": 0.41859179197947394, "learning_rate": 9.952719516847055e-06, "loss": 0.4096, "step": 611 }, { "epoch": 0.1456535967156542, "grad_norm": 0.422145361038327, "learning_rate": 9.95245469039047e-06, "loss": 0.4246, "step": 612 }, { "epoch": 0.145891592788719, "grad_norm": 0.4500980509847598, "learning_rate": 9.9521891278766e-06, "loss": 0.3426, "step": 613 }, { "epoch": 0.1461295888617838, "grad_norm": 0.43000982194705367, "learning_rate": 9.951922829344914e-06, "loss": 0.3946, "step": 614 }, { "epoch": 0.14636758493484858, "grad_norm": 0.41053172531397664, "learning_rate": 9.951655794834991e-06, "loss": 0.4663, "step": 615 }, { "epoch": 0.14660558100791338, "grad_norm": 0.3923350216414785, "learning_rate": 9.951388024386519e-06, "loss": 0.3757, "step": 616 }, { "epoch": 0.14684357708097817, "grad_norm": 0.4599433113284599, "learning_rate": 9.951119518039297e-06, "loss": 0.3385, "step": 617 }, { "epoch": 0.14708157315404297, "grad_norm": 0.39908104730389854, "learning_rate": 9.950850275833226e-06, "loss": 0.4074, "step": 618 }, { "epoch": 0.14731956922710776, "grad_norm": 0.4511403028324386, "learning_rate": 9.950580297808329e-06, "loss": 0.4194, "step": 619 }, { "epoch": 0.14755756530017256, "grad_norm": 0.4617542670919041, "learning_rate": 9.950309584004728e-06, "loss": 0.3355, "step": 620 }, { "epoch": 0.14779556137323735, "grad_norm": 0.4226130485474674, "learning_rate": 9.950038134462655e-06, "loss": 0.3681, "step": 621 }, { "epoch": 0.14803355744630214, "grad_norm": 0.4247694573529077, "learning_rate": 9.949765949222461e-06, "loss": 0.4761, "step": 622 }, { "epoch": 0.14827155351936694, "grad_norm": 0.4516914981541404, "learning_rate": 9.949493028324593e-06, "loss": 0.417, "step": 623 }, { "epoch": 0.14850954959243173, "grad_norm": 0.45995529369267013, "learning_rate": 9.949219371809618e-06, "loss": 0.355, "step": 624 }, { "epoch": 0.14874754566549653, "grad_norm": 0.4610870216065006, "learning_rate": 9.948944979718206e-06, "loss": 0.4184, "step": 625 }, { "epoch": 0.14898554173856132, "grad_norm": 0.46221460180343904, "learning_rate": 9.94866985209114e-06, "loss": 0.4689, "step": 626 }, { "epoch": 0.14922353781162612, "grad_norm": 0.4241292231021412, "learning_rate": 9.948393988969307e-06, "loss": 0.3911, "step": 627 }, { "epoch": 0.1494615338846909, "grad_norm": 0.40337434368955516, "learning_rate": 9.948117390393713e-06, "loss": 0.3635, "step": 628 }, { "epoch": 0.1496995299577557, "grad_norm": 0.40666940123628886, "learning_rate": 9.947840056405461e-06, "loss": 0.4265, "step": 629 }, { "epoch": 0.1499375260308205, "grad_norm": 0.408720859695881, "learning_rate": 9.947561987045777e-06, "loss": 0.4675, "step": 630 }, { "epoch": 0.1501755221038853, "grad_norm": 0.46059437009326065, "learning_rate": 9.947283182355982e-06, "loss": 0.3846, "step": 631 }, { "epoch": 0.1504135181769501, "grad_norm": 0.40219876977724317, "learning_rate": 9.947003642377517e-06, "loss": 0.3583, "step": 632 }, { "epoch": 0.15065151425001488, "grad_norm": 0.40741994114516356, "learning_rate": 9.946723367151929e-06, "loss": 0.4467, "step": 633 }, { "epoch": 0.15088951032307968, "grad_norm": 0.4170974461418337, "learning_rate": 9.94644235672087e-06, "loss": 0.3964, "step": 634 }, { "epoch": 0.15112750639614447, "grad_norm": 0.3959538443481256, "learning_rate": 9.94616061112611e-06, "loss": 0.3624, "step": 635 }, { "epoch": 0.15136550246920927, "grad_norm": 0.41399916071040127, "learning_rate": 9.94587813040952e-06, "loss": 0.3988, "step": 636 }, { "epoch": 0.15160349854227406, "grad_norm": 0.42565733949269147, "learning_rate": 9.945594914613085e-06, "loss": 0.4244, "step": 637 }, { "epoch": 0.15184149461533886, "grad_norm": 0.4176195376045644, "learning_rate": 9.945310963778897e-06, "loss": 0.3422, "step": 638 }, { "epoch": 0.15207949068840365, "grad_norm": 0.4091797435607871, "learning_rate": 9.945026277949159e-06, "loss": 0.391, "step": 639 }, { "epoch": 0.15231748676146845, "grad_norm": 0.3752152693514474, "learning_rate": 9.944740857166181e-06, "loss": 0.4218, "step": 640 }, { "epoch": 0.15255548283453324, "grad_norm": 0.7050883051614025, "learning_rate": 9.944454701472387e-06, "loss": 0.3827, "step": 641 }, { "epoch": 0.15279347890759803, "grad_norm": 0.4433283675728129, "learning_rate": 9.944167810910304e-06, "loss": 0.3533, "step": 642 }, { "epoch": 0.15303147498066283, "grad_norm": 0.42554987379842735, "learning_rate": 9.94388018552257e-06, "loss": 0.4089, "step": 643 }, { "epoch": 0.15326947105372762, "grad_norm": 0.45679111789324556, "learning_rate": 9.943591825351934e-06, "loss": 0.4456, "step": 644 }, { "epoch": 0.15350746712679242, "grad_norm": 0.42999137088818135, "learning_rate": 9.943302730441258e-06, "loss": 0.3426, "step": 645 }, { "epoch": 0.1537454631998572, "grad_norm": 0.43170003868689516, "learning_rate": 9.943012900833503e-06, "loss": 0.3613, "step": 646 }, { "epoch": 0.153983459272922, "grad_norm": 0.40514208501535537, "learning_rate": 9.942722336571746e-06, "loss": 0.459, "step": 647 }, { "epoch": 0.1542214553459868, "grad_norm": 0.42503224905946857, "learning_rate": 9.942431037699171e-06, "loss": 0.4123, "step": 648 }, { "epoch": 0.1544594514190516, "grad_norm": 0.43534422977552745, "learning_rate": 9.942139004259077e-06, "loss": 0.3489, "step": 649 }, { "epoch": 0.1546974474921164, "grad_norm": 0.4042874898448957, "learning_rate": 9.941846236294863e-06, "loss": 0.417, "step": 650 }, { "epoch": 0.15493544356518119, "grad_norm": 0.41685565190135837, "learning_rate": 9.941552733850044e-06, "loss": 0.4741, "step": 651 }, { "epoch": 0.15517343963824598, "grad_norm": 0.41165103596561803, "learning_rate": 9.941258496968238e-06, "loss": 0.3744, "step": 652 }, { "epoch": 0.15541143571131077, "grad_norm": 0.4215966209056301, "learning_rate": 9.940963525693181e-06, "loss": 0.3437, "step": 653 }, { "epoch": 0.15564943178437557, "grad_norm": 0.4524386189484754, "learning_rate": 9.94066782006871e-06, "loss": 0.4291, "step": 654 }, { "epoch": 0.15588742785744036, "grad_norm": 0.43482975599739715, "learning_rate": 9.940371380138774e-06, "loss": 0.4164, "step": 655 }, { "epoch": 0.15612542393050516, "grad_norm": 0.4120821183177027, "learning_rate": 9.940074205947432e-06, "loss": 0.3363, "step": 656 }, { "epoch": 0.15636342000356995, "grad_norm": 0.4309229287404229, "learning_rate": 9.939776297538853e-06, "loss": 0.389, "step": 657 }, { "epoch": 0.15660141607663475, "grad_norm": 0.416688460486632, "learning_rate": 9.93947765495731e-06, "loss": 0.4333, "step": 658 }, { "epoch": 0.15683941214969954, "grad_norm": 0.3965370666176588, "learning_rate": 9.939178278247192e-06, "loss": 0.3478, "step": 659 }, { "epoch": 0.15707740822276434, "grad_norm": 0.439104289476318, "learning_rate": 9.938878167452991e-06, "loss": 0.3427, "step": 660 }, { "epoch": 0.15731540429582913, "grad_norm": 0.398185404048493, "learning_rate": 9.938577322619315e-06, "loss": 0.3966, "step": 661 }, { "epoch": 0.15755340036889393, "grad_norm": 0.4488481583859823, "learning_rate": 9.938275743790872e-06, "loss": 0.4251, "step": 662 }, { "epoch": 0.15779139644195872, "grad_norm": 0.4417275681933899, "learning_rate": 9.937973431012488e-06, "loss": 0.3735, "step": 663 }, { "epoch": 0.15802939251502351, "grad_norm": 0.4693818789749731, "learning_rate": 9.937670384329092e-06, "loss": 0.3718, "step": 664 }, { "epoch": 0.1582673885880883, "grad_norm": 0.44060498282158367, "learning_rate": 9.937366603785725e-06, "loss": 0.4519, "step": 665 }, { "epoch": 0.1585053846611531, "grad_norm": 0.4132724605397321, "learning_rate": 9.937062089427534e-06, "loss": 0.3665, "step": 666 }, { "epoch": 0.1587433807342179, "grad_norm": 0.43487153487168945, "learning_rate": 9.936756841299782e-06, "loss": 0.3501, "step": 667 }, { "epoch": 0.1589813768072827, "grad_norm": 0.4454465202025332, "learning_rate": 9.936450859447833e-06, "loss": 0.4349, "step": 668 }, { "epoch": 0.1592193728803475, "grad_norm": 0.43517274964693736, "learning_rate": 9.936144143917164e-06, "loss": 0.4639, "step": 669 }, { "epoch": 0.15945736895341228, "grad_norm": 0.4325185752961809, "learning_rate": 9.935836694753363e-06, "loss": 0.355, "step": 670 }, { "epoch": 0.15969536502647708, "grad_norm": 0.41185000783549897, "learning_rate": 9.93552851200212e-06, "loss": 0.3643, "step": 671 }, { "epoch": 0.15993336109954187, "grad_norm": 0.44265593925954544, "learning_rate": 9.935219595709242e-06, "loss": 0.4427, "step": 672 }, { "epoch": 0.16017135717260667, "grad_norm": 0.47046598610709517, "learning_rate": 9.93490994592064e-06, "loss": 0.3921, "step": 673 }, { "epoch": 0.16040935324567146, "grad_norm": 0.427596260881768, "learning_rate": 9.934599562682337e-06, "loss": 0.3758, "step": 674 }, { "epoch": 0.16064734931873625, "grad_norm": 0.42891689676165107, "learning_rate": 9.934288446040462e-06, "loss": 0.3723, "step": 675 }, { "epoch": 0.16088534539180105, "grad_norm": 0.40195671977273745, "learning_rate": 9.933976596041257e-06, "loss": 0.4701, "step": 676 }, { "epoch": 0.16112334146486584, "grad_norm": 0.43868503957164495, "learning_rate": 9.933664012731067e-06, "loss": 0.3516, "step": 677 }, { "epoch": 0.1613613375379306, "grad_norm": 0.45694726273935127, "learning_rate": 9.933350696156354e-06, "loss": 0.3707, "step": 678 }, { "epoch": 0.1615993336109954, "grad_norm": 0.42986712701679414, "learning_rate": 9.933036646363681e-06, "loss": 0.4296, "step": 679 }, { "epoch": 0.1618373296840602, "grad_norm": 0.38103639863281863, "learning_rate": 9.932721863399726e-06, "loss": 0.4153, "step": 680 }, { "epoch": 0.162075325757125, "grad_norm": 0.4320260369164701, "learning_rate": 9.93240634731127e-06, "loss": 0.3337, "step": 681 }, { "epoch": 0.1623133218301898, "grad_norm": 0.42202425560257006, "learning_rate": 9.93209009814521e-06, "loss": 0.3749, "step": 682 }, { "epoch": 0.16255131790325458, "grad_norm": 0.39918790424517825, "learning_rate": 9.93177311594855e-06, "loss": 0.4151, "step": 683 }, { "epoch": 0.16278931397631938, "grad_norm": 0.41085741596254716, "learning_rate": 9.931455400768396e-06, "loss": 0.3713, "step": 684 }, { "epoch": 0.16302731004938417, "grad_norm": 0.4935289189499038, "learning_rate": 9.931136952651971e-06, "loss": 0.3496, "step": 685 }, { "epoch": 0.16326530612244897, "grad_norm": 0.45454318553894524, "learning_rate": 9.930817771646606e-06, "loss": 0.4423, "step": 686 }, { "epoch": 0.16350330219551376, "grad_norm": 0.48363104373210164, "learning_rate": 9.930497857799737e-06, "loss": 0.4216, "step": 687 }, { "epoch": 0.16374129826857856, "grad_norm": 0.4024082102755574, "learning_rate": 9.93017721115891e-06, "loss": 0.3559, "step": 688 }, { "epoch": 0.16397929434164335, "grad_norm": 0.4423776261521702, "learning_rate": 9.929855831771787e-06, "loss": 0.3834, "step": 689 }, { "epoch": 0.16421729041470814, "grad_norm": 0.44126311420707715, "learning_rate": 9.929533719686123e-06, "loss": 0.4315, "step": 690 }, { "epoch": 0.16445528648777294, "grad_norm": 0.4387429322912089, "learning_rate": 9.929210874949802e-06, "loss": 0.3487, "step": 691 }, { "epoch": 0.16469328256083773, "grad_norm": 0.48730072451609946, "learning_rate": 9.928887297610801e-06, "loss": 0.3254, "step": 692 }, { "epoch": 0.16493127863390253, "grad_norm": 0.4229571083939119, "learning_rate": 9.928562987717211e-06, "loss": 0.3936, "step": 693 }, { "epoch": 0.16516927470696732, "grad_norm": 0.3987563067271361, "learning_rate": 9.928237945317237e-06, "loss": 0.4242, "step": 694 }, { "epoch": 0.16540727078003212, "grad_norm": 0.4566722968242409, "learning_rate": 9.927912170459183e-06, "loss": 0.346, "step": 695 }, { "epoch": 0.1656452668530969, "grad_norm": 0.46544117565173826, "learning_rate": 9.927585663191472e-06, "loss": 0.377, "step": 696 }, { "epoch": 0.1658832629261617, "grad_norm": 0.42065889884419416, "learning_rate": 9.927258423562628e-06, "loss": 0.4574, "step": 697 }, { "epoch": 0.1661212589992265, "grad_norm": 0.41117147375225527, "learning_rate": 9.926930451621286e-06, "loss": 0.411, "step": 698 }, { "epoch": 0.1663592550722913, "grad_norm": 0.41917410595753857, "learning_rate": 9.926601747416194e-06, "loss": 0.3413, "step": 699 }, { "epoch": 0.1665972511453561, "grad_norm": 0.513199643744508, "learning_rate": 9.926272310996205e-06, "loss": 0.4063, "step": 700 }, { "epoch": 0.16683524721842088, "grad_norm": 0.42672332270069796, "learning_rate": 9.92594214241028e-06, "loss": 0.4861, "step": 701 }, { "epoch": 0.16707324329148568, "grad_norm": 0.42431192157186515, "learning_rate": 9.925611241707491e-06, "loss": 0.3939, "step": 702 }, { "epoch": 0.16731123936455047, "grad_norm": 0.44467532229285445, "learning_rate": 9.925279608937014e-06, "loss": 0.3696, "step": 703 }, { "epoch": 0.16754923543761527, "grad_norm": 0.4515098096528452, "learning_rate": 9.924947244148147e-06, "loss": 0.4285, "step": 704 }, { "epoch": 0.16778723151068006, "grad_norm": 0.5043978783638837, "learning_rate": 9.924614147390278e-06, "loss": 0.407, "step": 705 }, { "epoch": 0.16802522758374486, "grad_norm": 0.44962332273904415, "learning_rate": 9.92428031871292e-06, "loss": 0.3473, "step": 706 }, { "epoch": 0.16826322365680965, "grad_norm": 0.4212768922385841, "learning_rate": 9.923945758165686e-06, "loss": 0.3899, "step": 707 }, { "epoch": 0.16850121972987445, "grad_norm": 0.42396104130969997, "learning_rate": 9.923610465798298e-06, "loss": 0.4148, "step": 708 }, { "epoch": 0.16873921580293924, "grad_norm": 0.43780183253810606, "learning_rate": 9.923274441660593e-06, "loss": 0.3524, "step": 709 }, { "epoch": 0.16897721187600404, "grad_norm": 0.4610507977132175, "learning_rate": 9.922937685802508e-06, "loss": 0.3738, "step": 710 }, { "epoch": 0.16921520794906883, "grad_norm": 0.41170477402274724, "learning_rate": 9.922600198274096e-06, "loss": 0.4127, "step": 711 }, { "epoch": 0.16945320402213362, "grad_norm": 0.42904482538288863, "learning_rate": 9.922261979125516e-06, "loss": 0.4375, "step": 712 }, { "epoch": 0.16969120009519842, "grad_norm": 0.4288980919174409, "learning_rate": 9.921923028407034e-06, "loss": 0.3382, "step": 713 }, { "epoch": 0.1699291961682632, "grad_norm": 0.400850498314156, "learning_rate": 9.921583346169027e-06, "loss": 0.3488, "step": 714 }, { "epoch": 0.170167192241328, "grad_norm": 0.46108960454734477, "learning_rate": 9.92124293246198e-06, "loss": 0.4419, "step": 715 }, { "epoch": 0.1704051883143928, "grad_norm": 0.41992077014825, "learning_rate": 9.92090178733649e-06, "loss": 0.4189, "step": 716 }, { "epoch": 0.1706431843874576, "grad_norm": 0.48252193952318556, "learning_rate": 9.920559910843255e-06, "loss": 0.3313, "step": 717 }, { "epoch": 0.1708811804605224, "grad_norm": 0.40035812359955575, "learning_rate": 9.920217303033091e-06, "loss": 0.4049, "step": 718 }, { "epoch": 0.1711191765335872, "grad_norm": 0.4431295892233984, "learning_rate": 9.919873963956914e-06, "loss": 0.4627, "step": 719 }, { "epoch": 0.17135717260665198, "grad_norm": 0.4546851162600903, "learning_rate": 9.919529893665753e-06, "loss": 0.3459, "step": 720 }, { "epoch": 0.17159516867971678, "grad_norm": 0.41399727681843856, "learning_rate": 9.919185092210748e-06, "loss": 0.3504, "step": 721 }, { "epoch": 0.17183316475278157, "grad_norm": 0.39447751341186205, "learning_rate": 9.918839559643143e-06, "loss": 0.4322, "step": 722 }, { "epoch": 0.17207116082584636, "grad_norm": 0.4316965191862936, "learning_rate": 9.918493296014294e-06, "loss": 0.3914, "step": 723 }, { "epoch": 0.17230915689891116, "grad_norm": 0.4190761857592741, "learning_rate": 9.918146301375663e-06, "loss": 0.3526, "step": 724 }, { "epoch": 0.17254715297197595, "grad_norm": 0.4405259759469165, "learning_rate": 9.917798575778821e-06, "loss": 0.405, "step": 725 }, { "epoch": 0.17278514904504075, "grad_norm": 0.39930877974901136, "learning_rate": 9.917450119275452e-06, "loss": 0.4576, "step": 726 }, { "epoch": 0.17302314511810554, "grad_norm": 0.4312658018371688, "learning_rate": 9.917100931917343e-06, "loss": 0.3494, "step": 727 }, { "epoch": 0.17326114119117034, "grad_norm": 0.3952105575090353, "learning_rate": 9.916751013756393e-06, "loss": 0.3349, "step": 728 }, { "epoch": 0.17349913726423513, "grad_norm": 0.4114863899271299, "learning_rate": 9.916400364844608e-06, "loss": 0.4229, "step": 729 }, { "epoch": 0.17373713333729993, "grad_norm": 0.42634304050454525, "learning_rate": 9.916048985234102e-06, "loss": 0.4014, "step": 730 }, { "epoch": 0.17397512941036472, "grad_norm": 0.46609894460053153, "learning_rate": 9.9156968749771e-06, "loss": 0.3438, "step": 731 }, { "epoch": 0.17421312548342952, "grad_norm": 0.4204390869113515, "learning_rate": 9.915344034125931e-06, "loss": 0.4, "step": 732 }, { "epoch": 0.1744511215564943, "grad_norm": 0.411331032799109, "learning_rate": 9.914990462733042e-06, "loss": 0.4429, "step": 733 }, { "epoch": 0.1746891176295591, "grad_norm": 0.4311784563828495, "learning_rate": 9.914636160850979e-06, "loss": 0.3685, "step": 734 }, { "epoch": 0.1749271137026239, "grad_norm": 0.43611688575958385, "learning_rate": 9.914281128532399e-06, "loss": 0.3606, "step": 735 }, { "epoch": 0.1751651097756887, "grad_norm": 0.4083035624121461, "learning_rate": 9.91392536583007e-06, "loss": 0.4403, "step": 736 }, { "epoch": 0.1754031058487535, "grad_norm": 0.4515206561127267, "learning_rate": 9.913568872796867e-06, "loss": 0.4095, "step": 737 }, { "epoch": 0.17564110192181828, "grad_norm": 0.39490410731530035, "learning_rate": 9.913211649485776e-06, "loss": 0.3601, "step": 738 }, { "epoch": 0.17587909799488308, "grad_norm": 0.4383233004999705, "learning_rate": 9.912853695949884e-06, "loss": 0.3656, "step": 739 }, { "epoch": 0.17611709406794787, "grad_norm": 0.4395334470031347, "learning_rate": 9.912495012242396e-06, "loss": 0.4211, "step": 740 }, { "epoch": 0.17635509014101267, "grad_norm": 0.4397061820033599, "learning_rate": 9.91213559841662e-06, "loss": 0.3788, "step": 741 }, { "epoch": 0.17659308621407746, "grad_norm": 0.4378799122528206, "learning_rate": 9.911775454525974e-06, "loss": 0.3375, "step": 742 }, { "epoch": 0.17683108228714225, "grad_norm": 0.40859435165523883, "learning_rate": 9.911414580623983e-06, "loss": 0.3983, "step": 743 }, { "epoch": 0.17706907836020705, "grad_norm": 0.4344842442523242, "learning_rate": 9.911052976764282e-06, "loss": 0.4477, "step": 744 }, { "epoch": 0.17730707443327184, "grad_norm": 0.42409343119659304, "learning_rate": 9.910690643000617e-06, "loss": 0.331, "step": 745 }, { "epoch": 0.17754507050633664, "grad_norm": 0.44353774653745326, "learning_rate": 9.910327579386836e-06, "loss": 0.3685, "step": 746 }, { "epoch": 0.17778306657940143, "grad_norm": 0.39112060910154156, "learning_rate": 9.909963785976902e-06, "loss": 0.4222, "step": 747 }, { "epoch": 0.17802106265246623, "grad_norm": 0.41719770556324215, "learning_rate": 9.909599262824882e-06, "loss": 0.4003, "step": 748 }, { "epoch": 0.17825905872553102, "grad_norm": 0.43782801010917444, "learning_rate": 9.909234009984956e-06, "loss": 0.3264, "step": 749 }, { "epoch": 0.17849705479859582, "grad_norm": 0.43932280484024017, "learning_rate": 9.908868027511407e-06, "loss": 0.3948, "step": 750 }, { "epoch": 0.1787350508716606, "grad_norm": 0.40931591350618013, "learning_rate": 9.908501315458628e-06, "loss": 0.4369, "step": 751 }, { "epoch": 0.1789730469447254, "grad_norm": 0.440394461817193, "learning_rate": 9.908133873881125e-06, "loss": 0.3308, "step": 752 }, { "epoch": 0.1792110430177902, "grad_norm": 0.45585993680493553, "learning_rate": 9.907765702833506e-06, "loss": 0.3327, "step": 753 }, { "epoch": 0.179449039090855, "grad_norm": 0.39891546180528054, "learning_rate": 9.90739680237049e-06, "loss": 0.4372, "step": 754 }, { "epoch": 0.1796870351639198, "grad_norm": 0.4449856003881457, "learning_rate": 9.907027172546907e-06, "loss": 0.3646, "step": 755 }, { "epoch": 0.17992503123698458, "grad_norm": 0.43483545394451123, "learning_rate": 9.906656813417692e-06, "loss": 0.366, "step": 756 }, { "epoch": 0.18016302731004938, "grad_norm": 0.422673166996465, "learning_rate": 9.90628572503789e-06, "loss": 0.3759, "step": 757 }, { "epoch": 0.18040102338311417, "grad_norm": 0.39567142752341666, "learning_rate": 9.905913907462655e-06, "loss": 0.4366, "step": 758 }, { "epoch": 0.18063901945617897, "grad_norm": 0.4683314806260047, "learning_rate": 9.905541360747244e-06, "loss": 0.362, "step": 759 }, { "epoch": 0.18087701552924376, "grad_norm": 0.41522141951305347, "learning_rate": 9.90516808494703e-06, "loss": 0.3395, "step": 760 }, { "epoch": 0.18111501160230856, "grad_norm": 0.4303510055280143, "learning_rate": 9.904794080117493e-06, "loss": 0.4321, "step": 761 }, { "epoch": 0.18135300767537335, "grad_norm": 0.4228437966345767, "learning_rate": 9.904419346314216e-06, "loss": 0.441, "step": 762 }, { "epoch": 0.18159100374843815, "grad_norm": 0.4453386428802394, "learning_rate": 9.904043883592893e-06, "loss": 0.3773, "step": 763 }, { "epoch": 0.18182899982150294, "grad_norm": 1.180486578415128, "learning_rate": 9.90366769200933e-06, "loss": 0.3609, "step": 764 }, { "epoch": 0.18206699589456773, "grad_norm": 0.42781342072889006, "learning_rate": 9.903290771619437e-06, "loss": 0.4432, "step": 765 }, { "epoch": 0.18230499196763253, "grad_norm": 0.38960119770687374, "learning_rate": 9.902913122479235e-06, "loss": 0.3728, "step": 766 }, { "epoch": 0.18254298804069732, "grad_norm": 0.4321322538249792, "learning_rate": 9.902534744644848e-06, "loss": 0.3431, "step": 767 }, { "epoch": 0.18278098411376212, "grad_norm": 0.41307757787063704, "learning_rate": 9.902155638172517e-06, "loss": 0.395, "step": 768 }, { "epoch": 0.1830189801868269, "grad_norm": 0.43556469923967867, "learning_rate": 9.901775803118584e-06, "loss": 0.4441, "step": 769 }, { "epoch": 0.1832569762598917, "grad_norm": 0.43167762486626765, "learning_rate": 9.901395239539502e-06, "loss": 0.3372, "step": 770 }, { "epoch": 0.1834949723329565, "grad_norm": 0.416467157274216, "learning_rate": 9.901013947491834e-06, "loss": 0.355, "step": 771 }, { "epoch": 0.1837329684060213, "grad_norm": 0.4141392350996442, "learning_rate": 9.900631927032247e-06, "loss": 0.4116, "step": 772 }, { "epoch": 0.1839709644790861, "grad_norm": 0.42131938024420973, "learning_rate": 9.90024917821752e-06, "loss": 0.3775, "step": 773 }, { "epoch": 0.18420896055215089, "grad_norm": 0.38656999194996833, "learning_rate": 9.899865701104542e-06, "loss": 0.3404, "step": 774 }, { "epoch": 0.18444695662521568, "grad_norm": 0.4002326366629186, "learning_rate": 9.8994814957503e-06, "loss": 0.4006, "step": 775 }, { "epoch": 0.18468495269828047, "grad_norm": 0.389329793747449, "learning_rate": 9.899096562211902e-06, "loss": 0.4635, "step": 776 }, { "epoch": 0.18492294877134527, "grad_norm": 0.412679750416995, "learning_rate": 9.898710900546557e-06, "loss": 0.3451, "step": 777 }, { "epoch": 0.18516094484441006, "grad_norm": 0.42487703444957303, "learning_rate": 9.898324510811583e-06, "loss": 0.3428, "step": 778 }, { "epoch": 0.18539894091747486, "grad_norm": 0.3907619553000587, "learning_rate": 9.89793739306441e-06, "loss": 0.4392, "step": 779 }, { "epoch": 0.18563693699053965, "grad_norm": 0.4193572680245468, "learning_rate": 9.897549547362569e-06, "loss": 0.4373, "step": 780 }, { "epoch": 0.18587493306360445, "grad_norm": 0.39493589356753456, "learning_rate": 9.897160973763706e-06, "loss": 0.3539, "step": 781 }, { "epoch": 0.18611292913666924, "grad_norm": 0.4440516227501279, "learning_rate": 9.896771672325574e-06, "loss": 0.3769, "step": 782 }, { "epoch": 0.18635092520973404, "grad_norm": 0.44352023507130256, "learning_rate": 9.89638164310603e-06, "loss": 0.4338, "step": 783 }, { "epoch": 0.18658892128279883, "grad_norm": 0.7671181173054992, "learning_rate": 9.895990886163043e-06, "loss": 0.3319, "step": 784 }, { "epoch": 0.18682691735586363, "grad_norm": 0.4685322094472268, "learning_rate": 9.89559940155469e-06, "loss": 0.3454, "step": 785 }, { "epoch": 0.18706491342892842, "grad_norm": 0.3828784304357339, "learning_rate": 9.895207189339154e-06, "loss": 0.4445, "step": 786 }, { "epoch": 0.18730290950199321, "grad_norm": 0.42324641037940064, "learning_rate": 9.89481424957473e-06, "loss": 0.451, "step": 787 }, { "epoch": 0.187540905575058, "grad_norm": 0.4365750038109837, "learning_rate": 9.894420582319814e-06, "loss": 0.3191, "step": 788 }, { "epoch": 0.1877789016481228, "grad_norm": 0.40443174222338457, "learning_rate": 9.894026187632917e-06, "loss": 0.3748, "step": 789 }, { "epoch": 0.1880168977211876, "grad_norm": 0.43152637845700487, "learning_rate": 9.893631065572659e-06, "loss": 0.4441, "step": 790 }, { "epoch": 0.1882548937942524, "grad_norm": 0.43199571990589086, "learning_rate": 9.893235216197761e-06, "loss": 0.3711, "step": 791 }, { "epoch": 0.1884928898673172, "grad_norm": 0.46404959339010077, "learning_rate": 9.892838639567057e-06, "loss": 0.359, "step": 792 }, { "epoch": 0.18873088594038198, "grad_norm": 0.4441492172576122, "learning_rate": 9.892441335739487e-06, "loss": 0.4042, "step": 793 }, { "epoch": 0.18896888201344678, "grad_norm": 0.42927081504797776, "learning_rate": 9.892043304774102e-06, "loss": 0.4278, "step": 794 }, { "epoch": 0.18920687808651157, "grad_norm": 0.402576668783229, "learning_rate": 9.89164454673006e-06, "loss": 0.38, "step": 795 }, { "epoch": 0.18944487415957637, "grad_norm": 0.37582462659523325, "learning_rate": 9.891245061666622e-06, "loss": 0.3614, "step": 796 }, { "epoch": 0.18968287023264116, "grad_norm": 0.3988214342258959, "learning_rate": 9.890844849643166e-06, "loss": 0.4387, "step": 797 }, { "epoch": 0.18992086630570595, "grad_norm": 0.4910334602397654, "learning_rate": 9.890443910719171e-06, "loss": 0.3962, "step": 798 }, { "epoch": 0.19015886237877075, "grad_norm": 0.44461874782652483, "learning_rate": 9.890042244954229e-06, "loss": 0.3385, "step": 799 }, { "epoch": 0.19039685845183554, "grad_norm": 0.4129631941986688, "learning_rate": 9.889639852408035e-06, "loss": 0.3928, "step": 800 }, { "epoch": 0.19063485452490034, "grad_norm": 0.41914251475794334, "learning_rate": 9.889236733140393e-06, "loss": 0.4463, "step": 801 }, { "epoch": 0.19087285059796513, "grad_norm": 0.41664422509463567, "learning_rate": 9.88883288721122e-06, "loss": 0.3765, "step": 802 }, { "epoch": 0.19111084667102993, "grad_norm": 0.4045420912022183, "learning_rate": 9.888428314680536e-06, "loss": 0.3567, "step": 803 }, { "epoch": 0.19134884274409472, "grad_norm": 0.4044005074831901, "learning_rate": 9.888023015608471e-06, "loss": 0.3931, "step": 804 }, { "epoch": 0.19158683881715952, "grad_norm": 0.4368135710203652, "learning_rate": 9.887616990055262e-06, "loss": 0.4112, "step": 805 }, { "epoch": 0.1918248348902243, "grad_norm": 0.4882695812113364, "learning_rate": 9.887210238081253e-06, "loss": 0.3695, "step": 806 }, { "epoch": 0.1920628309632891, "grad_norm": 0.4246207428107935, "learning_rate": 9.8868027597469e-06, "loss": 0.3797, "step": 807 }, { "epoch": 0.1923008270363539, "grad_norm": 0.46283957801455083, "learning_rate": 9.886394555112764e-06, "loss": 0.4513, "step": 808 }, { "epoch": 0.1925388231094187, "grad_norm": 0.4047912499967522, "learning_rate": 9.885985624239513e-06, "loss": 0.3578, "step": 809 }, { "epoch": 0.1927768191824835, "grad_norm": 0.4174808369463013, "learning_rate": 9.885575967187924e-06, "loss": 0.3537, "step": 810 }, { "epoch": 0.19301481525554828, "grad_norm": 0.4347320503909233, "learning_rate": 9.885165584018882e-06, "loss": 0.3847, "step": 811 }, { "epoch": 0.19325281132861308, "grad_norm": 0.44226534184677574, "learning_rate": 9.884754474793383e-06, "loss": 0.4511, "step": 812 }, { "epoch": 0.19349080740167787, "grad_norm": 0.4019420309353, "learning_rate": 9.884342639572526e-06, "loss": 0.3352, "step": 813 }, { "epoch": 0.19372880347474267, "grad_norm": 0.42514275612922164, "learning_rate": 9.88393007841752e-06, "loss": 0.3673, "step": 814 }, { "epoch": 0.19396679954780746, "grad_norm": 0.4524089013354289, "learning_rate": 9.88351679138968e-06, "loss": 0.4277, "step": 815 }, { "epoch": 0.19420479562087226, "grad_norm": 0.4380590877999043, "learning_rate": 9.883102778550434e-06, "loss": 0.3627, "step": 816 }, { "epoch": 0.19444279169393705, "grad_norm": 0.4165421579055438, "learning_rate": 9.882688039961312e-06, "loss": 0.345, "step": 817 }, { "epoch": 0.19468078776700184, "grad_norm": 0.43180936146351057, "learning_rate": 9.882272575683956e-06, "loss": 0.4111, "step": 818 }, { "epoch": 0.19491878384006664, "grad_norm": 0.41648821424267424, "learning_rate": 9.881856385780115e-06, "loss": 0.4578, "step": 819 }, { "epoch": 0.19515677991313143, "grad_norm": 0.4246404967531619, "learning_rate": 9.881439470311642e-06, "loss": 0.3842, "step": 820 }, { "epoch": 0.19539477598619623, "grad_norm": 0.4158895838872154, "learning_rate": 9.881021829340502e-06, "loss": 0.41, "step": 821 }, { "epoch": 0.19563277205926102, "grad_norm": 0.4149994996559407, "learning_rate": 9.880603462928769e-06, "loss": 0.4231, "step": 822 }, { "epoch": 0.19587076813232582, "grad_norm": 0.4089944684040092, "learning_rate": 9.880184371138621e-06, "loss": 0.3609, "step": 823 }, { "epoch": 0.1961087642053906, "grad_norm": 0.44441855579372713, "learning_rate": 9.879764554032345e-06, "loss": 0.3333, "step": 824 }, { "epoch": 0.1963467602784554, "grad_norm": 0.4230323557425423, "learning_rate": 9.879344011672337e-06, "loss": 0.4026, "step": 825 }, { "epoch": 0.1965847563515202, "grad_norm": 0.42605983334806596, "learning_rate": 9.8789227441211e-06, "loss": 0.4439, "step": 826 }, { "epoch": 0.196822752424585, "grad_norm": 0.44788672346466435, "learning_rate": 9.878500751441244e-06, "loss": 0.3576, "step": 827 }, { "epoch": 0.1970607484976498, "grad_norm": 0.42604961830620175, "learning_rate": 9.878078033695488e-06, "loss": 0.3765, "step": 828 }, { "epoch": 0.19729874457071458, "grad_norm": 0.4348482551029076, "learning_rate": 9.877654590946659e-06, "loss": 0.4327, "step": 829 }, { "epoch": 0.19753674064377938, "grad_norm": 0.41378312933719785, "learning_rate": 9.87723042325769e-06, "loss": 0.4134, "step": 830 }, { "epoch": 0.19777473671684417, "grad_norm": 0.473934565450725, "learning_rate": 9.876805530691622e-06, "loss": 0.3382, "step": 831 }, { "epoch": 0.19801273278990897, "grad_norm": 0.45241327431142664, "learning_rate": 9.876379913311607e-06, "loss": 0.3837, "step": 832 }, { "epoch": 0.19825072886297376, "grad_norm": 0.4046931569037168, "learning_rate": 9.875953571180901e-06, "loss": 0.4148, "step": 833 }, { "epoch": 0.19848872493603856, "grad_norm": 0.4515187001854004, "learning_rate": 9.875526504362868e-06, "loss": 0.3804, "step": 834 }, { "epoch": 0.19872672100910335, "grad_norm": 0.4533213069296073, "learning_rate": 9.875098712920983e-06, "loss": 0.3442, "step": 835 }, { "epoch": 0.19896471708216815, "grad_norm": 0.4523183437793231, "learning_rate": 9.874670196918824e-06, "loss": 0.4091, "step": 836 }, { "epoch": 0.19920271315523294, "grad_norm": 0.4119738644745412, "learning_rate": 9.874240956420082e-06, "loss": 0.4687, "step": 837 }, { "epoch": 0.19944070922829774, "grad_norm": 0.4699475156689954, "learning_rate": 9.87381099148855e-06, "loss": 0.3553, "step": 838 }, { "epoch": 0.19967870530136253, "grad_norm": 0.41221446850629756, "learning_rate": 9.873380302188133e-06, "loss": 0.3856, "step": 839 }, { "epoch": 0.19991670137442732, "grad_norm": 0.3967941653688478, "learning_rate": 9.87294888858284e-06, "loss": 0.4608, "step": 840 }, { "epoch": 0.20015469744749212, "grad_norm": 0.4446657496487925, "learning_rate": 9.872516750736793e-06, "loss": 0.4013, "step": 841 }, { "epoch": 0.2003926935205569, "grad_norm": 0.4896648889382981, "learning_rate": 9.872083888714217e-06, "loss": 0.356, "step": 842 }, { "epoch": 0.2006306895936217, "grad_norm": 0.4087173120218746, "learning_rate": 9.871650302579443e-06, "loss": 0.3968, "step": 843 }, { "epoch": 0.2008686856666865, "grad_norm": 0.4824564537672377, "learning_rate": 9.871215992396917e-06, "loss": 0.4077, "step": 844 }, { "epoch": 0.2011066817397513, "grad_norm": 0.44137153458158945, "learning_rate": 9.870780958231186e-06, "loss": 0.3466, "step": 845 }, { "epoch": 0.2013446778128161, "grad_norm": 0.5156882053665172, "learning_rate": 9.870345200146907e-06, "loss": 0.375, "step": 846 }, { "epoch": 0.2015826738858809, "grad_norm": 0.3651462313643267, "learning_rate": 9.869908718208845e-06, "loss": 0.4293, "step": 847 }, { "epoch": 0.20182066995894568, "grad_norm": 0.4248226402638282, "learning_rate": 9.869471512481872e-06, "loss": 0.4093, "step": 848 }, { "epoch": 0.20205866603201048, "grad_norm": 0.4715061071976561, "learning_rate": 9.869033583030967e-06, "loss": 0.3187, "step": 849 }, { "epoch": 0.20229666210507527, "grad_norm": 0.4360335200519038, "learning_rate": 9.868594929921217e-06, "loss": 0.3947, "step": 850 }, { "epoch": 0.20253465817814006, "grad_norm": 0.4128931217238059, "learning_rate": 9.86815555321782e-06, "loss": 0.4361, "step": 851 }, { "epoch": 0.20277265425120486, "grad_norm": 0.46321878255592136, "learning_rate": 9.867715452986073e-06, "loss": 0.3231, "step": 852 }, { "epoch": 0.20301065032426965, "grad_norm": 0.4537539113370709, "learning_rate": 9.867274629291387e-06, "loss": 0.3619, "step": 853 }, { "epoch": 0.20324864639733445, "grad_norm": 0.38249330366239725, "learning_rate": 9.866833082199283e-06, "loss": 0.3993, "step": 854 }, { "epoch": 0.20348664247039924, "grad_norm": 0.43771271278034846, "learning_rate": 9.866390811775382e-06, "loss": 0.3913, "step": 855 }, { "epoch": 0.20372463854346404, "grad_norm": 0.46356127634456645, "learning_rate": 9.865947818085417e-06, "loss": 0.3215, "step": 856 }, { "epoch": 0.20396263461652883, "grad_norm": 0.495299460550108, "learning_rate": 9.86550410119523e-06, "loss": 0.3485, "step": 857 }, { "epoch": 0.20420063068959363, "grad_norm": 0.4143122108558567, "learning_rate": 9.865059661170767e-06, "loss": 0.4334, "step": 858 }, { "epoch": 0.20443862676265842, "grad_norm": 0.4325780620093191, "learning_rate": 9.86461449807808e-06, "loss": 0.379, "step": 859 }, { "epoch": 0.20467662283572322, "grad_norm": 0.5084472841414205, "learning_rate": 9.864168611983336e-06, "loss": 0.3371, "step": 860 }, { "epoch": 0.204914618908788, "grad_norm": 0.4451974603394198, "learning_rate": 9.863722002952803e-06, "loss": 0.3918, "step": 861 }, { "epoch": 0.2051526149818528, "grad_norm": 0.4293240665529044, "learning_rate": 9.863274671052857e-06, "loss": 0.4285, "step": 862 }, { "epoch": 0.2053906110549176, "grad_norm": 0.42020107235283133, "learning_rate": 9.862826616349981e-06, "loss": 0.3356, "step": 863 }, { "epoch": 0.2056286071279824, "grad_norm": 0.45803176046948774, "learning_rate": 9.862377838910771e-06, "loss": 0.4254, "step": 864 }, { "epoch": 0.2058666032010472, "grad_norm": 0.3886605229465983, "learning_rate": 9.861928338801926e-06, "loss": 0.4262, "step": 865 }, { "epoch": 0.20610459927411198, "grad_norm": 0.43472442169007486, "learning_rate": 9.86147811609025e-06, "loss": 0.3713, "step": 866 }, { "epoch": 0.20634259534717678, "grad_norm": 0.4414211701916826, "learning_rate": 9.861027170842659e-06, "loss": 0.3361, "step": 867 }, { "epoch": 0.20658059142024157, "grad_norm": 0.408979611759581, "learning_rate": 9.860575503126175e-06, "loss": 0.4036, "step": 868 }, { "epoch": 0.20681858749330637, "grad_norm": 0.446536065672617, "learning_rate": 9.860123113007928e-06, "loss": 0.4488, "step": 869 }, { "epoch": 0.20705658356637116, "grad_norm": 0.436694557262441, "learning_rate": 9.85967000055515e-06, "loss": 0.3634, "step": 870 }, { "epoch": 0.20729457963943596, "grad_norm": 0.4541956461930287, "learning_rate": 9.859216165835188e-06, "loss": 0.3835, "step": 871 }, { "epoch": 0.20753257571250075, "grad_norm": 0.4213848003370073, "learning_rate": 9.858761608915492e-06, "loss": 0.4334, "step": 872 }, { "epoch": 0.20777057178556554, "grad_norm": 0.3930202275134771, "learning_rate": 9.858306329863623e-06, "loss": 0.3955, "step": 873 }, { "epoch": 0.20800856785863034, "grad_norm": 0.427845820752473, "learning_rate": 9.857850328747243e-06, "loss": 0.3356, "step": 874 }, { "epoch": 0.20824656393169513, "grad_norm": 0.4116624892541286, "learning_rate": 9.857393605634126e-06, "loss": 0.3699, "step": 875 }, { "epoch": 0.20848456000475993, "grad_norm": 0.3935251767233762, "learning_rate": 9.856936160592155e-06, "loss": 0.4307, "step": 876 }, { "epoch": 0.20872255607782472, "grad_norm": 0.42170977712231184, "learning_rate": 9.856477993689316e-06, "loss": 0.3545, "step": 877 }, { "epoch": 0.20896055215088952, "grad_norm": 0.41048028309068124, "learning_rate": 9.856019104993702e-06, "loss": 0.3381, "step": 878 }, { "epoch": 0.2091985482239543, "grad_norm": 0.40569183356117694, "learning_rate": 9.855559494573517e-06, "loss": 0.4362, "step": 879 }, { "epoch": 0.2094365442970191, "grad_norm": 0.42666793677125886, "learning_rate": 9.855099162497071e-06, "loss": 0.3783, "step": 880 }, { "epoch": 0.2096745403700839, "grad_norm": 0.43438959276226846, "learning_rate": 9.854638108832781e-06, "loss": 0.3769, "step": 881 }, { "epoch": 0.2099125364431487, "grad_norm": 0.44294773883559535, "learning_rate": 9.854176333649169e-06, "loss": 0.401, "step": 882 }, { "epoch": 0.2101505325162135, "grad_norm": 0.40033962525880623, "learning_rate": 9.853713837014867e-06, "loss": 0.4307, "step": 883 }, { "epoch": 0.21038852858927828, "grad_norm": 0.42544362225337135, "learning_rate": 9.853250618998612e-06, "loss": 0.3605, "step": 884 }, { "epoch": 0.21062652466234308, "grad_norm": 0.43069551338393347, "learning_rate": 9.852786679669256e-06, "loss": 0.3538, "step": 885 }, { "epoch": 0.21086452073540787, "grad_norm": 0.4061340048491632, "learning_rate": 9.852322019095744e-06, "loss": 0.3855, "step": 886 }, { "epoch": 0.21110251680847267, "grad_norm": 0.4200438920047967, "learning_rate": 9.85185663734714e-06, "loss": 0.4657, "step": 887 }, { "epoch": 0.21134051288153746, "grad_norm": 0.40315771492500546, "learning_rate": 9.85139053449261e-06, "loss": 0.323, "step": 888 }, { "epoch": 0.21157850895460226, "grad_norm": 0.451869373976093, "learning_rate": 9.85092371060143e-06, "loss": 0.4123, "step": 889 }, { "epoch": 0.21181650502766705, "grad_norm": 0.41009915304865363, "learning_rate": 9.85045616574298e-06, "loss": 0.4342, "step": 890 }, { "epoch": 0.21205450110073185, "grad_norm": 0.41018214294921573, "learning_rate": 9.84998789998675e-06, "loss": 0.4131, "step": 891 }, { "epoch": 0.21229249717379664, "grad_norm": 0.41395012445520085, "learning_rate": 9.849518913402334e-06, "loss": 0.3472, "step": 892 }, { "epoch": 0.21253049324686143, "grad_norm": 0.41478482807464195, "learning_rate": 9.849049206059435e-06, "loss": 0.4359, "step": 893 }, { "epoch": 0.21276848931992623, "grad_norm": 0.3969815086554544, "learning_rate": 9.848578778027867e-06, "loss": 0.4184, "step": 894 }, { "epoch": 0.21300648539299102, "grad_norm": 0.4297335515729988, "learning_rate": 9.848107629377544e-06, "loss": 0.3719, "step": 895 }, { "epoch": 0.21324448146605582, "grad_norm": 0.3855698648491326, "learning_rate": 9.84763576017849e-06, "loss": 0.3658, "step": 896 }, { "epoch": 0.2134824775391206, "grad_norm": 0.40093853952594977, "learning_rate": 9.847163170500837e-06, "loss": 0.4394, "step": 897 }, { "epoch": 0.2137204736121854, "grad_norm": 0.4485624156969946, "learning_rate": 9.846689860414824e-06, "loss": 0.3776, "step": 898 }, { "epoch": 0.2139584696852502, "grad_norm": 0.3794469849541886, "learning_rate": 9.846215829990797e-06, "loss": 0.3115, "step": 899 }, { "epoch": 0.214196465758315, "grad_norm": 0.3972841317444796, "learning_rate": 9.84574107929921e-06, "loss": 0.3894, "step": 900 }, { "epoch": 0.2144344618313798, "grad_norm": 0.38348004951058207, "learning_rate": 9.845265608410616e-06, "loss": 0.4287, "step": 901 }, { "epoch": 0.21467245790444459, "grad_norm": 0.41303667553057977, "learning_rate": 9.84478941739569e-06, "loss": 0.3239, "step": 902 }, { "epoch": 0.21491045397750938, "grad_norm": 0.40879152143826697, "learning_rate": 9.844312506325202e-06, "loss": 0.3797, "step": 903 }, { "epoch": 0.21514845005057417, "grad_norm": 0.4604571778006916, "learning_rate": 9.843834875270032e-06, "loss": 0.4199, "step": 904 }, { "epoch": 0.21538644612363897, "grad_norm": 0.4152085389985221, "learning_rate": 9.84335652430117e-06, "loss": 0.3847, "step": 905 }, { "epoch": 0.21562444219670376, "grad_norm": 0.4149816757480418, "learning_rate": 9.842877453489708e-06, "loss": 0.3592, "step": 906 }, { "epoch": 0.21586243826976856, "grad_norm": 0.4299620220648408, "learning_rate": 9.84239766290685e-06, "loss": 0.4043, "step": 907 }, { "epoch": 0.21610043434283335, "grad_norm": 0.41035423107133623, "learning_rate": 9.841917152623905e-06, "loss": 0.4585, "step": 908 }, { "epoch": 0.21633843041589815, "grad_norm": 0.44913348339639636, "learning_rate": 9.841435922712288e-06, "loss": 0.3911, "step": 909 }, { "epoch": 0.21657642648896294, "grad_norm": 0.405292221298209, "learning_rate": 9.84095397324352e-06, "loss": 0.3582, "step": 910 }, { "epoch": 0.21681442256202774, "grad_norm": 0.38536854563574374, "learning_rate": 9.840471304289233e-06, "loss": 0.4007, "step": 911 }, { "epoch": 0.21705241863509253, "grad_norm": 0.47808379504573534, "learning_rate": 9.839987915921163e-06, "loss": 0.4611, "step": 912 }, { "epoch": 0.21729041470815733, "grad_norm": 0.3934343577154057, "learning_rate": 9.839503808211153e-06, "loss": 0.3176, "step": 913 }, { "epoch": 0.21752841078122212, "grad_norm": 0.4253557198903114, "learning_rate": 9.839018981231151e-06, "loss": 0.3818, "step": 914 }, { "epoch": 0.21776640685428691, "grad_norm": 0.4060612395908892, "learning_rate": 9.838533435053221e-06, "loss": 0.4327, "step": 915 }, { "epoch": 0.2180044029273517, "grad_norm": 0.4044627954830839, "learning_rate": 9.83804716974952e-06, "loss": 0.3697, "step": 916 }, { "epoch": 0.2182423990004165, "grad_norm": 0.3898450504134681, "learning_rate": 9.837560185392325e-06, "loss": 0.3444, "step": 917 }, { "epoch": 0.2184803950734813, "grad_norm": 0.41317608963767616, "learning_rate": 9.837072482054009e-06, "loss": 0.4076, "step": 918 }, { "epoch": 0.2187183911465461, "grad_norm": 0.4138083448258549, "learning_rate": 9.83658405980706e-06, "loss": 0.4545, "step": 919 }, { "epoch": 0.2189563872196109, "grad_norm": 0.41247702913642026, "learning_rate": 9.836094918724067e-06, "loss": 0.3613, "step": 920 }, { "epoch": 0.21919438329267568, "grad_norm": 0.4121156299354065, "learning_rate": 9.83560505887773e-06, "loss": 0.3765, "step": 921 }, { "epoch": 0.21943237936574048, "grad_norm": 0.43268431314703193, "learning_rate": 9.835114480340855e-06, "loss": 0.4259, "step": 922 }, { "epoch": 0.21967037543880527, "grad_norm": 0.38798899747199356, "learning_rate": 9.834623183186352e-06, "loss": 0.3945, "step": 923 }, { "epoch": 0.21990837151187007, "grad_norm": 0.40463819440452653, "learning_rate": 9.834131167487241e-06, "loss": 0.3419, "step": 924 }, { "epoch": 0.22014636758493486, "grad_norm": 0.4158947107356108, "learning_rate": 9.833638433316647e-06, "loss": 0.3918, "step": 925 }, { "epoch": 0.22038436365799965, "grad_norm": 0.3951265208744111, "learning_rate": 9.833144980747806e-06, "loss": 0.4467, "step": 926 }, { "epoch": 0.22062235973106445, "grad_norm": 0.3866457296285503, "learning_rate": 9.832650809854054e-06, "loss": 0.3626, "step": 927 }, { "epoch": 0.22086035580412924, "grad_norm": 0.4148351326369386, "learning_rate": 9.832155920708838e-06, "loss": 0.3362, "step": 928 }, { "epoch": 0.22109835187719404, "grad_norm": 0.3976030627374797, "learning_rate": 9.831660313385709e-06, "loss": 0.4314, "step": 929 }, { "epoch": 0.22133634795025883, "grad_norm": 0.4157333689112259, "learning_rate": 9.831163987958329e-06, "loss": 0.4044, "step": 930 }, { "epoch": 0.22157434402332363, "grad_norm": 0.4408772739009037, "learning_rate": 9.830666944500462e-06, "loss": 0.3282, "step": 931 }, { "epoch": 0.22181234009638842, "grad_norm": 0.41164918567935477, "learning_rate": 9.830169183085983e-06, "loss": 0.381, "step": 932 }, { "epoch": 0.22205033616945322, "grad_norm": 0.3644162693747717, "learning_rate": 9.829670703788873e-06, "loss": 0.4151, "step": 933 }, { "epoch": 0.222288332242518, "grad_norm": 0.4207226846601998, "learning_rate": 9.829171506683211e-06, "loss": 0.3884, "step": 934 }, { "epoch": 0.2225263283155828, "grad_norm": 0.45066402524896876, "learning_rate": 9.828671591843198e-06, "loss": 0.3369, "step": 935 }, { "epoch": 0.2227643243886476, "grad_norm": 0.4168689897275966, "learning_rate": 9.828170959343131e-06, "loss": 0.3938, "step": 936 }, { "epoch": 0.2230023204617124, "grad_norm": 0.44074947992378183, "learning_rate": 9.827669609257417e-06, "loss": 0.433, "step": 937 }, { "epoch": 0.2232403165347772, "grad_norm": 0.47103576178023904, "learning_rate": 9.827167541660568e-06, "loss": 0.3565, "step": 938 }, { "epoch": 0.22347831260784198, "grad_norm": 0.4532327477806086, "learning_rate": 9.826664756627202e-06, "loss": 0.3961, "step": 939 }, { "epoch": 0.22371630868090678, "grad_norm": 0.39014864862033083, "learning_rate": 9.826161254232048e-06, "loss": 0.4257, "step": 940 }, { "epoch": 0.22395430475397157, "grad_norm": 0.40042087107333996, "learning_rate": 9.825657034549939e-06, "loss": 0.3866, "step": 941 }, { "epoch": 0.22419230082703637, "grad_norm": 0.44006439544231385, "learning_rate": 9.825152097655813e-06, "loss": 0.3589, "step": 942 }, { "epoch": 0.22443029690010116, "grad_norm": 0.3999068012035896, "learning_rate": 9.824646443624717e-06, "loss": 0.3891, "step": 943 }, { "epoch": 0.22466829297316596, "grad_norm": 0.4539949228586678, "learning_rate": 9.824140072531805e-06, "loss": 0.4532, "step": 944 }, { "epoch": 0.22490628904623075, "grad_norm": 0.3981879315816523, "learning_rate": 9.823632984452331e-06, "loss": 0.3465, "step": 945 }, { "epoch": 0.22514428511929555, "grad_norm": 0.6439992315492964, "learning_rate": 9.823125179461668e-06, "loss": 0.376, "step": 946 }, { "epoch": 0.22538228119236034, "grad_norm": 0.404887917419206, "learning_rate": 9.822616657635284e-06, "loss": 0.4472, "step": 947 }, { "epoch": 0.22562027726542513, "grad_norm": 0.37708651982188657, "learning_rate": 9.822107419048759e-06, "loss": 0.4146, "step": 948 }, { "epoch": 0.2258582733384899, "grad_norm": 0.44302674607158987, "learning_rate": 9.821597463777779e-06, "loss": 0.321, "step": 949 }, { "epoch": 0.2260962694115547, "grad_norm": 0.412240348327169, "learning_rate": 9.821086791898133e-06, "loss": 0.3731, "step": 950 }, { "epoch": 0.2263342654846195, "grad_norm": 0.40579745891438523, "learning_rate": 9.820575403485724e-06, "loss": 0.4699, "step": 951 }, { "epoch": 0.22657226155768428, "grad_norm": 0.415668474457593, "learning_rate": 9.820063298616553e-06, "loss": 0.3718, "step": 952 }, { "epoch": 0.22681025763074908, "grad_norm": 0.416764959736151, "learning_rate": 9.819550477366735e-06, "loss": 0.3393, "step": 953 }, { "epoch": 0.22704825370381387, "grad_norm": 0.44453596559445757, "learning_rate": 9.819036939812485e-06, "loss": 0.4483, "step": 954 }, { "epoch": 0.22728624977687867, "grad_norm": 0.3709861408947193, "learning_rate": 9.818522686030127e-06, "loss": 0.4047, "step": 955 }, { "epoch": 0.22752424584994346, "grad_norm": 0.40210692784551644, "learning_rate": 9.818007716096096e-06, "loss": 0.3185, "step": 956 }, { "epoch": 0.22776224192300826, "grad_norm": 0.3862126896605156, "learning_rate": 9.817492030086926e-06, "loss": 0.3905, "step": 957 }, { "epoch": 0.22800023799607305, "grad_norm": 0.43152498874354184, "learning_rate": 9.816975628079261e-06, "loss": 0.4296, "step": 958 }, { "epoch": 0.22823823406913785, "grad_norm": 0.38525294582007225, "learning_rate": 9.816458510149852e-06, "loss": 0.3463, "step": 959 }, { "epoch": 0.22847623014220264, "grad_norm": 0.47012279193131934, "learning_rate": 9.815940676375554e-06, "loss": 0.3733, "step": 960 }, { "epoch": 0.22871422621526744, "grad_norm": 0.4002528394439282, "learning_rate": 9.815422126833332e-06, "loss": 0.4243, "step": 961 }, { "epoch": 0.22895222228833223, "grad_norm": 0.4248826037103133, "learning_rate": 9.814902861600252e-06, "loss": 0.4592, "step": 962 }, { "epoch": 0.22919021836139702, "grad_norm": 0.4022803320480314, "learning_rate": 9.814382880753493e-06, "loss": 0.361, "step": 963 }, { "epoch": 0.22942821443446182, "grad_norm": 0.424538658023901, "learning_rate": 9.813862184370338e-06, "loss": 0.3863, "step": 964 }, { "epoch": 0.2296662105075266, "grad_norm": 0.44976984034315987, "learning_rate": 9.81334077252817e-06, "loss": 0.4503, "step": 965 }, { "epoch": 0.2299042065805914, "grad_norm": 0.406304483755596, "learning_rate": 9.812818645304488e-06, "loss": 0.3875, "step": 966 }, { "epoch": 0.2301422026536562, "grad_norm": 0.48831090846163266, "learning_rate": 9.812295802776893e-06, "loss": 0.3351, "step": 967 }, { "epoch": 0.230380198726721, "grad_norm": 0.4010981600344825, "learning_rate": 9.81177224502309e-06, "loss": 0.4074, "step": 968 }, { "epoch": 0.2306181947997858, "grad_norm": 0.39821279677246363, "learning_rate": 9.811247972120895e-06, "loss": 0.4441, "step": 969 }, { "epoch": 0.2308561908728506, "grad_norm": 0.40402056505396167, "learning_rate": 9.810722984148224e-06, "loss": 0.3292, "step": 970 }, { "epoch": 0.23109418694591538, "grad_norm": 0.4031749039115374, "learning_rate": 9.810197281183109e-06, "loss": 0.3967, "step": 971 }, { "epoch": 0.23133218301898018, "grad_norm": 0.4254043618777442, "learning_rate": 9.809670863303678e-06, "loss": 0.4345, "step": 972 }, { "epoch": 0.23157017909204497, "grad_norm": 0.41821250677571714, "learning_rate": 9.809143730588172e-06, "loss": 0.3698, "step": 973 }, { "epoch": 0.23180817516510976, "grad_norm": 0.4186518496968692, "learning_rate": 9.808615883114935e-06, "loss": 0.3435, "step": 974 }, { "epoch": 0.23204617123817456, "grad_norm": 0.447054433602328, "learning_rate": 9.808087320962418e-06, "loss": 0.3868, "step": 975 }, { "epoch": 0.23228416731123935, "grad_norm": 0.40528303959506257, "learning_rate": 9.807558044209178e-06, "loss": 0.4353, "step": 976 }, { "epoch": 0.23252216338430415, "grad_norm": 0.4201076442750277, "learning_rate": 9.80702805293388e-06, "loss": 0.3498, "step": 977 }, { "epoch": 0.23276015945736894, "grad_norm": 0.43371214248349416, "learning_rate": 9.806497347215294e-06, "loss": 0.3148, "step": 978 }, { "epoch": 0.23299815553043374, "grad_norm": 0.41147178302674675, "learning_rate": 9.805965927132294e-06, "loss": 0.4017, "step": 979 }, { "epoch": 0.23323615160349853, "grad_norm": 0.4447084847180371, "learning_rate": 9.805433792763866e-06, "loss": 0.4182, "step": 980 }, { "epoch": 0.23347414767656333, "grad_norm": 0.41126728345921754, "learning_rate": 9.804900944189093e-06, "loss": 0.3648, "step": 981 }, { "epoch": 0.23371214374962812, "grad_norm": 0.41273761768511535, "learning_rate": 9.804367381487172e-06, "loss": 0.385, "step": 982 }, { "epoch": 0.23395013982269292, "grad_norm": 0.39008444226884603, "learning_rate": 9.803833104737406e-06, "loss": 0.4549, "step": 983 }, { "epoch": 0.2341881358957577, "grad_norm": 0.40144010395161167, "learning_rate": 9.803298114019198e-06, "loss": 0.371, "step": 984 }, { "epoch": 0.2344261319688225, "grad_norm": 0.39554229899897064, "learning_rate": 9.802762409412062e-06, "loss": 0.3357, "step": 985 }, { "epoch": 0.2346641280418873, "grad_norm": 0.7571829198075979, "learning_rate": 9.802225990995618e-06, "loss": 0.3972, "step": 986 }, { "epoch": 0.2349021241149521, "grad_norm": 0.4175979906090598, "learning_rate": 9.801688858849589e-06, "loss": 0.4162, "step": 987 }, { "epoch": 0.2351401201880169, "grad_norm": 0.39744051393120755, "learning_rate": 9.80115101305381e-06, "loss": 0.3587, "step": 988 }, { "epoch": 0.23537811626108168, "grad_norm": 0.3999755158754268, "learning_rate": 9.800612453688214e-06, "loss": 0.3679, "step": 989 }, { "epoch": 0.23561611233414648, "grad_norm": 0.41712508601670123, "learning_rate": 9.800073180832848e-06, "loss": 0.4463, "step": 990 }, { "epoch": 0.23585410840721127, "grad_norm": 0.4401207780378245, "learning_rate": 9.799533194567856e-06, "loss": 0.3739, "step": 991 }, { "epoch": 0.23609210448027607, "grad_norm": 0.47720852526507335, "learning_rate": 9.7989924949735e-06, "loss": 0.3634, "step": 992 }, { "epoch": 0.23633010055334086, "grad_norm": 0.41441117101780556, "learning_rate": 9.798451082130136e-06, "loss": 0.4021, "step": 993 }, { "epoch": 0.23656809662640566, "grad_norm": 0.4079927127031793, "learning_rate": 9.797908956118233e-06, "loss": 0.4398, "step": 994 }, { "epoch": 0.23680609269947045, "grad_norm": 0.4514084993876973, "learning_rate": 9.797366117018365e-06, "loss": 0.353, "step": 995 }, { "epoch": 0.23704408877253524, "grad_norm": 0.41156120093358467, "learning_rate": 9.79682256491121e-06, "loss": 0.3714, "step": 996 }, { "epoch": 0.23728208484560004, "grad_norm": 0.39736420037308545, "learning_rate": 9.796278299877556e-06, "loss": 0.4682, "step": 997 }, { "epoch": 0.23752008091866483, "grad_norm": 0.419750377621859, "learning_rate": 9.795733321998291e-06, "loss": 0.3953, "step": 998 }, { "epoch": 0.23775807699172963, "grad_norm": 0.4523615063724095, "learning_rate": 9.795187631354415e-06, "loss": 0.3176, "step": 999 }, { "epoch": 0.23799607306479442, "grad_norm": 0.43280687276990515, "learning_rate": 9.794641228027029e-06, "loss": 0.3767, "step": 1000 }, { "epoch": 0.23823406913785922, "grad_norm": 0.41701831566090125, "learning_rate": 9.794094112097342e-06, "loss": 0.4651, "step": 1001 }, { "epoch": 0.238472065210924, "grad_norm": 0.4670265558164397, "learning_rate": 9.793546283646671e-06, "loss": 0.3804, "step": 1002 }, { "epoch": 0.2387100612839888, "grad_norm": 0.4073253123192443, "learning_rate": 9.792997742756433e-06, "loss": 0.3447, "step": 1003 }, { "epoch": 0.2389480573570536, "grad_norm": 0.41912347417284207, "learning_rate": 9.792448489508161e-06, "loss": 0.4335, "step": 1004 }, { "epoch": 0.2391860534301184, "grad_norm": 0.4601404438983729, "learning_rate": 9.791898523983483e-06, "loss": 0.3913, "step": 1005 }, { "epoch": 0.2394240495031832, "grad_norm": 0.40669062578592385, "learning_rate": 9.791347846264137e-06, "loss": 0.3174, "step": 1006 }, { "epoch": 0.23966204557624798, "grad_norm": 0.40618171733993275, "learning_rate": 9.790796456431971e-06, "loss": 0.4043, "step": 1007 }, { "epoch": 0.23990004164931278, "grad_norm": 0.41827989422393813, "learning_rate": 9.79024435456893e-06, "loss": 0.4413, "step": 1008 }, { "epoch": 0.24013803772237757, "grad_norm": 0.431949416274619, "learning_rate": 9.789691540757076e-06, "loss": 0.3945, "step": 1009 }, { "epoch": 0.24037603379544237, "grad_norm": 0.41413010096326724, "learning_rate": 9.789138015078565e-06, "loss": 0.3339, "step": 1010 }, { "epoch": 0.24061402986850716, "grad_norm": 0.4140278014836118, "learning_rate": 9.78858377761567e-06, "loss": 0.4029, "step": 1011 }, { "epoch": 0.24085202594157196, "grad_norm": 0.4285228881221896, "learning_rate": 9.78802882845076e-06, "loss": 0.4099, "step": 1012 }, { "epoch": 0.24109002201463675, "grad_norm": 0.4491959646131581, "learning_rate": 9.787473167666316e-06, "loss": 0.3299, "step": 1013 }, { "epoch": 0.24132801808770155, "grad_norm": 0.45078633145305924, "learning_rate": 9.786916795344925e-06, "loss": 0.3761, "step": 1014 }, { "epoch": 0.24156601416076634, "grad_norm": 0.40484830252152926, "learning_rate": 9.786359711569273e-06, "loss": 0.4174, "step": 1015 }, { "epoch": 0.24180401023383113, "grad_norm": 0.4129343063024655, "learning_rate": 9.785801916422162e-06, "loss": 0.3641, "step": 1016 }, { "epoch": 0.24204200630689593, "grad_norm": 0.3896049676049958, "learning_rate": 9.78524340998649e-06, "loss": 0.3469, "step": 1017 }, { "epoch": 0.24228000237996072, "grad_norm": 0.397046837899395, "learning_rate": 9.784684192345264e-06, "loss": 0.3873, "step": 1018 }, { "epoch": 0.24251799845302552, "grad_norm": 0.46413478846406936, "learning_rate": 9.7841242635816e-06, "loss": 0.4772, "step": 1019 }, { "epoch": 0.2427559945260903, "grad_norm": 0.4277807175436547, "learning_rate": 9.78356362377872e-06, "loss": 0.3502, "step": 1020 }, { "epoch": 0.2429939905991551, "grad_norm": 0.40246848535234875, "learning_rate": 9.783002273019942e-06, "loss": 0.3317, "step": 1021 }, { "epoch": 0.2432319866722199, "grad_norm": 0.40816497903302856, "learning_rate": 9.782440211388703e-06, "loss": 0.4474, "step": 1022 }, { "epoch": 0.2434699827452847, "grad_norm": 0.43384610531907175, "learning_rate": 9.781877438968536e-06, "loss": 0.3799, "step": 1023 }, { "epoch": 0.2437079788183495, "grad_norm": 0.425087792025929, "learning_rate": 9.781313955843084e-06, "loss": 0.3573, "step": 1024 }, { "epoch": 0.24394597489141429, "grad_norm": 0.4026711207325404, "learning_rate": 9.780749762096093e-06, "loss": 0.3878, "step": 1025 }, { "epoch": 0.24418397096447908, "grad_norm": 0.4098774528539441, "learning_rate": 9.780184857811419e-06, "loss": 0.4786, "step": 1026 }, { "epoch": 0.24442196703754387, "grad_norm": 0.4396169899920244, "learning_rate": 9.779619243073017e-06, "loss": 0.3444, "step": 1027 }, { "epoch": 0.24465996311060867, "grad_norm": 0.4136758278535869, "learning_rate": 9.779052917964955e-06, "loss": 0.3948, "step": 1028 }, { "epoch": 0.24489795918367346, "grad_norm": 0.3739736169388403, "learning_rate": 9.7784858825714e-06, "loss": 0.3961, "step": 1029 }, { "epoch": 0.24513595525673826, "grad_norm": 0.4184534974361922, "learning_rate": 9.77791813697663e-06, "loss": 0.4134, "step": 1030 }, { "epoch": 0.24537395132980305, "grad_norm": 0.4728932911361564, "learning_rate": 9.777349681265024e-06, "loss": 0.3557, "step": 1031 }, { "epoch": 0.24561194740286785, "grad_norm": 0.41399344692303164, "learning_rate": 9.77678051552107e-06, "loss": 0.3869, "step": 1032 }, { "epoch": 0.24584994347593264, "grad_norm": 0.49034199619398566, "learning_rate": 9.77621063982936e-06, "loss": 0.4363, "step": 1033 }, { "epoch": 0.24608793954899744, "grad_norm": 0.4165647555586178, "learning_rate": 9.775640054274591e-06, "loss": 0.3841, "step": 1034 }, { "epoch": 0.24632593562206223, "grad_norm": 0.4360715981984538, "learning_rate": 9.775068758941566e-06, "loss": 0.3205, "step": 1035 }, { "epoch": 0.24656393169512703, "grad_norm": 0.389169460851251, "learning_rate": 9.774496753915193e-06, "loss": 0.3785, "step": 1036 }, { "epoch": 0.24680192776819182, "grad_norm": 0.39337607363580057, "learning_rate": 9.773924039280488e-06, "loss": 0.4302, "step": 1037 }, { "epoch": 0.24703992384125661, "grad_norm": 0.39250700422998375, "learning_rate": 9.77335061512257e-06, "loss": 0.3382, "step": 1038 }, { "epoch": 0.2472779199143214, "grad_norm": 0.43938604330305114, "learning_rate": 9.772776481526662e-06, "loss": 0.3888, "step": 1039 }, { "epoch": 0.2475159159873862, "grad_norm": 0.39206815609324325, "learning_rate": 9.772201638578099e-06, "loss": 0.4238, "step": 1040 }, { "epoch": 0.247753912060451, "grad_norm": 0.4053806301912741, "learning_rate": 9.77162608636231e-06, "loss": 0.3748, "step": 1041 }, { "epoch": 0.2479919081335158, "grad_norm": 0.4413688344206532, "learning_rate": 9.771049824964843e-06, "loss": 0.3448, "step": 1042 }, { "epoch": 0.2482299042065806, "grad_norm": 0.4272845904110967, "learning_rate": 9.77047285447134e-06, "loss": 0.4019, "step": 1043 }, { "epoch": 0.24846790027964538, "grad_norm": 0.41367780286322975, "learning_rate": 9.769895174967556e-06, "loss": 0.4339, "step": 1044 }, { "epoch": 0.24870589635271018, "grad_norm": 0.41454713221485506, "learning_rate": 9.769316786539349e-06, "loss": 0.368, "step": 1045 }, { "epoch": 0.24894389242577497, "grad_norm": 0.40425323505335387, "learning_rate": 9.768737689272678e-06, "loss": 0.3679, "step": 1046 }, { "epoch": 0.24918188849883977, "grad_norm": 0.38710204326010633, "learning_rate": 9.768157883253616e-06, "loss": 0.4258, "step": 1047 }, { "epoch": 0.24941988457190456, "grad_norm": 0.40041946638737086, "learning_rate": 9.76757736856833e-06, "loss": 0.4006, "step": 1048 }, { "epoch": 0.24965788064496935, "grad_norm": 0.36660667068666375, "learning_rate": 9.766996145303107e-06, "loss": 0.3208, "step": 1049 }, { "epoch": 0.24989587671803415, "grad_norm": 0.40641295360789265, "learning_rate": 9.766414213544325e-06, "loss": 0.3991, "step": 1050 }, { "epoch": 0.25013387279109894, "grad_norm": 0.3963317481756286, "learning_rate": 9.765831573378474e-06, "loss": 0.467, "step": 1051 }, { "epoch": 0.25037186886416374, "grad_norm": 0.4102266739054517, "learning_rate": 9.765248224892152e-06, "loss": 0.3432, "step": 1052 }, { "epoch": 0.25060986493722853, "grad_norm": 0.5225965842019219, "learning_rate": 9.764664168172057e-06, "loss": 0.3248, "step": 1053 }, { "epoch": 0.2508478610102933, "grad_norm": 0.39422579626773063, "learning_rate": 9.764079403304996e-06, "loss": 0.441, "step": 1054 }, { "epoch": 0.2510858570833581, "grad_norm": 0.43871315920548154, "learning_rate": 9.763493930377877e-06, "loss": 0.4206, "step": 1055 }, { "epoch": 0.2513238531564229, "grad_norm": 0.39618156243497527, "learning_rate": 9.762907749477717e-06, "loss": 0.3543, "step": 1056 }, { "epoch": 0.2515618492294877, "grad_norm": 0.40119944869118124, "learning_rate": 9.762320860691636e-06, "loss": 0.3664, "step": 1057 }, { "epoch": 0.2517998453025525, "grad_norm": 0.4448657055439697, "learning_rate": 9.76173326410686e-06, "loss": 0.4404, "step": 1058 }, { "epoch": 0.2520378413756173, "grad_norm": 0.4504360863126996, "learning_rate": 9.761144959810723e-06, "loss": 0.3729, "step": 1059 }, { "epoch": 0.2522758374486821, "grad_norm": 0.3858257157958738, "learning_rate": 9.760555947890659e-06, "loss": 0.3264, "step": 1060 }, { "epoch": 0.2525138335217469, "grad_norm": 0.42098269355466916, "learning_rate": 9.759966228434212e-06, "loss": 0.4308, "step": 1061 }, { "epoch": 0.2527518295948117, "grad_norm": 0.4414996944483038, "learning_rate": 9.759375801529026e-06, "loss": 0.4122, "step": 1062 }, { "epoch": 0.2529898256678765, "grad_norm": 0.4083823838111461, "learning_rate": 9.758784667262856e-06, "loss": 0.3455, "step": 1063 }, { "epoch": 0.2532278217409413, "grad_norm": 0.39532890350795247, "learning_rate": 9.758192825723556e-06, "loss": 0.3742, "step": 1064 }, { "epoch": 0.25346581781400607, "grad_norm": 0.7090467989608195, "learning_rate": 9.757600276999092e-06, "loss": 0.4249, "step": 1065 }, { "epoch": 0.25370381388707086, "grad_norm": 0.40621648230909274, "learning_rate": 9.757007021177529e-06, "loss": 0.3793, "step": 1066 }, { "epoch": 0.25394180996013566, "grad_norm": 0.4208314833352698, "learning_rate": 9.756413058347039e-06, "loss": 0.3424, "step": 1067 }, { "epoch": 0.25417980603320045, "grad_norm": 0.377078139573875, "learning_rate": 9.7558183885959e-06, "loss": 0.3902, "step": 1068 }, { "epoch": 0.25441780210626525, "grad_norm": 0.44783916724457257, "learning_rate": 9.755223012012498e-06, "loss": 0.4407, "step": 1069 }, { "epoch": 0.25465579817933004, "grad_norm": 0.39045199889729937, "learning_rate": 9.754626928685315e-06, "loss": 0.3372, "step": 1070 }, { "epoch": 0.25489379425239483, "grad_norm": 0.4319090741276457, "learning_rate": 9.754030138702948e-06, "loss": 0.3226, "step": 1071 }, { "epoch": 0.25513179032545963, "grad_norm": 0.3789725941806326, "learning_rate": 9.753432642154093e-06, "loss": 0.4311, "step": 1072 }, { "epoch": 0.2553697863985244, "grad_norm": 0.4119736475459839, "learning_rate": 9.752834439127554e-06, "loss": 0.4179, "step": 1073 }, { "epoch": 0.2556077824715892, "grad_norm": 0.46500296910275435, "learning_rate": 9.752235529712237e-06, "loss": 0.3472, "step": 1074 }, { "epoch": 0.255845778544654, "grad_norm": 0.410321050312457, "learning_rate": 9.751635913997155e-06, "loss": 0.3981, "step": 1075 }, { "epoch": 0.2560837746177188, "grad_norm": 0.41821928052237395, "learning_rate": 9.751035592071427e-06, "loss": 0.4358, "step": 1076 }, { "epoch": 0.2563217706907836, "grad_norm": 0.4250687984115567, "learning_rate": 9.750434564024276e-06, "loss": 0.3738, "step": 1077 }, { "epoch": 0.2565597667638484, "grad_norm": 0.4407759908452419, "learning_rate": 9.749832829945027e-06, "loss": 0.3292, "step": 1078 }, { "epoch": 0.2567977628369132, "grad_norm": 0.36605112835248904, "learning_rate": 9.749230389923117e-06, "loss": 0.4139, "step": 1079 }, { "epoch": 0.257035758909978, "grad_norm": 0.46252554242389143, "learning_rate": 9.748627244048077e-06, "loss": 0.4072, "step": 1080 }, { "epoch": 0.2572737549830428, "grad_norm": 0.4131656517840768, "learning_rate": 9.748023392409556e-06, "loss": 0.3444, "step": 1081 }, { "epoch": 0.2575117510561076, "grad_norm": 0.4244606448210475, "learning_rate": 9.747418835097298e-06, "loss": 0.385, "step": 1082 }, { "epoch": 0.25774974712917237, "grad_norm": 0.39038280156155775, "learning_rate": 9.746813572201154e-06, "loss": 0.442, "step": 1083 }, { "epoch": 0.25798774320223716, "grad_norm": 0.4367941002506421, "learning_rate": 9.746207603811085e-06, "loss": 0.3848, "step": 1084 }, { "epoch": 0.25822573927530196, "grad_norm": 0.43484071040684835, "learning_rate": 9.74560093001715e-06, "loss": 0.317, "step": 1085 }, { "epoch": 0.25846373534836675, "grad_norm": 0.35367482678247547, "learning_rate": 9.744993550909514e-06, "loss": 0.3873, "step": 1086 }, { "epoch": 0.25870173142143155, "grad_norm": 0.4941729589297459, "learning_rate": 9.744385466578453e-06, "loss": 0.4777, "step": 1087 }, { "epoch": 0.25893972749449634, "grad_norm": 0.4272644514387727, "learning_rate": 9.74377667711434e-06, "loss": 0.3518, "step": 1088 }, { "epoch": 0.25917772356756114, "grad_norm": 0.4169441718338264, "learning_rate": 9.743167182607659e-06, "loss": 0.3746, "step": 1089 }, { "epoch": 0.25941571964062593, "grad_norm": 0.4436977383688021, "learning_rate": 9.742556983148994e-06, "loss": 0.4359, "step": 1090 }, { "epoch": 0.2596537157136907, "grad_norm": 0.44907822644616785, "learning_rate": 9.741946078829035e-06, "loss": 0.3661, "step": 1091 }, { "epoch": 0.2598917117867555, "grad_norm": 0.5085499247152285, "learning_rate": 9.74133446973858e-06, "loss": 0.3236, "step": 1092 }, { "epoch": 0.2601297078598203, "grad_norm": 0.4095219628657235, "learning_rate": 9.740722155968527e-06, "loss": 0.3725, "step": 1093 }, { "epoch": 0.2603677039328851, "grad_norm": 0.4492820629768859, "learning_rate": 9.74010913760988e-06, "loss": 0.4523, "step": 1094 }, { "epoch": 0.2606057000059499, "grad_norm": 0.4632589956141342, "learning_rate": 9.739495414753754e-06, "loss": 0.3544, "step": 1095 }, { "epoch": 0.2608436960790147, "grad_norm": 0.43509465444418566, "learning_rate": 9.738880987491357e-06, "loss": 0.3868, "step": 1096 }, { "epoch": 0.2610816921520795, "grad_norm": 0.4155977515794823, "learning_rate": 9.738265855914014e-06, "loss": 0.413, "step": 1097 }, { "epoch": 0.2613196882251443, "grad_norm": 0.4944891223357048, "learning_rate": 9.737650020113143e-06, "loss": 0.385, "step": 1098 }, { "epoch": 0.2615576842982091, "grad_norm": 0.4408252014561249, "learning_rate": 9.737033480180276e-06, "loss": 0.3339, "step": 1099 }, { "epoch": 0.2617956803712739, "grad_norm": 0.40705862767289636, "learning_rate": 9.736416236207046e-06, "loss": 0.3871, "step": 1100 }, { "epoch": 0.26203367644433867, "grad_norm": 0.4087671198515162, "learning_rate": 9.735798288285189e-06, "loss": 0.4766, "step": 1101 }, { "epoch": 0.26227167251740346, "grad_norm": 0.4129939242716965, "learning_rate": 9.73517963650655e-06, "loss": 0.3562, "step": 1102 }, { "epoch": 0.26250966859046826, "grad_norm": 0.3870745733554611, "learning_rate": 9.734560280963072e-06, "loss": 0.3615, "step": 1103 }, { "epoch": 0.26274766466353305, "grad_norm": 0.39750563414632717, "learning_rate": 9.73394022174681e-06, "loss": 0.4273, "step": 1104 }, { "epoch": 0.26298566073659785, "grad_norm": 0.4523534366058027, "learning_rate": 9.73331945894992e-06, "loss": 0.3872, "step": 1105 }, { "epoch": 0.26322365680966264, "grad_norm": 0.48990020782286, "learning_rate": 9.73269799266466e-06, "loss": 0.3577, "step": 1106 }, { "epoch": 0.26346165288272744, "grad_norm": 0.43917343471689085, "learning_rate": 9.732075822983398e-06, "loss": 0.3667, "step": 1107 }, { "epoch": 0.26369964895579223, "grad_norm": 0.4263835822823487, "learning_rate": 9.731452949998603e-06, "loss": 0.4421, "step": 1108 }, { "epoch": 0.263937645028857, "grad_norm": 0.46278837021200253, "learning_rate": 9.730829373802847e-06, "loss": 0.3615, "step": 1109 }, { "epoch": 0.2641756411019218, "grad_norm": 0.40000848194964195, "learning_rate": 9.730205094488813e-06, "loss": 0.3516, "step": 1110 }, { "epoch": 0.2644136371749866, "grad_norm": 0.40932668897348323, "learning_rate": 9.729580112149283e-06, "loss": 0.3911, "step": 1111 }, { "epoch": 0.2646516332480514, "grad_norm": 0.42423822678370915, "learning_rate": 9.728954426877142e-06, "loss": 0.3937, "step": 1112 }, { "epoch": 0.2648896293211162, "grad_norm": 0.4536666880057892, "learning_rate": 9.728328038765387e-06, "loss": 0.3364, "step": 1113 }, { "epoch": 0.265127625394181, "grad_norm": 0.4096764042742354, "learning_rate": 9.72770094790711e-06, "loss": 0.3995, "step": 1114 }, { "epoch": 0.2653656214672458, "grad_norm": 0.41042568452857936, "learning_rate": 9.727073154395516e-06, "loss": 0.4235, "step": 1115 }, { "epoch": 0.2656036175403106, "grad_norm": 0.4929851761073694, "learning_rate": 9.726444658323908e-06, "loss": 0.3269, "step": 1116 }, { "epoch": 0.2658416136133754, "grad_norm": 0.44283586974793293, "learning_rate": 9.725815459785696e-06, "loss": 0.3496, "step": 1117 }, { "epoch": 0.2660796096864402, "grad_norm": 0.4146273039324895, "learning_rate": 9.725185558874399e-06, "loss": 0.3658, "step": 1118 }, { "epoch": 0.26631760575950497, "grad_norm": 0.4319382476751413, "learning_rate": 9.72455495568363e-06, "loss": 0.4564, "step": 1119 }, { "epoch": 0.26655560183256977, "grad_norm": 0.48931654146756204, "learning_rate": 9.723923650307116e-06, "loss": 0.3553, "step": 1120 }, { "epoch": 0.26679359790563456, "grad_norm": 0.4701876581191389, "learning_rate": 9.723291642838682e-06, "loss": 0.3524, "step": 1121 }, { "epoch": 0.26703159397869936, "grad_norm": 0.39377148630113995, "learning_rate": 9.722658933372262e-06, "loss": 0.4599, "step": 1122 }, { "epoch": 0.26726959005176415, "grad_norm": 0.4558315940897939, "learning_rate": 9.722025522001892e-06, "loss": 0.389, "step": 1123 }, { "epoch": 0.26750758612482894, "grad_norm": 0.4215971407730172, "learning_rate": 9.721391408821713e-06, "loss": 0.3064, "step": 1124 }, { "epoch": 0.26774558219789374, "grad_norm": 0.3911192317304776, "learning_rate": 9.720756593925967e-06, "loss": 0.424, "step": 1125 }, { "epoch": 0.26798357827095853, "grad_norm": 0.3867608031443001, "learning_rate": 9.720121077409006e-06, "loss": 0.4329, "step": 1126 }, { "epoch": 0.26822157434402333, "grad_norm": 0.4335469962868136, "learning_rate": 9.719484859365283e-06, "loss": 0.3384, "step": 1127 }, { "epoch": 0.2684595704170881, "grad_norm": 0.41163503862716694, "learning_rate": 9.718847939889354e-06, "loss": 0.3526, "step": 1128 }, { "epoch": 0.2686975664901529, "grad_norm": 0.3892507716482463, "learning_rate": 9.718210319075883e-06, "loss": 0.428, "step": 1129 }, { "epoch": 0.2689355625632177, "grad_norm": 0.4116306866869224, "learning_rate": 9.717571997019637e-06, "loss": 0.3982, "step": 1130 }, { "epoch": 0.2691735586362825, "grad_norm": 0.39033870830232814, "learning_rate": 9.716932973815485e-06, "loss": 0.3799, "step": 1131 }, { "epoch": 0.2694115547093473, "grad_norm": 0.40225942909727946, "learning_rate": 9.716293249558401e-06, "loss": 0.3807, "step": 1132 }, { "epoch": 0.2696495507824121, "grad_norm": 0.39112529026808723, "learning_rate": 9.715652824343465e-06, "loss": 0.4281, "step": 1133 }, { "epoch": 0.2698875468554769, "grad_norm": 0.3851730979067281, "learning_rate": 9.71501169826586e-06, "loss": 0.3295, "step": 1134 }, { "epoch": 0.2701255429285417, "grad_norm": 0.41373661540628287, "learning_rate": 9.714369871420872e-06, "loss": 0.3928, "step": 1135 }, { "epoch": 0.2703635390016065, "grad_norm": 0.4461170158222069, "learning_rate": 9.713727343903893e-06, "loss": 0.4109, "step": 1136 }, { "epoch": 0.2706015350746713, "grad_norm": 0.4212053908749594, "learning_rate": 9.71308411581042e-06, "loss": 0.4206, "step": 1137 }, { "epoch": 0.27083953114773607, "grad_norm": 0.4563483334376799, "learning_rate": 9.71244018723605e-06, "loss": 0.3494, "step": 1138 }, { "epoch": 0.27107752722080086, "grad_norm": 0.4063198246744073, "learning_rate": 9.711795558276489e-06, "loss": 0.3797, "step": 1139 }, { "epoch": 0.27131552329386566, "grad_norm": 0.4766611835404524, "learning_rate": 9.711150229027544e-06, "loss": 0.4136, "step": 1140 }, { "epoch": 0.27155351936693045, "grad_norm": 0.43835373298275027, "learning_rate": 9.710504199585127e-06, "loss": 0.3632, "step": 1141 }, { "epoch": 0.27179151543999525, "grad_norm": 0.4164970849311708, "learning_rate": 9.709857470045251e-06, "loss": 0.329, "step": 1142 }, { "epoch": 0.27202951151306004, "grad_norm": 0.40401054483859816, "learning_rate": 9.709210040504042e-06, "loss": 0.3835, "step": 1143 }, { "epoch": 0.27226750758612484, "grad_norm": 0.39714074001954824, "learning_rate": 9.708561911057719e-06, "loss": 0.4795, "step": 1144 }, { "epoch": 0.27250550365918963, "grad_norm": 0.4680110103103199, "learning_rate": 9.707913081802613e-06, "loss": 0.341, "step": 1145 }, { "epoch": 0.2727434997322544, "grad_norm": 0.4089665368213675, "learning_rate": 9.707263552835153e-06, "loss": 0.3453, "step": 1146 }, { "epoch": 0.2729814958053192, "grad_norm": 0.39910736552697396, "learning_rate": 9.70661332425188e-06, "loss": 0.3954, "step": 1147 }, { "epoch": 0.273219491878384, "grad_norm": 0.4278429044590152, "learning_rate": 9.705962396149428e-06, "loss": 0.3727, "step": 1148 }, { "epoch": 0.2734574879514488, "grad_norm": 0.4381769229333954, "learning_rate": 9.705310768624545e-06, "loss": 0.3003, "step": 1149 }, { "epoch": 0.2736954840245136, "grad_norm": 0.3892819098989479, "learning_rate": 9.704658441774078e-06, "loss": 0.3448, "step": 1150 }, { "epoch": 0.2739334800975784, "grad_norm": 0.45797697785561536, "learning_rate": 9.704005415694979e-06, "loss": 0.4175, "step": 1151 }, { "epoch": 0.2741714761706432, "grad_norm": 0.4458801671185096, "learning_rate": 9.703351690484305e-06, "loss": 0.3442, "step": 1152 }, { "epoch": 0.274409472243708, "grad_norm": 0.43193201208559157, "learning_rate": 9.702697266239211e-06, "loss": 0.3373, "step": 1153 }, { "epoch": 0.2746474683167728, "grad_norm": 0.41679902408312985, "learning_rate": 9.702042143056966e-06, "loss": 0.3958, "step": 1154 }, { "epoch": 0.2748854643898376, "grad_norm": 0.443366075981643, "learning_rate": 9.701386321034937e-06, "loss": 0.4387, "step": 1155 }, { "epoch": 0.27512346046290237, "grad_norm": 0.4278061958287098, "learning_rate": 9.700729800270592e-06, "loss": 0.3615, "step": 1156 }, { "epoch": 0.27536145653596716, "grad_norm": 0.4281403145666002, "learning_rate": 9.700072580861511e-06, "loss": 0.3827, "step": 1157 }, { "epoch": 0.27559945260903196, "grad_norm": 0.4404470075524074, "learning_rate": 9.699414662905368e-06, "loss": 0.4316, "step": 1158 }, { "epoch": 0.27583744868209675, "grad_norm": 0.44475673605890376, "learning_rate": 9.698756046499948e-06, "loss": 0.3357, "step": 1159 }, { "epoch": 0.27607544475516155, "grad_norm": 0.43252690813011324, "learning_rate": 9.698096731743139e-06, "loss": 0.3298, "step": 1160 }, { "epoch": 0.27631344082822634, "grad_norm": 0.40596838215176706, "learning_rate": 9.69743671873293e-06, "loss": 0.3876, "step": 1161 }, { "epoch": 0.27655143690129114, "grad_norm": 0.38244031961085745, "learning_rate": 9.696776007567414e-06, "loss": 0.42, "step": 1162 }, { "epoch": 0.27678943297435593, "grad_norm": 0.39348329580696617, "learning_rate": 9.696114598344794e-06, "loss": 0.3526, "step": 1163 }, { "epoch": 0.2770274290474207, "grad_norm": 0.40702007665049295, "learning_rate": 9.695452491163367e-06, "loss": 0.3586, "step": 1164 }, { "epoch": 0.2772654251204855, "grad_norm": 0.3806659463696473, "learning_rate": 9.694789686121538e-06, "loss": 0.4213, "step": 1165 }, { "epoch": 0.2775034211935503, "grad_norm": 0.526039242215381, "learning_rate": 9.69412618331782e-06, "loss": 0.3702, "step": 1166 }, { "epoch": 0.2777414172666151, "grad_norm": 0.4034865628814754, "learning_rate": 9.693461982850824e-06, "loss": 0.3582, "step": 1167 }, { "epoch": 0.2779794133396799, "grad_norm": 0.36522029047320065, "learning_rate": 9.692797084819265e-06, "loss": 0.3887, "step": 1168 }, { "epoch": 0.2782174094127447, "grad_norm": 0.40180604628740285, "learning_rate": 9.692131489321968e-06, "loss": 0.4349, "step": 1169 }, { "epoch": 0.2784554054858095, "grad_norm": 0.4153489962964815, "learning_rate": 9.691465196457852e-06, "loss": 0.3318, "step": 1170 }, { "epoch": 0.2786934015588743, "grad_norm": 0.3762423285238886, "learning_rate": 9.690798206325947e-06, "loss": 0.3601, "step": 1171 }, { "epoch": 0.2789313976319391, "grad_norm": 0.41201247864367047, "learning_rate": 9.690130519025382e-06, "loss": 0.444, "step": 1172 }, { "epoch": 0.2791693937050039, "grad_norm": 0.4139424779362683, "learning_rate": 9.689462134655396e-06, "loss": 0.4238, "step": 1173 }, { "epoch": 0.27940738977806867, "grad_norm": 0.4213437892645641, "learning_rate": 9.688793053315324e-06, "loss": 0.3575, "step": 1174 }, { "epoch": 0.27964538585113347, "grad_norm": 0.42068466169572954, "learning_rate": 9.688123275104611e-06, "loss": 0.3755, "step": 1175 }, { "epoch": 0.27988338192419826, "grad_norm": 0.44836158304136864, "learning_rate": 9.6874528001228e-06, "loss": 0.4326, "step": 1176 }, { "epoch": 0.28012137799726305, "grad_norm": 0.4033459951159842, "learning_rate": 9.686781628469537e-06, "loss": 0.366, "step": 1177 }, { "epoch": 0.28035937407032785, "grad_norm": 0.427641362560405, "learning_rate": 9.686109760244583e-06, "loss": 0.3167, "step": 1178 }, { "epoch": 0.28059737014339264, "grad_norm": 0.4312381613704766, "learning_rate": 9.685437195547788e-06, "loss": 0.4068, "step": 1179 }, { "epoch": 0.28083536621645744, "grad_norm": 0.38271427698464, "learning_rate": 9.684763934479116e-06, "loss": 0.3542, "step": 1180 }, { "epoch": 0.28107336228952223, "grad_norm": 0.40970093005177965, "learning_rate": 9.684089977138625e-06, "loss": 0.3241, "step": 1181 }, { "epoch": 0.281311358362587, "grad_norm": 0.47135849095032084, "learning_rate": 9.683415323626487e-06, "loss": 0.3948, "step": 1182 }, { "epoch": 0.2815493544356518, "grad_norm": 0.4150750744550006, "learning_rate": 9.682739974042967e-06, "loss": 0.4717, "step": 1183 }, { "epoch": 0.2817873505087166, "grad_norm": 0.3988087657033761, "learning_rate": 9.682063928488444e-06, "loss": 0.3406, "step": 1184 }, { "epoch": 0.2820253465817814, "grad_norm": 0.3730302298116086, "learning_rate": 9.68138718706339e-06, "loss": 0.3291, "step": 1185 }, { "epoch": 0.2822633426548462, "grad_norm": 0.41362346247623033, "learning_rate": 9.68070974986839e-06, "loss": 0.4121, "step": 1186 }, { "epoch": 0.282501338727911, "grad_norm": 0.4049749435958661, "learning_rate": 9.680031617004127e-06, "loss": 0.4326, "step": 1187 }, { "epoch": 0.2827393348009758, "grad_norm": 0.43281535830805595, "learning_rate": 9.679352788571385e-06, "loss": 0.3082, "step": 1188 }, { "epoch": 0.2829773308740406, "grad_norm": 0.38623462120774466, "learning_rate": 9.678673264671057e-06, "loss": 0.3458, "step": 1189 }, { "epoch": 0.2832153269471054, "grad_norm": 0.40708256399448933, "learning_rate": 9.677993045404138e-06, "loss": 0.4467, "step": 1190 }, { "epoch": 0.2834533230201702, "grad_norm": 0.3996160590648185, "learning_rate": 9.677312130871724e-06, "loss": 0.3843, "step": 1191 }, { "epoch": 0.283691319093235, "grad_norm": 0.43448067080567276, "learning_rate": 9.676630521175017e-06, "loss": 0.3182, "step": 1192 }, { "epoch": 0.28392931516629977, "grad_norm": 0.42327090704304354, "learning_rate": 9.67594821641532e-06, "loss": 0.3932, "step": 1193 }, { "epoch": 0.28416731123936456, "grad_norm": 0.39790381975349265, "learning_rate": 9.675265216694041e-06, "loss": 0.4357, "step": 1194 }, { "epoch": 0.28440530731242936, "grad_norm": 0.42504381205655906, "learning_rate": 9.67458152211269e-06, "loss": 0.3491, "step": 1195 }, { "epoch": 0.28464330338549415, "grad_norm": 0.37577693430446824, "learning_rate": 9.673897132772881e-06, "loss": 0.3572, "step": 1196 }, { "epoch": 0.28488129945855895, "grad_norm": 0.38908763308437694, "learning_rate": 9.67321204877633e-06, "loss": 0.4457, "step": 1197 }, { "epoch": 0.28511929553162374, "grad_norm": 0.4153450452315847, "learning_rate": 9.672526270224861e-06, "loss": 0.4017, "step": 1198 }, { "epoch": 0.28535729160468853, "grad_norm": 0.3652029600299754, "learning_rate": 9.671839797220394e-06, "loss": 0.312, "step": 1199 }, { "epoch": 0.28559528767775333, "grad_norm": 0.3965275761201531, "learning_rate": 9.67115262986496e-06, "loss": 0.3594, "step": 1200 }, { "epoch": 0.2858332837508181, "grad_norm": 0.3978539830135749, "learning_rate": 9.670464768260684e-06, "loss": 0.433, "step": 1201 }, { "epoch": 0.2860712798238829, "grad_norm": 0.3911610545469143, "learning_rate": 9.669776212509802e-06, "loss": 0.3459, "step": 1202 }, { "epoch": 0.2863092758969477, "grad_norm": 0.4563276723079206, "learning_rate": 9.669086962714651e-06, "loss": 0.3656, "step": 1203 }, { "epoch": 0.2865472719700125, "grad_norm": 0.37157585507019325, "learning_rate": 9.66839701897767e-06, "loss": 0.4093, "step": 1204 }, { "epoch": 0.2867852680430773, "grad_norm": 0.43109471368785796, "learning_rate": 9.667706381401401e-06, "loss": 0.4173, "step": 1205 }, { "epoch": 0.2870232641161421, "grad_norm": 0.4106000046269324, "learning_rate": 9.667015050088489e-06, "loss": 0.3157, "step": 1206 }, { "epoch": 0.2872612601892069, "grad_norm": 0.49437005628521224, "learning_rate": 9.666323025141687e-06, "loss": 0.4168, "step": 1207 }, { "epoch": 0.2874992562622717, "grad_norm": 0.423786447227811, "learning_rate": 9.66563030666384e-06, "loss": 0.4489, "step": 1208 }, { "epoch": 0.2877372523353365, "grad_norm": 0.41056267338791924, "learning_rate": 9.66493689475791e-06, "loss": 0.3675, "step": 1209 }, { "epoch": 0.2879752484084013, "grad_norm": 0.38996061322766, "learning_rate": 9.664242789526952e-06, "loss": 0.3332, "step": 1210 }, { "epoch": 0.28821324448146607, "grad_norm": 0.38150783205902883, "learning_rate": 9.663547991074129e-06, "loss": 0.3972, "step": 1211 }, { "epoch": 0.28845124055453086, "grad_norm": 0.4577771253949555, "learning_rate": 9.662852499502702e-06, "loss": 0.4332, "step": 1212 }, { "epoch": 0.28868923662759566, "grad_norm": 0.39041968301837926, "learning_rate": 9.66215631491604e-06, "loss": 0.3268, "step": 1213 }, { "epoch": 0.28892723270066045, "grad_norm": 0.39588825022995383, "learning_rate": 9.661459437417616e-06, "loss": 0.3653, "step": 1214 }, { "epoch": 0.28916522877372525, "grad_norm": 0.38238412045047987, "learning_rate": 9.660761867110997e-06, "loss": 0.4436, "step": 1215 }, { "epoch": 0.28940322484679004, "grad_norm": 0.4388597314826315, "learning_rate": 9.660063604099866e-06, "loss": 0.3574, "step": 1216 }, { "epoch": 0.28964122091985484, "grad_norm": 0.3979764946768326, "learning_rate": 9.659364648487997e-06, "loss": 0.3016, "step": 1217 }, { "epoch": 0.28987921699291963, "grad_norm": 0.38473163200776395, "learning_rate": 9.658665000379275e-06, "loss": 0.4002, "step": 1218 }, { "epoch": 0.2901172130659844, "grad_norm": 0.410009814193448, "learning_rate": 9.657964659877683e-06, "loss": 0.4234, "step": 1219 }, { "epoch": 0.2903552091390492, "grad_norm": 0.44464576009309903, "learning_rate": 9.657263627087312e-06, "loss": 0.3546, "step": 1220 }, { "epoch": 0.290593205212114, "grad_norm": 0.3867603419054925, "learning_rate": 9.656561902112349e-06, "loss": 0.3863, "step": 1221 }, { "epoch": 0.2908312012851788, "grad_norm": 0.3979315475026651, "learning_rate": 9.655859485057091e-06, "loss": 0.4364, "step": 1222 }, { "epoch": 0.2910691973582436, "grad_norm": 0.3855514558708465, "learning_rate": 9.655156376025932e-06, "loss": 0.3804, "step": 1223 }, { "epoch": 0.2913071934313084, "grad_norm": 0.39833146644821477, "learning_rate": 9.654452575123373e-06, "loss": 0.3113, "step": 1224 }, { "epoch": 0.2915451895043732, "grad_norm": 0.3873575017477939, "learning_rate": 9.653748082454016e-06, "loss": 0.4139, "step": 1225 }, { "epoch": 0.291783185577438, "grad_norm": 0.4339801773731958, "learning_rate": 9.653042898122565e-06, "loss": 0.4278, "step": 1226 }, { "epoch": 0.2920211816505028, "grad_norm": 0.4448702833601803, "learning_rate": 9.652337022233829e-06, "loss": 0.3272, "step": 1227 }, { "epoch": 0.2922591777235676, "grad_norm": 0.42298609998301084, "learning_rate": 9.651630454892718e-06, "loss": 0.3342, "step": 1228 }, { "epoch": 0.29249717379663237, "grad_norm": 0.386945622028957, "learning_rate": 9.650923196204248e-06, "loss": 0.4296, "step": 1229 }, { "epoch": 0.29273516986969716, "grad_norm": 0.41831065108508075, "learning_rate": 9.650215246273529e-06, "loss": 0.4312, "step": 1230 }, { "epoch": 0.29297316594276196, "grad_norm": 0.39310689224557216, "learning_rate": 9.649506605205786e-06, "loss": 0.3573, "step": 1231 }, { "epoch": 0.29321116201582675, "grad_norm": 0.40652348372519026, "learning_rate": 9.648797273106338e-06, "loss": 0.3865, "step": 1232 }, { "epoch": 0.29344915808889155, "grad_norm": 0.38816699963135914, "learning_rate": 9.648087250080609e-06, "loss": 0.4138, "step": 1233 }, { "epoch": 0.29368715416195634, "grad_norm": 0.42050242112991676, "learning_rate": 9.647376536234126e-06, "loss": 0.3695, "step": 1234 }, { "epoch": 0.29392515023502114, "grad_norm": 0.45027811269540524, "learning_rate": 9.64666513167252e-06, "loss": 0.3329, "step": 1235 }, { "epoch": 0.29416314630808593, "grad_norm": 0.4324780377574711, "learning_rate": 9.645953036501521e-06, "loss": 0.412, "step": 1236 }, { "epoch": 0.2944011423811507, "grad_norm": 0.4229530775773719, "learning_rate": 9.645240250826969e-06, "loss": 0.4417, "step": 1237 }, { "epoch": 0.2946391384542155, "grad_norm": 0.5100450045808651, "learning_rate": 9.644526774754794e-06, "loss": 0.3331, "step": 1238 }, { "epoch": 0.2948771345272803, "grad_norm": 0.39386712906627963, "learning_rate": 9.643812608391042e-06, "loss": 0.4048, "step": 1239 }, { "epoch": 0.2951151306003451, "grad_norm": 0.4161776575953358, "learning_rate": 9.643097751841854e-06, "loss": 0.4666, "step": 1240 }, { "epoch": 0.2953531266734099, "grad_norm": 0.40609935662404656, "learning_rate": 9.642382205213476e-06, "loss": 0.3557, "step": 1241 }, { "epoch": 0.2955911227464747, "grad_norm": 0.40786129971537194, "learning_rate": 9.641665968612254e-06, "loss": 0.368, "step": 1242 }, { "epoch": 0.2958291188195395, "grad_norm": 0.39892048689403614, "learning_rate": 9.640949042144641e-06, "loss": 0.374, "step": 1243 }, { "epoch": 0.2960671148926043, "grad_norm": 0.42072884743876504, "learning_rate": 9.640231425917186e-06, "loss": 0.4409, "step": 1244 }, { "epoch": 0.2963051109656691, "grad_norm": 0.3809774278312238, "learning_rate": 9.63951312003655e-06, "loss": 0.3276, "step": 1245 }, { "epoch": 0.2965431070387339, "grad_norm": 0.4112816053420641, "learning_rate": 9.638794124609487e-06, "loss": 0.3342, "step": 1246 }, { "epoch": 0.29678110311179867, "grad_norm": 0.41137858878488315, "learning_rate": 9.63807443974286e-06, "loss": 0.4281, "step": 1247 }, { "epoch": 0.29701909918486347, "grad_norm": 0.4301212258272213, "learning_rate": 9.63735406554363e-06, "loss": 0.333, "step": 1248 }, { "epoch": 0.29725709525792826, "grad_norm": 0.47855201036627526, "learning_rate": 9.636633002118865e-06, "loss": 0.3555, "step": 1249 }, { "epoch": 0.29749509133099306, "grad_norm": 0.3926815914537034, "learning_rate": 9.635911249575729e-06, "loss": 0.3723, "step": 1250 }, { "epoch": 0.29773308740405785, "grad_norm": 0.41169709632873663, "learning_rate": 9.635188808021496e-06, "loss": 0.443, "step": 1251 }, { "epoch": 0.29797108347712264, "grad_norm": 0.41928978072848383, "learning_rate": 9.634465677563537e-06, "loss": 0.3731, "step": 1252 }, { "epoch": 0.29820907955018744, "grad_norm": 0.4466280351365597, "learning_rate": 9.633741858309325e-06, "loss": 0.3359, "step": 1253 }, { "epoch": 0.29844707562325223, "grad_norm": 0.5973113828554666, "learning_rate": 9.633017350366441e-06, "loss": 0.4, "step": 1254 }, { "epoch": 0.29868507169631703, "grad_norm": 0.4181046582171295, "learning_rate": 9.632292153842565e-06, "loss": 0.4306, "step": 1255 }, { "epoch": 0.2989230677693818, "grad_norm": 0.4310021297808129, "learning_rate": 9.631566268845476e-06, "loss": 0.3564, "step": 1256 }, { "epoch": 0.2991610638424466, "grad_norm": 0.38843714102902427, "learning_rate": 9.630839695483059e-06, "loss": 0.3675, "step": 1257 }, { "epoch": 0.2993990599155114, "grad_norm": 0.40697877470197674, "learning_rate": 9.630112433863304e-06, "loss": 0.4735, "step": 1258 }, { "epoch": 0.2996370559885762, "grad_norm": 0.4383586613694374, "learning_rate": 9.629384484094296e-06, "loss": 0.3543, "step": 1259 }, { "epoch": 0.299875052061641, "grad_norm": 0.44985065245385947, "learning_rate": 9.628655846284228e-06, "loss": 0.3336, "step": 1260 }, { "epoch": 0.3001130481347058, "grad_norm": 0.394156383482113, "learning_rate": 9.627926520541395e-06, "loss": 0.3836, "step": 1261 }, { "epoch": 0.3003510442077706, "grad_norm": 0.48348036503403014, "learning_rate": 9.627196506974192e-06, "loss": 0.4122, "step": 1262 }, { "epoch": 0.3005890402808354, "grad_norm": 0.4046212158723506, "learning_rate": 9.626465805691117e-06, "loss": 0.3412, "step": 1263 }, { "epoch": 0.3008270363539002, "grad_norm": 0.4637536508877546, "learning_rate": 9.625734416800768e-06, "loss": 0.3617, "step": 1264 }, { "epoch": 0.301065032426965, "grad_norm": 0.3696735431596717, "learning_rate": 9.625002340411851e-06, "loss": 0.421, "step": 1265 }, { "epoch": 0.30130302850002977, "grad_norm": 0.44039294060962975, "learning_rate": 9.624269576633168e-06, "loss": 0.342, "step": 1266 }, { "epoch": 0.30154102457309456, "grad_norm": 0.4066558094391624, "learning_rate": 9.623536125573628e-06, "loss": 0.3259, "step": 1267 }, { "epoch": 0.30177902064615936, "grad_norm": 0.4228494081970754, "learning_rate": 9.622801987342239e-06, "loss": 0.3966, "step": 1268 }, { "epoch": 0.30201701671922415, "grad_norm": 0.3929044823680207, "learning_rate": 9.622067162048111e-06, "loss": 0.4214, "step": 1269 }, { "epoch": 0.30225501279228895, "grad_norm": 0.4364618202087389, "learning_rate": 9.62133164980046e-06, "loss": 0.3539, "step": 1270 }, { "epoch": 0.30249300886535374, "grad_norm": 0.3973240447603393, "learning_rate": 9.620595450708598e-06, "loss": 0.3228, "step": 1271 }, { "epoch": 0.30273100493841854, "grad_norm": 0.423749076343449, "learning_rate": 9.619858564881945e-06, "loss": 0.4197, "step": 1272 }, { "epoch": 0.30296900101148333, "grad_norm": 0.42928761287682166, "learning_rate": 9.61912099243002e-06, "loss": 0.3675, "step": 1273 }, { "epoch": 0.3032069970845481, "grad_norm": 0.4262017901034838, "learning_rate": 9.618382733462443e-06, "loss": 0.3333, "step": 1274 }, { "epoch": 0.3034449931576129, "grad_norm": 0.41851797958081777, "learning_rate": 9.617643788088938e-06, "loss": 0.4031, "step": 1275 }, { "epoch": 0.3036829892306777, "grad_norm": 0.44040318016484536, "learning_rate": 9.616904156419332e-06, "loss": 0.4259, "step": 1276 }, { "epoch": 0.3039209853037425, "grad_norm": 0.41176036786394593, "learning_rate": 9.616163838563551e-06, "loss": 0.323, "step": 1277 }, { "epoch": 0.3041589813768073, "grad_norm": 0.39969851565063835, "learning_rate": 9.615422834631627e-06, "loss": 0.311, "step": 1278 }, { "epoch": 0.3043969774498721, "grad_norm": 0.4226290204089705, "learning_rate": 9.614681144733688e-06, "loss": 0.4125, "step": 1279 }, { "epoch": 0.3046349735229369, "grad_norm": 0.43676015567681714, "learning_rate": 9.61393876897997e-06, "loss": 0.4041, "step": 1280 }, { "epoch": 0.3048729695960017, "grad_norm": 0.3863010410381727, "learning_rate": 9.613195707480808e-06, "loss": 0.3125, "step": 1281 }, { "epoch": 0.3051109656690665, "grad_norm": 0.41088520843304066, "learning_rate": 9.612451960346636e-06, "loss": 0.3645, "step": 1282 }, { "epoch": 0.3053489617421313, "grad_norm": 0.4163362500331567, "learning_rate": 9.611707527688e-06, "loss": 0.4114, "step": 1283 }, { "epoch": 0.30558695781519607, "grad_norm": 0.4627242806057053, "learning_rate": 9.610962409615534e-06, "loss": 0.3343, "step": 1284 }, { "epoch": 0.30582495388826086, "grad_norm": 0.4027539448469817, "learning_rate": 9.610216606239987e-06, "loss": 0.3279, "step": 1285 }, { "epoch": 0.30606294996132566, "grad_norm": 0.4145181397037847, "learning_rate": 9.609470117672199e-06, "loss": 0.4067, "step": 1286 }, { "epoch": 0.30630094603439045, "grad_norm": 0.41297813308066056, "learning_rate": 9.608722944023119e-06, "loss": 0.3966, "step": 1287 }, { "epoch": 0.30653894210745525, "grad_norm": 0.410910470454594, "learning_rate": 9.607975085403796e-06, "loss": 0.3297, "step": 1288 }, { "epoch": 0.30677693818052004, "grad_norm": 0.3854845755046187, "learning_rate": 9.607226541925379e-06, "loss": 0.3767, "step": 1289 }, { "epoch": 0.30701493425358484, "grad_norm": 0.4443276950244607, "learning_rate": 9.60647731369912e-06, "loss": 0.4304, "step": 1290 }, { "epoch": 0.30725293032664963, "grad_norm": 0.48085590127195105, "learning_rate": 9.605727400836373e-06, "loss": 0.3869, "step": 1291 }, { "epoch": 0.3074909263997144, "grad_norm": 0.421202050029993, "learning_rate": 9.604976803448596e-06, "loss": 0.3321, "step": 1292 }, { "epoch": 0.3077289224727792, "grad_norm": 0.4177390717152937, "learning_rate": 9.604225521647343e-06, "loss": 0.4087, "step": 1293 }, { "epoch": 0.307966918545844, "grad_norm": 0.38933105632433157, "learning_rate": 9.603473555544277e-06, "loss": 0.4267, "step": 1294 }, { "epoch": 0.3082049146189088, "grad_norm": 0.4249818846999146, "learning_rate": 9.602720905251153e-06, "loss": 0.3674, "step": 1295 }, { "epoch": 0.3084429106919736, "grad_norm": 0.3917934403028404, "learning_rate": 9.601967570879837e-06, "loss": 0.3504, "step": 1296 }, { "epoch": 0.3086809067650384, "grad_norm": 0.4115539403873786, "learning_rate": 9.601213552542295e-06, "loss": 0.4318, "step": 1297 }, { "epoch": 0.3089189028381032, "grad_norm": 0.40804809857958424, "learning_rate": 9.600458850350588e-06, "loss": 0.3798, "step": 1298 }, { "epoch": 0.309156898911168, "grad_norm": 0.4101236081509535, "learning_rate": 9.599703464416888e-06, "loss": 0.322, "step": 1299 }, { "epoch": 0.3093948949842328, "grad_norm": 0.40151490793067635, "learning_rate": 9.598947394853459e-06, "loss": 0.3762, "step": 1300 }, { "epoch": 0.3096328910572976, "grad_norm": 0.4266339246416687, "learning_rate": 9.598190641772678e-06, "loss": 0.4228, "step": 1301 }, { "epoch": 0.30987088713036237, "grad_norm": 0.43973887455955873, "learning_rate": 9.597433205287013e-06, "loss": 0.3686, "step": 1302 }, { "epoch": 0.31010888320342717, "grad_norm": 0.3914498413085817, "learning_rate": 9.596675085509037e-06, "loss": 0.3618, "step": 1303 }, { "epoch": 0.31034687927649196, "grad_norm": 0.3984178847846324, "learning_rate": 9.595916282551429e-06, "loss": 0.4209, "step": 1304 }, { "epoch": 0.31058487534955675, "grad_norm": 0.39984926069708504, "learning_rate": 9.595156796526963e-06, "loss": 0.4215, "step": 1305 }, { "epoch": 0.31082287142262155, "grad_norm": 0.4070793173356604, "learning_rate": 9.59439662754852e-06, "loss": 0.3554, "step": 1306 }, { "epoch": 0.31106086749568634, "grad_norm": 0.3874699043146027, "learning_rate": 9.593635775729075e-06, "loss": 0.3398, "step": 1307 }, { "epoch": 0.31129886356875114, "grad_norm": 0.39786057856428775, "learning_rate": 9.592874241181715e-06, "loss": 0.4606, "step": 1308 }, { "epoch": 0.31153685964181593, "grad_norm": 0.4280746953470422, "learning_rate": 9.59211202401962e-06, "loss": 0.3604, "step": 1309 }, { "epoch": 0.3117748557148807, "grad_norm": 0.4021864469571394, "learning_rate": 9.591349124356075e-06, "loss": 0.3332, "step": 1310 }, { "epoch": 0.3120128517879455, "grad_norm": 0.39554183184913133, "learning_rate": 9.590585542304466e-06, "loss": 0.3999, "step": 1311 }, { "epoch": 0.3122508478610103, "grad_norm": 0.4424536440928366, "learning_rate": 9.58982127797828e-06, "loss": 0.4063, "step": 1312 }, { "epoch": 0.3124888439340751, "grad_norm": 0.45440939559298216, "learning_rate": 9.589056331491103e-06, "loss": 0.3491, "step": 1313 }, { "epoch": 0.3127268400071399, "grad_norm": 0.39330929295389805, "learning_rate": 9.58829070295663e-06, "loss": 0.3901, "step": 1314 }, { "epoch": 0.3129648360802047, "grad_norm": 0.38833530931998134, "learning_rate": 9.587524392488647e-06, "loss": 0.4466, "step": 1315 }, { "epoch": 0.3132028321532695, "grad_norm": 0.44664852368249736, "learning_rate": 9.586757400201052e-06, "loss": 0.3538, "step": 1316 }, { "epoch": 0.3134408282263343, "grad_norm": 0.40740414428696925, "learning_rate": 9.585989726207837e-06, "loss": 0.3446, "step": 1317 }, { "epoch": 0.3136788242993991, "grad_norm": 0.38148457875580716, "learning_rate": 9.585221370623095e-06, "loss": 0.3682, "step": 1318 }, { "epoch": 0.3139168203724639, "grad_norm": 0.43478324832719745, "learning_rate": 9.584452333561024e-06, "loss": 0.424, "step": 1319 }, { "epoch": 0.3141548164455287, "grad_norm": 0.4000748087214559, "learning_rate": 9.583682615135923e-06, "loss": 0.3392, "step": 1320 }, { "epoch": 0.31439281251859347, "grad_norm": 0.3835145914512324, "learning_rate": 9.58291221546219e-06, "loss": 0.3482, "step": 1321 }, { "epoch": 0.31463080859165826, "grad_norm": 0.45176179860066545, "learning_rate": 9.582141134654327e-06, "loss": 0.4232, "step": 1322 }, { "epoch": 0.31486880466472306, "grad_norm": 0.4268907832728068, "learning_rate": 9.581369372826933e-06, "loss": 0.4082, "step": 1323 }, { "epoch": 0.31510680073778785, "grad_norm": 0.41052383905112105, "learning_rate": 9.580596930094716e-06, "loss": 0.3438, "step": 1324 }, { "epoch": 0.31534479681085265, "grad_norm": 0.37945978663945285, "learning_rate": 9.579823806572474e-06, "loss": 0.4025, "step": 1325 }, { "epoch": 0.31558279288391744, "grad_norm": 0.46441757305264597, "learning_rate": 9.579050002375115e-06, "loss": 0.4172, "step": 1326 }, { "epoch": 0.31582078895698223, "grad_norm": 0.4393276338549456, "learning_rate": 9.578275517617646e-06, "loss": 0.3395, "step": 1327 }, { "epoch": 0.31605878503004703, "grad_norm": 0.41000616260473466, "learning_rate": 9.577500352415174e-06, "loss": 0.3254, "step": 1328 }, { "epoch": 0.3162967811031118, "grad_norm": 0.420002315041422, "learning_rate": 9.576724506882908e-06, "loss": 0.4023, "step": 1329 }, { "epoch": 0.3165347771761766, "grad_norm": 0.4236036611226363, "learning_rate": 9.575947981136158e-06, "loss": 0.367, "step": 1330 }, { "epoch": 0.3167727732492414, "grad_norm": 0.4483840411549537, "learning_rate": 9.575170775290333e-06, "loss": 0.366, "step": 1331 }, { "epoch": 0.3170107693223062, "grad_norm": 0.4313647137240431, "learning_rate": 9.574392889460947e-06, "loss": 0.3813, "step": 1332 }, { "epoch": 0.317248765395371, "grad_norm": 0.38650979945066577, "learning_rate": 9.573614323763613e-06, "loss": 0.4281, "step": 1333 }, { "epoch": 0.3174867614684358, "grad_norm": 0.41737500297562424, "learning_rate": 9.572835078314044e-06, "loss": 0.3513, "step": 1334 }, { "epoch": 0.3177247575415006, "grad_norm": 0.4348449187581223, "learning_rate": 9.572055153228056e-06, "loss": 0.3564, "step": 1335 }, { "epoch": 0.3179627536145654, "grad_norm": 0.39019537145273137, "learning_rate": 9.571274548621566e-06, "loss": 0.3876, "step": 1336 }, { "epoch": 0.3182007496876302, "grad_norm": 0.4246028933708349, "learning_rate": 9.570493264610589e-06, "loss": 0.397, "step": 1337 }, { "epoch": 0.318438745760695, "grad_norm": 0.39930253543441263, "learning_rate": 9.569711301311247e-06, "loss": 0.3201, "step": 1338 }, { "epoch": 0.31867674183375977, "grad_norm": 0.45407334734341276, "learning_rate": 9.568928658839754e-06, "loss": 0.3434, "step": 1339 }, { "epoch": 0.31891473790682456, "grad_norm": 0.39643025758667066, "learning_rate": 9.568145337312432e-06, "loss": 0.4361, "step": 1340 }, { "epoch": 0.31915273397988936, "grad_norm": 0.421815069567987, "learning_rate": 9.567361336845704e-06, "loss": 0.3701, "step": 1341 }, { "epoch": 0.31939073005295415, "grad_norm": 0.38833337925881534, "learning_rate": 9.566576657556089e-06, "loss": 0.3495, "step": 1342 }, { "epoch": 0.31962872612601895, "grad_norm": 0.4180768293262518, "learning_rate": 9.565791299560211e-06, "loss": 0.397, "step": 1343 }, { "epoch": 0.31986672219908374, "grad_norm": 0.4164700070432731, "learning_rate": 9.565005262974795e-06, "loss": 0.4271, "step": 1344 }, { "epoch": 0.32010471827214854, "grad_norm": 0.4468347817517033, "learning_rate": 9.564218547916664e-06, "loss": 0.3495, "step": 1345 }, { "epoch": 0.32034271434521333, "grad_norm": 0.40663558186072163, "learning_rate": 9.563431154502742e-06, "loss": 0.3238, "step": 1346 }, { "epoch": 0.3205807104182781, "grad_norm": 0.3817636234384891, "learning_rate": 9.562643082850058e-06, "loss": 0.4335, "step": 1347 }, { "epoch": 0.3208187064913429, "grad_norm": 0.4623172946195981, "learning_rate": 9.561854333075737e-06, "loss": 0.3936, "step": 1348 }, { "epoch": 0.3210567025644077, "grad_norm": 0.41347372584642295, "learning_rate": 9.561064905297007e-06, "loss": 0.3192, "step": 1349 }, { "epoch": 0.3212946986374725, "grad_norm": 0.45698235146730654, "learning_rate": 9.560274799631196e-06, "loss": 0.4274, "step": 1350 }, { "epoch": 0.3215326947105373, "grad_norm": 0.4125490994148022, "learning_rate": 9.559484016195734e-06, "loss": 0.4421, "step": 1351 }, { "epoch": 0.3217706907836021, "grad_norm": 0.3839103030987149, "learning_rate": 9.558692555108153e-06, "loss": 0.3393, "step": 1352 }, { "epoch": 0.3220086868566669, "grad_norm": 0.4091260775678065, "learning_rate": 9.557900416486082e-06, "loss": 0.3491, "step": 1353 }, { "epoch": 0.3222466829297317, "grad_norm": 0.3807018578165834, "learning_rate": 9.55710760044725e-06, "loss": 0.4185, "step": 1354 }, { "epoch": 0.3224846790027965, "grad_norm": 0.40209441835271953, "learning_rate": 9.556314107109492e-06, "loss": 0.4234, "step": 1355 }, { "epoch": 0.3227226750758612, "grad_norm": 0.40937014075451966, "learning_rate": 9.555519936590739e-06, "loss": 0.3316, "step": 1356 }, { "epoch": 0.322960671148926, "grad_norm": 0.3884304266780269, "learning_rate": 9.554725089009028e-06, "loss": 0.4138, "step": 1357 }, { "epoch": 0.3231986672219908, "grad_norm": 0.44485363699581787, "learning_rate": 9.553929564482486e-06, "loss": 0.4333, "step": 1358 }, { "epoch": 0.3234366632950556, "grad_norm": 0.3994706589980082, "learning_rate": 9.553133363129354e-06, "loss": 0.3354, "step": 1359 }, { "epoch": 0.3236746593681204, "grad_norm": 0.38575800184976405, "learning_rate": 9.552336485067966e-06, "loss": 0.3318, "step": 1360 }, { "epoch": 0.3239126554411852, "grad_norm": 0.38907588895075834, "learning_rate": 9.551538930416757e-06, "loss": 0.3931, "step": 1361 }, { "epoch": 0.32415065151425, "grad_norm": 0.4005989165863969, "learning_rate": 9.550740699294263e-06, "loss": 0.4475, "step": 1362 }, { "epoch": 0.3243886475873148, "grad_norm": 0.3978604964741817, "learning_rate": 9.54994179181912e-06, "loss": 0.3326, "step": 1363 }, { "epoch": 0.3246266436603796, "grad_norm": 0.41103874966576015, "learning_rate": 9.549142208110069e-06, "loss": 0.3415, "step": 1364 }, { "epoch": 0.32486463973344437, "grad_norm": 0.407764483418257, "learning_rate": 9.548341948285945e-06, "loss": 0.4316, "step": 1365 }, { "epoch": 0.32510263580650917, "grad_norm": 0.41035785125072904, "learning_rate": 9.547541012465684e-06, "loss": 0.3737, "step": 1366 }, { "epoch": 0.32534063187957396, "grad_norm": 0.43499657258118146, "learning_rate": 9.54673940076833e-06, "loss": 0.3175, "step": 1367 }, { "epoch": 0.32557862795263875, "grad_norm": 0.4004695765130625, "learning_rate": 9.545937113313019e-06, "loss": 0.3881, "step": 1368 }, { "epoch": 0.32581662402570355, "grad_norm": 0.4020663184428851, "learning_rate": 9.545134150218993e-06, "loss": 0.4156, "step": 1369 }, { "epoch": 0.32605462009876834, "grad_norm": 0.40642741251741493, "learning_rate": 9.544330511605591e-06, "loss": 0.3247, "step": 1370 }, { "epoch": 0.32629261617183314, "grad_norm": 0.3641358027632288, "learning_rate": 9.543526197592255e-06, "loss": 0.345, "step": 1371 }, { "epoch": 0.32653061224489793, "grad_norm": 0.3989061307234226, "learning_rate": 9.542721208298522e-06, "loss": 0.4211, "step": 1372 }, { "epoch": 0.3267686083179627, "grad_norm": 0.38215478134894165, "learning_rate": 9.541915543844036e-06, "loss": 0.3929, "step": 1373 }, { "epoch": 0.3270066043910275, "grad_norm": 0.39691981690953293, "learning_rate": 9.54110920434854e-06, "loss": 0.3495, "step": 1374 }, { "epoch": 0.3272446004640923, "grad_norm": 0.38215194964724575, "learning_rate": 9.540302189931874e-06, "loss": 0.385, "step": 1375 }, { "epoch": 0.3274825965371571, "grad_norm": 0.3606406479597386, "learning_rate": 9.539494500713982e-06, "loss": 0.4526, "step": 1376 }, { "epoch": 0.3277205926102219, "grad_norm": 0.40105227105638763, "learning_rate": 9.538686136814905e-06, "loss": 0.3464, "step": 1377 }, { "epoch": 0.3279585886832867, "grad_norm": 0.41070873761480936, "learning_rate": 9.537877098354787e-06, "loss": 0.3559, "step": 1378 }, { "epoch": 0.3281965847563515, "grad_norm": 0.40931042414878005, "learning_rate": 9.53706738545387e-06, "loss": 0.3865, "step": 1379 }, { "epoch": 0.3284345808294163, "grad_norm": 0.4254957758069491, "learning_rate": 9.536256998232496e-06, "loss": 0.3928, "step": 1380 }, { "epoch": 0.3286725769024811, "grad_norm": 0.4198060992786074, "learning_rate": 9.535445936811111e-06, "loss": 0.3368, "step": 1381 }, { "epoch": 0.3289105729755459, "grad_norm": 0.39019263851197356, "learning_rate": 9.53463420131026e-06, "loss": 0.3788, "step": 1382 }, { "epoch": 0.3291485690486107, "grad_norm": 0.3881968657734908, "learning_rate": 9.533821791850585e-06, "loss": 0.4478, "step": 1383 }, { "epoch": 0.32938656512167547, "grad_norm": 0.41104330210033657, "learning_rate": 9.533008708552829e-06, "loss": 0.3468, "step": 1384 }, { "epoch": 0.32962456119474026, "grad_norm": 0.4668376033750086, "learning_rate": 9.532194951537838e-06, "loss": 0.339, "step": 1385 }, { "epoch": 0.32986255726780506, "grad_norm": 0.41125619080230763, "learning_rate": 9.531380520926559e-06, "loss": 0.3966, "step": 1386 }, { "epoch": 0.33010055334086985, "grad_norm": 0.4471468164842655, "learning_rate": 9.53056541684003e-06, "loss": 0.4227, "step": 1387 }, { "epoch": 0.33033854941393465, "grad_norm": 0.3796087714093602, "learning_rate": 9.5297496393994e-06, "loss": 0.3417, "step": 1388 }, { "epoch": 0.33057654548699944, "grad_norm": 0.4097059136074371, "learning_rate": 9.528933188725913e-06, "loss": 0.3784, "step": 1389 }, { "epoch": 0.33081454156006423, "grad_norm": 0.41994702732943573, "learning_rate": 9.528116064940915e-06, "loss": 0.4296, "step": 1390 }, { "epoch": 0.33105253763312903, "grad_norm": 0.43876123590734234, "learning_rate": 9.52729826816585e-06, "loss": 0.3459, "step": 1391 }, { "epoch": 0.3312905337061938, "grad_norm": 0.44759563573787553, "learning_rate": 9.526479798522261e-06, "loss": 0.3338, "step": 1392 }, { "epoch": 0.3315285297792586, "grad_norm": 0.39997856436860535, "learning_rate": 9.525660656131794e-06, "loss": 0.3782, "step": 1393 }, { "epoch": 0.3317665258523234, "grad_norm": 0.4496886369666572, "learning_rate": 9.524840841116194e-06, "loss": 0.4685, "step": 1394 }, { "epoch": 0.3320045219253882, "grad_norm": 0.4074594148653707, "learning_rate": 9.524020353597306e-06, "loss": 0.3464, "step": 1395 }, { "epoch": 0.332242517998453, "grad_norm": 0.4240780629108365, "learning_rate": 9.523199193697076e-06, "loss": 0.3664, "step": 1396 }, { "epoch": 0.3324805140715178, "grad_norm": 0.3724547733959635, "learning_rate": 9.522377361537546e-06, "loss": 0.4263, "step": 1397 }, { "epoch": 0.3327185101445826, "grad_norm": 0.3848768663283912, "learning_rate": 9.521554857240863e-06, "loss": 0.3997, "step": 1398 }, { "epoch": 0.3329565062176474, "grad_norm": 0.3747787306372805, "learning_rate": 9.520731680929268e-06, "loss": 0.3324, "step": 1399 }, { "epoch": 0.3331945022907122, "grad_norm": 0.40168499777226635, "learning_rate": 9.51990783272511e-06, "loss": 0.3988, "step": 1400 }, { "epoch": 0.333432498363777, "grad_norm": 0.37263893468315484, "learning_rate": 9.519083312750829e-06, "loss": 0.433, "step": 1401 }, { "epoch": 0.33367049443684177, "grad_norm": 0.39450747544417814, "learning_rate": 9.518258121128971e-06, "loss": 0.3332, "step": 1402 }, { "epoch": 0.33390849050990656, "grad_norm": 0.4159109527655681, "learning_rate": 9.517432257982182e-06, "loss": 0.3519, "step": 1403 }, { "epoch": 0.33414648658297136, "grad_norm": 0.3758964436379061, "learning_rate": 9.516605723433202e-06, "loss": 0.404, "step": 1404 }, { "epoch": 0.33438448265603615, "grad_norm": 0.4167452335375169, "learning_rate": 9.515778517604876e-06, "loss": 0.3771, "step": 1405 }, { "epoch": 0.33462247872910095, "grad_norm": 0.4231708121211018, "learning_rate": 9.514950640620148e-06, "loss": 0.3153, "step": 1406 }, { "epoch": 0.33486047480216574, "grad_norm": 0.4358774633836512, "learning_rate": 9.514122092602061e-06, "loss": 0.3527, "step": 1407 }, { "epoch": 0.33509847087523054, "grad_norm": 0.3773178127174748, "learning_rate": 9.513292873673757e-06, "loss": 0.4515, "step": 1408 }, { "epoch": 0.33533646694829533, "grad_norm": 0.39844835991429833, "learning_rate": 9.512462983958476e-06, "loss": 0.3841, "step": 1409 }, { "epoch": 0.3355744630213601, "grad_norm": 0.41246380594274273, "learning_rate": 9.511632423579564e-06, "loss": 0.312, "step": 1410 }, { "epoch": 0.3358124590944249, "grad_norm": 0.39771084162538206, "learning_rate": 9.510801192660463e-06, "loss": 0.4066, "step": 1411 }, { "epoch": 0.3360504551674897, "grad_norm": 0.40735314602043377, "learning_rate": 9.509969291324711e-06, "loss": 0.4026, "step": 1412 }, { "epoch": 0.3362884512405545, "grad_norm": 0.4306392515796982, "learning_rate": 9.509136719695952e-06, "loss": 0.3391, "step": 1413 }, { "epoch": 0.3365264473136193, "grad_norm": 0.38787745703798865, "learning_rate": 9.508303477897925e-06, "loss": 0.3623, "step": 1414 }, { "epoch": 0.3367644433866841, "grad_norm": 0.38221096427891993, "learning_rate": 9.507469566054472e-06, "loss": 0.4347, "step": 1415 }, { "epoch": 0.3370024394597489, "grad_norm": 0.39794787975787377, "learning_rate": 9.506634984289532e-06, "loss": 0.3685, "step": 1416 }, { "epoch": 0.3372404355328137, "grad_norm": 0.42203140787297544, "learning_rate": 9.505799732727144e-06, "loss": 0.3255, "step": 1417 }, { "epoch": 0.3374784316058785, "grad_norm": 0.4130150729651241, "learning_rate": 9.504963811491448e-06, "loss": 0.3852, "step": 1418 }, { "epoch": 0.3377164276789433, "grad_norm": 0.4035329355042889, "learning_rate": 9.50412722070668e-06, "loss": 0.4231, "step": 1419 }, { "epoch": 0.33795442375200807, "grad_norm": 0.42264021258045936, "learning_rate": 9.503289960497184e-06, "loss": 0.3096, "step": 1420 }, { "epoch": 0.33819241982507287, "grad_norm": 0.41033520433424203, "learning_rate": 9.502452030987392e-06, "loss": 0.3462, "step": 1421 }, { "epoch": 0.33843041589813766, "grad_norm": 0.3734652547743581, "learning_rate": 9.501613432301843e-06, "loss": 0.4026, "step": 1422 }, { "epoch": 0.33866841197120245, "grad_norm": 0.44488771827168927, "learning_rate": 9.500774164565172e-06, "loss": 0.403, "step": 1423 }, { "epoch": 0.33890640804426725, "grad_norm": 0.41540692997233103, "learning_rate": 9.499934227902119e-06, "loss": 0.3038, "step": 1424 }, { "epoch": 0.33914440411733204, "grad_norm": 0.3759433684152908, "learning_rate": 9.499093622437516e-06, "loss": 0.3459, "step": 1425 }, { "epoch": 0.33938240019039684, "grad_norm": 0.3790112278132182, "learning_rate": 9.498252348296298e-06, "loss": 0.4454, "step": 1426 }, { "epoch": 0.33962039626346163, "grad_norm": 0.4478868649304047, "learning_rate": 9.4974104056035e-06, "loss": 0.3263, "step": 1427 }, { "epoch": 0.3398583923365264, "grad_norm": 0.3687692738652499, "learning_rate": 9.496567794484254e-06, "loss": 0.3447, "step": 1428 }, { "epoch": 0.3400963884095912, "grad_norm": 0.3815394491514148, "learning_rate": 9.495724515063795e-06, "loss": 0.411, "step": 1429 }, { "epoch": 0.340334384482656, "grad_norm": 0.41607171762494266, "learning_rate": 9.494880567467454e-06, "loss": 0.4161, "step": 1430 }, { "epoch": 0.3405723805557208, "grad_norm": 0.39572328438968524, "learning_rate": 9.494035951820662e-06, "loss": 0.332, "step": 1431 }, { "epoch": 0.3408103766287856, "grad_norm": 0.37481472394853776, "learning_rate": 9.493190668248951e-06, "loss": 0.3546, "step": 1432 }, { "epoch": 0.3410483727018504, "grad_norm": 0.40603528903956776, "learning_rate": 9.49234471687795e-06, "loss": 0.4331, "step": 1433 }, { "epoch": 0.3412863687749152, "grad_norm": 0.3562649954495181, "learning_rate": 9.491498097833391e-06, "loss": 0.3442, "step": 1434 }, { "epoch": 0.34152436484798, "grad_norm": 0.4633907342034106, "learning_rate": 9.490650811241098e-06, "loss": 0.3301, "step": 1435 }, { "epoch": 0.3417623609210448, "grad_norm": 0.4196029970855259, "learning_rate": 9.489802857227001e-06, "loss": 0.4032, "step": 1436 }, { "epoch": 0.3420003569941096, "grad_norm": 0.4036864305636982, "learning_rate": 9.488954235917129e-06, "loss": 0.4371, "step": 1437 }, { "epoch": 0.3422383530671744, "grad_norm": 0.4424631609113763, "learning_rate": 9.488104947437606e-06, "loss": 0.3401, "step": 1438 }, { "epoch": 0.34247634914023917, "grad_norm": 0.3868950566216425, "learning_rate": 9.487254991914655e-06, "loss": 0.3546, "step": 1439 }, { "epoch": 0.34271434521330396, "grad_norm": 0.3939081607602161, "learning_rate": 9.486404369474605e-06, "loss": 0.4171, "step": 1440 }, { "epoch": 0.34295234128636876, "grad_norm": 0.42524765774393825, "learning_rate": 9.485553080243877e-06, "loss": 0.4112, "step": 1441 }, { "epoch": 0.34319033735943355, "grad_norm": 0.40156763127279305, "learning_rate": 9.484701124348994e-06, "loss": 0.3195, "step": 1442 }, { "epoch": 0.34342833343249834, "grad_norm": 0.3769325847215234, "learning_rate": 9.483848501916578e-06, "loss": 0.3893, "step": 1443 }, { "epoch": 0.34366632950556314, "grad_norm": 0.38885175403043437, "learning_rate": 9.482995213073349e-06, "loss": 0.4137, "step": 1444 }, { "epoch": 0.34390432557862793, "grad_norm": 0.41829331584548823, "learning_rate": 9.482141257946128e-06, "loss": 0.3256, "step": 1445 }, { "epoch": 0.34414232165169273, "grad_norm": 0.39698859193541414, "learning_rate": 9.481286636661832e-06, "loss": 0.3461, "step": 1446 }, { "epoch": 0.3443803177247575, "grad_norm": 0.40947635708434343, "learning_rate": 9.480431349347482e-06, "loss": 0.4253, "step": 1447 }, { "epoch": 0.3446183137978223, "grad_norm": 0.4008658220372033, "learning_rate": 9.479575396130192e-06, "loss": 0.3822, "step": 1448 }, { "epoch": 0.3448563098708871, "grad_norm": 0.38802614165640026, "learning_rate": 9.478718777137178e-06, "loss": 0.3252, "step": 1449 }, { "epoch": 0.3450943059439519, "grad_norm": 0.40732799270557263, "learning_rate": 9.477861492495757e-06, "loss": 0.3823, "step": 1450 }, { "epoch": 0.3453323020170167, "grad_norm": 0.41644066379485684, "learning_rate": 9.47700354233334e-06, "loss": 0.4282, "step": 1451 }, { "epoch": 0.3455702980900815, "grad_norm": 0.4224474904646012, "learning_rate": 9.476144926777441e-06, "loss": 0.3412, "step": 1452 }, { "epoch": 0.3458082941631463, "grad_norm": 0.3909474423211058, "learning_rate": 9.475285645955672e-06, "loss": 0.3453, "step": 1453 }, { "epoch": 0.3460462902362111, "grad_norm": 0.3842803363987257, "learning_rate": 9.474425699995741e-06, "loss": 0.4122, "step": 1454 }, { "epoch": 0.3462842863092759, "grad_norm": 0.3862238147346478, "learning_rate": 9.473565089025463e-06, "loss": 0.3803, "step": 1455 }, { "epoch": 0.3465222823823407, "grad_norm": 0.39766464008162167, "learning_rate": 9.472703813172739e-06, "loss": 0.3174, "step": 1456 }, { "epoch": 0.34676027845540547, "grad_norm": 0.37570231724988296, "learning_rate": 9.47184187256558e-06, "loss": 0.3791, "step": 1457 }, { "epoch": 0.34699827452847026, "grad_norm": 0.4142156067032061, "learning_rate": 9.47097926733209e-06, "loss": 0.4024, "step": 1458 }, { "epoch": 0.34723627060153506, "grad_norm": 0.398792512942407, "learning_rate": 9.470115997600474e-06, "loss": 0.3421, "step": 1459 }, { "epoch": 0.34747426667459985, "grad_norm": 0.43012148252168575, "learning_rate": 9.469252063499036e-06, "loss": 0.3314, "step": 1460 }, { "epoch": 0.34771226274766465, "grad_norm": 0.4927776656728705, "learning_rate": 9.468387465156176e-06, "loss": 0.4153, "step": 1461 }, { "epoch": 0.34795025882072944, "grad_norm": 0.4096929455276192, "learning_rate": 9.467522202700399e-06, "loss": 0.4071, "step": 1462 }, { "epoch": 0.34818825489379424, "grad_norm": 0.44407738784995165, "learning_rate": 9.4666562762603e-06, "loss": 0.3292, "step": 1463 }, { "epoch": 0.34842625096685903, "grad_norm": 0.4261438734366603, "learning_rate": 9.465789685964579e-06, "loss": 0.3609, "step": 1464 }, { "epoch": 0.3486642470399238, "grad_norm": 0.3842203365009242, "learning_rate": 9.464922431942032e-06, "loss": 0.4201, "step": 1465 }, { "epoch": 0.3489022431129886, "grad_norm": 0.378083104719912, "learning_rate": 9.464054514321554e-06, "loss": 0.3694, "step": 1466 }, { "epoch": 0.3491402391860534, "grad_norm": 0.4040049074787795, "learning_rate": 9.46318593323214e-06, "loss": 0.3564, "step": 1467 }, { "epoch": 0.3493782352591182, "grad_norm": 0.425890416302795, "learning_rate": 9.462316688802884e-06, "loss": 0.389, "step": 1468 }, { "epoch": 0.349616231332183, "grad_norm": 0.3803658953106735, "learning_rate": 9.461446781162974e-06, "loss": 0.4331, "step": 1469 }, { "epoch": 0.3498542274052478, "grad_norm": 0.4060084138224938, "learning_rate": 9.4605762104417e-06, "loss": 0.3327, "step": 1470 }, { "epoch": 0.3500922234783126, "grad_norm": 0.640815748060444, "learning_rate": 9.459704976768455e-06, "loss": 0.3434, "step": 1471 }, { "epoch": 0.3503302195513774, "grad_norm": 0.42837101898379226, "learning_rate": 9.458833080272723e-06, "loss": 0.424, "step": 1472 }, { "epoch": 0.3505682156244422, "grad_norm": 0.4030658639433663, "learning_rate": 9.457960521084087e-06, "loss": 0.3982, "step": 1473 }, { "epoch": 0.350806211697507, "grad_norm": 0.3984195226956701, "learning_rate": 9.457087299332232e-06, "loss": 0.3275, "step": 1474 }, { "epoch": 0.35104420777057177, "grad_norm": 0.39898439381782036, "learning_rate": 9.456213415146943e-06, "loss": 0.3891, "step": 1475 }, { "epoch": 0.35128220384363656, "grad_norm": 0.39964981876808653, "learning_rate": 9.4553388686581e-06, "loss": 0.4239, "step": 1476 }, { "epoch": 0.35152019991670136, "grad_norm": 0.41473687287806216, "learning_rate": 9.454463659995678e-06, "loss": 0.3502, "step": 1477 }, { "epoch": 0.35175819598976615, "grad_norm": 0.40780054376337166, "learning_rate": 9.453587789289762e-06, "loss": 0.3565, "step": 1478 }, { "epoch": 0.35199619206283095, "grad_norm": 0.4040364947021049, "learning_rate": 9.452711256670521e-06, "loss": 0.4009, "step": 1479 }, { "epoch": 0.35223418813589574, "grad_norm": 0.43117573330093506, "learning_rate": 9.451834062268234e-06, "loss": 0.3894, "step": 1480 }, { "epoch": 0.35247218420896054, "grad_norm": 0.4115001277743943, "learning_rate": 9.450956206213272e-06, "loss": 0.3176, "step": 1481 }, { "epoch": 0.35271018028202533, "grad_norm": 0.3928594010225446, "learning_rate": 9.450077688636107e-06, "loss": 0.3696, "step": 1482 }, { "epoch": 0.3529481763550901, "grad_norm": 0.3924725511243915, "learning_rate": 9.449198509667307e-06, "loss": 0.429, "step": 1483 }, { "epoch": 0.3531861724281549, "grad_norm": 0.4026567257458122, "learning_rate": 9.448318669437541e-06, "loss": 0.3303, "step": 1484 }, { "epoch": 0.3534241685012197, "grad_norm": 0.38358768538776655, "learning_rate": 9.447438168077574e-06, "loss": 0.3477, "step": 1485 }, { "epoch": 0.3536621645742845, "grad_norm": 0.38097335550299927, "learning_rate": 9.446557005718271e-06, "loss": 0.3577, "step": 1486 }, { "epoch": 0.3539001606473493, "grad_norm": 0.3625205031776034, "learning_rate": 9.445675182490594e-06, "loss": 0.4126, "step": 1487 }, { "epoch": 0.3541381567204141, "grad_norm": 0.3598134954454926, "learning_rate": 9.444792698525606e-06, "loss": 0.3341, "step": 1488 }, { "epoch": 0.3543761527934789, "grad_norm": 0.4479832164810966, "learning_rate": 9.443909553954463e-06, "loss": 0.3451, "step": 1489 }, { "epoch": 0.3546141488665437, "grad_norm": 0.38803228377964066, "learning_rate": 9.443025748908423e-06, "loss": 0.4234, "step": 1490 }, { "epoch": 0.3548521449396085, "grad_norm": 0.4066075957863286, "learning_rate": 9.442141283518842e-06, "loss": 0.3549, "step": 1491 }, { "epoch": 0.3550901410126733, "grad_norm": 0.42403239997363124, "learning_rate": 9.441256157917174e-06, "loss": 0.3344, "step": 1492 }, { "epoch": 0.35532813708573807, "grad_norm": 0.38290202011308166, "learning_rate": 9.440370372234968e-06, "loss": 0.3861, "step": 1493 }, { "epoch": 0.35556613315880287, "grad_norm": 0.3890748893970797, "learning_rate": 9.439483926603876e-06, "loss": 0.4325, "step": 1494 }, { "epoch": 0.35580412923186766, "grad_norm": 0.41264784304893215, "learning_rate": 9.438596821155644e-06, "loss": 0.3185, "step": 1495 }, { "epoch": 0.35604212530493246, "grad_norm": 0.4150247829602727, "learning_rate": 9.43770905602212e-06, "loss": 0.3659, "step": 1496 }, { "epoch": 0.35628012137799725, "grad_norm": 0.3925170437737852, "learning_rate": 9.436820631335245e-06, "loss": 0.4141, "step": 1497 }, { "epoch": 0.35651811745106204, "grad_norm": 0.4286671248087841, "learning_rate": 9.435931547227064e-06, "loss": 0.381, "step": 1498 }, { "epoch": 0.35675611352412684, "grad_norm": 0.4388357656753384, "learning_rate": 9.435041803829716e-06, "loss": 0.3278, "step": 1499 }, { "epoch": 0.35699410959719163, "grad_norm": 0.418842687865229, "learning_rate": 9.434151401275436e-06, "loss": 0.3708, "step": 1500 }, { "epoch": 0.35723210567025643, "grad_norm": 0.4811664724649453, "learning_rate": 9.433260339696564e-06, "loss": 0.4451, "step": 1501 }, { "epoch": 0.3574701017433212, "grad_norm": 0.4847264758830864, "learning_rate": 9.432368619225532e-06, "loss": 0.3434, "step": 1502 }, { "epoch": 0.357708097816386, "grad_norm": 0.4157855613581623, "learning_rate": 9.43147623999487e-06, "loss": 0.3456, "step": 1503 }, { "epoch": 0.3579460938894508, "grad_norm": 0.39518229863812704, "learning_rate": 9.43058320213721e-06, "loss": 0.3947, "step": 1504 }, { "epoch": 0.3581840899625156, "grad_norm": 0.4171833220322581, "learning_rate": 9.42968950578528e-06, "loss": 0.3934, "step": 1505 }, { "epoch": 0.3584220860355804, "grad_norm": 0.46935586691478637, "learning_rate": 9.428795151071904e-06, "loss": 0.335, "step": 1506 }, { "epoch": 0.3586600821086452, "grad_norm": 0.4076327299219336, "learning_rate": 9.427900138130005e-06, "loss": 0.3517, "step": 1507 }, { "epoch": 0.35889807818171, "grad_norm": 0.39393198965417636, "learning_rate": 9.427004467092604e-06, "loss": 0.4382, "step": 1508 }, { "epoch": 0.3591360742547748, "grad_norm": 0.4165484912582241, "learning_rate": 9.42610813809282e-06, "loss": 0.3512, "step": 1509 }, { "epoch": 0.3593740703278396, "grad_norm": 0.45933344516028873, "learning_rate": 9.425211151263871e-06, "loss": 0.3291, "step": 1510 }, { "epoch": 0.3596120664009044, "grad_norm": 0.3722603137882553, "learning_rate": 9.42431350673907e-06, "loss": 0.3777, "step": 1511 }, { "epoch": 0.35985006247396917, "grad_norm": 0.4539895853130569, "learning_rate": 9.42341520465183e-06, "loss": 0.4187, "step": 1512 }, { "epoch": 0.36008805854703396, "grad_norm": 0.4630126425642641, "learning_rate": 9.42251624513566e-06, "loss": 0.3322, "step": 1513 }, { "epoch": 0.36032605462009876, "grad_norm": 0.4571830447922558, "learning_rate": 9.421616628324168e-06, "loss": 0.376, "step": 1514 }, { "epoch": 0.36056405069316355, "grad_norm": 0.3805355362272922, "learning_rate": 9.42071635435106e-06, "loss": 0.4204, "step": 1515 }, { "epoch": 0.36080204676622835, "grad_norm": 0.43191632003281544, "learning_rate": 9.41981542335014e-06, "loss": 0.3545, "step": 1516 }, { "epoch": 0.36104004283929314, "grad_norm": 0.40851897479644855, "learning_rate": 9.418913835455306e-06, "loss": 0.3339, "step": 1517 }, { "epoch": 0.36127803891235793, "grad_norm": 0.37903195693910496, "learning_rate": 9.418011590800556e-06, "loss": 0.3913, "step": 1518 }, { "epoch": 0.36151603498542273, "grad_norm": 0.4520503442337851, "learning_rate": 9.41710868951999e-06, "loss": 0.4169, "step": 1519 }, { "epoch": 0.3617540310584875, "grad_norm": 0.43239012543775074, "learning_rate": 9.416205131747796e-06, "loss": 0.3376, "step": 1520 }, { "epoch": 0.3619920271315523, "grad_norm": 0.41380758515497446, "learning_rate": 9.415300917618269e-06, "loss": 0.3672, "step": 1521 }, { "epoch": 0.3622300232046171, "grad_norm": 0.3843142207624058, "learning_rate": 9.414396047265797e-06, "loss": 0.4091, "step": 1522 }, { "epoch": 0.3624680192776819, "grad_norm": 0.37767641451983713, "learning_rate": 9.413490520824864e-06, "loss": 0.3759, "step": 1523 }, { "epoch": 0.3627060153507467, "grad_norm": 0.3684023556846619, "learning_rate": 9.412584338430056e-06, "loss": 0.3418, "step": 1524 }, { "epoch": 0.3629440114238115, "grad_norm": 0.5087384806908768, "learning_rate": 9.411677500216053e-06, "loss": 0.3908, "step": 1525 }, { "epoch": 0.3631820074968763, "grad_norm": 0.36310042219702077, "learning_rate": 9.410770006317634e-06, "loss": 0.4402, "step": 1526 }, { "epoch": 0.3634200035699411, "grad_norm": 0.36444105379216124, "learning_rate": 9.409861856869676e-06, "loss": 0.3427, "step": 1527 }, { "epoch": 0.3636579996430059, "grad_norm": 0.48371401020099114, "learning_rate": 9.40895305200715e-06, "loss": 0.3294, "step": 1528 }, { "epoch": 0.3638959957160707, "grad_norm": 0.4123934901314048, "learning_rate": 9.408043591865129e-06, "loss": 0.3941, "step": 1529 }, { "epoch": 0.36413399178913547, "grad_norm": 0.39143950957309054, "learning_rate": 9.407133476578778e-06, "loss": 0.3836, "step": 1530 }, { "epoch": 0.36437198786220026, "grad_norm": 0.4513000975255898, "learning_rate": 9.406222706283368e-06, "loss": 0.3188, "step": 1531 }, { "epoch": 0.36460998393526506, "grad_norm": 0.39125708494077566, "learning_rate": 9.405311281114258e-06, "loss": 0.3845, "step": 1532 }, { "epoch": 0.36484798000832985, "grad_norm": 0.39160593726214166, "learning_rate": 9.404399201206908e-06, "loss": 0.4172, "step": 1533 }, { "epoch": 0.36508597608139465, "grad_norm": 0.46690194148815245, "learning_rate": 9.40348646669688e-06, "loss": 0.3384, "step": 1534 }, { "epoch": 0.36532397215445944, "grad_norm": 0.43376276227001326, "learning_rate": 9.402573077719825e-06, "loss": 0.3107, "step": 1535 }, { "epoch": 0.36556196822752424, "grad_norm": 0.3748979225551678, "learning_rate": 9.401659034411496e-06, "loss": 0.3881, "step": 1536 }, { "epoch": 0.36579996430058903, "grad_norm": 0.41357476791350894, "learning_rate": 9.400744336907743e-06, "loss": 0.4555, "step": 1537 }, { "epoch": 0.3660379603736538, "grad_norm": 0.40289808520289133, "learning_rate": 9.399828985344513e-06, "loss": 0.3289, "step": 1538 }, { "epoch": 0.3662759564467186, "grad_norm": 0.37195330338030785, "learning_rate": 9.398912979857848e-06, "loss": 0.3662, "step": 1539 }, { "epoch": 0.3665139525197834, "grad_norm": 0.379044773247951, "learning_rate": 9.39799632058389e-06, "loss": 0.4099, "step": 1540 }, { "epoch": 0.3667519485928482, "grad_norm": 0.40409817593250036, "learning_rate": 9.397079007658878e-06, "loss": 0.3912, "step": 1541 }, { "epoch": 0.366989944665913, "grad_norm": 0.38093441918353926, "learning_rate": 9.396161041219147e-06, "loss": 0.3345, "step": 1542 }, { "epoch": 0.3672279407389778, "grad_norm": 0.4006873027568648, "learning_rate": 9.39524242140113e-06, "loss": 0.386, "step": 1543 }, { "epoch": 0.3674659368120426, "grad_norm": 0.407045960911348, "learning_rate": 9.394323148341355e-06, "loss": 0.4196, "step": 1544 }, { "epoch": 0.3677039328851074, "grad_norm": 0.45641130604553176, "learning_rate": 9.393403222176451e-06, "loss": 0.3589, "step": 1545 }, { "epoch": 0.3679419289581722, "grad_norm": 0.44633345529058044, "learning_rate": 9.392482643043142e-06, "loss": 0.3553, "step": 1546 }, { "epoch": 0.368179925031237, "grad_norm": 0.3856503746950216, "learning_rate": 9.391561411078245e-06, "loss": 0.414, "step": 1547 }, { "epoch": 0.36841792110430177, "grad_norm": 0.43687712395015343, "learning_rate": 9.39063952641868e-06, "loss": 0.3678, "step": 1548 }, { "epoch": 0.36865591717736657, "grad_norm": 0.43545658546660954, "learning_rate": 9.389716989201464e-06, "loss": 0.3333, "step": 1549 }, { "epoch": 0.36889391325043136, "grad_norm": 0.41050982682577164, "learning_rate": 9.388793799563706e-06, "loss": 0.392, "step": 1550 }, { "epoch": 0.36913190932349615, "grad_norm": 0.37175911777408144, "learning_rate": 9.387869957642616e-06, "loss": 0.4177, "step": 1551 }, { "epoch": 0.36936990539656095, "grad_norm": 0.4825250816525954, "learning_rate": 9.3869454635755e-06, "loss": 0.3373, "step": 1552 }, { "epoch": 0.36960790146962574, "grad_norm": 0.46259012792132853, "learning_rate": 9.38602031749976e-06, "loss": 0.3139, "step": 1553 }, { "epoch": 0.36984589754269054, "grad_norm": 0.43835976647878444, "learning_rate": 9.385094519552896e-06, "loss": 0.368, "step": 1554 }, { "epoch": 0.37008389361575533, "grad_norm": 0.4476474289989832, "learning_rate": 9.384168069872505e-06, "loss": 0.4029, "step": 1555 }, { "epoch": 0.3703218896888201, "grad_norm": 0.4670049546010331, "learning_rate": 9.38324096859628e-06, "loss": 0.3323, "step": 1556 }, { "epoch": 0.3705598857618849, "grad_norm": 0.379594355730863, "learning_rate": 9.382313215862009e-06, "loss": 0.376, "step": 1557 }, { "epoch": 0.3707978818349497, "grad_norm": 0.44935069232834013, "learning_rate": 9.38138481180758e-06, "loss": 0.4237, "step": 1558 }, { "epoch": 0.3710358779080145, "grad_norm": 0.5090826653616003, "learning_rate": 9.38045575657098e-06, "loss": 0.3653, "step": 1559 }, { "epoch": 0.3712738739810793, "grad_norm": 0.47245779020299394, "learning_rate": 9.37952605029029e-06, "loss": 0.3275, "step": 1560 }, { "epoch": 0.3715118700541441, "grad_norm": 0.40765167623096316, "learning_rate": 9.378595693103681e-06, "loss": 0.4096, "step": 1561 }, { "epoch": 0.3717498661272089, "grad_norm": 0.4459617175575773, "learning_rate": 9.37766468514943e-06, "loss": 0.4226, "step": 1562 }, { "epoch": 0.3719878622002737, "grad_norm": 0.43777206352478504, "learning_rate": 9.376733026565911e-06, "loss": 0.3128, "step": 1563 }, { "epoch": 0.3722258582733385, "grad_norm": 0.43171693526067784, "learning_rate": 9.375800717491588e-06, "loss": 0.3511, "step": 1564 }, { "epoch": 0.3724638543464033, "grad_norm": 0.37666167095835446, "learning_rate": 9.374867758065027e-06, "loss": 0.4001, "step": 1565 }, { "epoch": 0.3727018504194681, "grad_norm": 0.45130931365820925, "learning_rate": 9.373934148424887e-06, "loss": 0.371, "step": 1566 }, { "epoch": 0.37293984649253287, "grad_norm": 0.4067510576017569, "learning_rate": 9.372999888709927e-06, "loss": 0.3598, "step": 1567 }, { "epoch": 0.37317784256559766, "grad_norm": 0.3956103473115734, "learning_rate": 9.372064979059001e-06, "loss": 0.3735, "step": 1568 }, { "epoch": 0.37341583863866246, "grad_norm": 0.38688699762267664, "learning_rate": 9.371129419611059e-06, "loss": 0.4281, "step": 1569 }, { "epoch": 0.37365383471172725, "grad_norm": 0.42553445781670524, "learning_rate": 9.37019321050515e-06, "loss": 0.3071, "step": 1570 }, { "epoch": 0.37389183078479205, "grad_norm": 0.43085996359356, "learning_rate": 9.369256351880415e-06, "loss": 0.3452, "step": 1571 }, { "epoch": 0.37412982685785684, "grad_norm": 0.40686052760995856, "learning_rate": 9.368318843876097e-06, "loss": 0.4233, "step": 1572 }, { "epoch": 0.37436782293092163, "grad_norm": 0.359734329282973, "learning_rate": 9.36738068663153e-06, "loss": 0.3651, "step": 1573 }, { "epoch": 0.37460581900398643, "grad_norm": 0.3856761545147769, "learning_rate": 9.36644188028615e-06, "loss": 0.3182, "step": 1574 }, { "epoch": 0.3748438150770512, "grad_norm": 0.3957759378316436, "learning_rate": 9.365502424979488e-06, "loss": 0.4013, "step": 1575 }, { "epoch": 0.375081811150116, "grad_norm": 0.40850014589920525, "learning_rate": 9.364562320851167e-06, "loss": 0.4193, "step": 1576 }, { "epoch": 0.3753198072231808, "grad_norm": 0.38661039932841545, "learning_rate": 9.36362156804091e-06, "loss": 0.3553, "step": 1577 }, { "epoch": 0.3755578032962456, "grad_norm": 0.4003228203902698, "learning_rate": 9.362680166688538e-06, "loss": 0.3396, "step": 1578 }, { "epoch": 0.3757957993693104, "grad_norm": 0.3831137546008049, "learning_rate": 9.361738116933967e-06, "loss": 0.3872, "step": 1579 }, { "epoch": 0.3760337954423752, "grad_norm": 0.44314506167594236, "learning_rate": 9.360795418917205e-06, "loss": 0.3819, "step": 1580 }, { "epoch": 0.37627179151544, "grad_norm": 0.46088081221952837, "learning_rate": 9.359852072778365e-06, "loss": 0.3196, "step": 1581 }, { "epoch": 0.3765097875885048, "grad_norm": 0.481649845998446, "learning_rate": 9.35890807865765e-06, "loss": 0.3606, "step": 1582 }, { "epoch": 0.3767477836615696, "grad_norm": 0.4324693237486104, "learning_rate": 9.357963436695357e-06, "loss": 0.4206, "step": 1583 }, { "epoch": 0.3769857797346344, "grad_norm": 0.47833760040095646, "learning_rate": 9.357018147031888e-06, "loss": 0.3558, "step": 1584 }, { "epoch": 0.37722377580769917, "grad_norm": 0.3956699015096483, "learning_rate": 9.356072209807737e-06, "loss": 0.3198, "step": 1585 }, { "epoch": 0.37746177188076396, "grad_norm": 0.3864788423378403, "learning_rate": 9.35512562516349e-06, "loss": 0.4045, "step": 1586 }, { "epoch": 0.37769976795382876, "grad_norm": 0.4160431742514611, "learning_rate": 9.354178393239834e-06, "loss": 0.4082, "step": 1587 }, { "epoch": 0.37793776402689355, "grad_norm": 0.42646842694183207, "learning_rate": 9.353230514177553e-06, "loss": 0.3299, "step": 1588 }, { "epoch": 0.37817576009995835, "grad_norm": 0.4132760310497568, "learning_rate": 9.352281988117521e-06, "loss": 0.4134, "step": 1589 }, { "epoch": 0.37841375617302314, "grad_norm": 0.39054592818013045, "learning_rate": 9.35133281520072e-06, "loss": 0.4301, "step": 1590 }, { "epoch": 0.37865175224608794, "grad_norm": 0.4404186715801395, "learning_rate": 9.350382995568213e-06, "loss": 0.3641, "step": 1591 }, { "epoch": 0.37888974831915273, "grad_norm": 0.38490449743758554, "learning_rate": 9.349432529361168e-06, "loss": 0.3679, "step": 1592 }, { "epoch": 0.3791277443922175, "grad_norm": 0.38256739427952213, "learning_rate": 9.348481416720852e-06, "loss": 0.4013, "step": 1593 }, { "epoch": 0.3793657404652823, "grad_norm": 0.392281809140338, "learning_rate": 9.34752965778862e-06, "loss": 0.4262, "step": 1594 }, { "epoch": 0.3796037365383471, "grad_norm": 0.3603426933987028, "learning_rate": 9.346577252705929e-06, "loss": 0.3434, "step": 1595 }, { "epoch": 0.3798417326114119, "grad_norm": 0.3862895458269031, "learning_rate": 9.345624201614328e-06, "loss": 0.3273, "step": 1596 }, { "epoch": 0.3800797286844767, "grad_norm": 0.4424213661034337, "learning_rate": 9.344670504655466e-06, "loss": 0.4163, "step": 1597 }, { "epoch": 0.3803177247575415, "grad_norm": 0.3946042222856275, "learning_rate": 9.343716161971084e-06, "loss": 0.3853, "step": 1598 }, { "epoch": 0.3805557208306063, "grad_norm": 0.48228594107422296, "learning_rate": 9.342761173703023e-06, "loss": 0.3121, "step": 1599 }, { "epoch": 0.3807937169036711, "grad_norm": 0.4055270748007401, "learning_rate": 9.341805539993216e-06, "loss": 0.3568, "step": 1600 }, { "epoch": 0.3810317129767359, "grad_norm": 0.37266993056107267, "learning_rate": 9.340849260983695e-06, "loss": 0.4455, "step": 1601 }, { "epoch": 0.3812697090498007, "grad_norm": 0.4040859091869553, "learning_rate": 9.339892336816587e-06, "loss": 0.316, "step": 1602 }, { "epoch": 0.38150770512286547, "grad_norm": 0.3984553047662032, "learning_rate": 9.338934767634114e-06, "loss": 0.3299, "step": 1603 }, { "epoch": 0.38174570119593026, "grad_norm": 0.3937006805260035, "learning_rate": 9.337976553578593e-06, "loss": 0.4225, "step": 1604 }, { "epoch": 0.38198369726899506, "grad_norm": 0.37712895414520553, "learning_rate": 9.337017694792441e-06, "loss": 0.3939, "step": 1605 }, { "epoch": 0.38222169334205985, "grad_norm": 0.4069249598197311, "learning_rate": 9.336058191418167e-06, "loss": 0.3414, "step": 1606 }, { "epoch": 0.38245968941512465, "grad_norm": 0.37857978246964663, "learning_rate": 9.335098043598376e-06, "loss": 0.3751, "step": 1607 }, { "epoch": 0.38269768548818944, "grad_norm": 0.37160587062598166, "learning_rate": 9.334137251475771e-06, "loss": 0.4226, "step": 1608 }, { "epoch": 0.38293568156125424, "grad_norm": 0.4083708370687077, "learning_rate": 9.333175815193149e-06, "loss": 0.3667, "step": 1609 }, { "epoch": 0.38317367763431903, "grad_norm": 0.39386664440771063, "learning_rate": 9.332213734893406e-06, "loss": 0.3182, "step": 1610 }, { "epoch": 0.3834116737073838, "grad_norm": 0.4343970310481392, "learning_rate": 9.331251010719525e-06, "loss": 0.4, "step": 1611 }, { "epoch": 0.3836496697804486, "grad_norm": 0.43461958864883965, "learning_rate": 9.330287642814593e-06, "loss": 0.4267, "step": 1612 }, { "epoch": 0.3838876658535134, "grad_norm": 0.41875602341308255, "learning_rate": 9.329323631321793e-06, "loss": 0.3314, "step": 1613 }, { "epoch": 0.3841256619265782, "grad_norm": 0.3891910114438827, "learning_rate": 9.328358976384398e-06, "loss": 0.3693, "step": 1614 }, { "epoch": 0.384363657999643, "grad_norm": 0.3988395541503049, "learning_rate": 9.327393678145781e-06, "loss": 0.4159, "step": 1615 }, { "epoch": 0.3846016540727078, "grad_norm": 0.4273306553986297, "learning_rate": 9.32642773674941e-06, "loss": 0.3702, "step": 1616 }, { "epoch": 0.3848396501457726, "grad_norm": 0.4297415267388038, "learning_rate": 9.325461152338846e-06, "loss": 0.3125, "step": 1617 }, { "epoch": 0.3850776462188374, "grad_norm": 0.3713890087288812, "learning_rate": 9.324493925057747e-06, "loss": 0.3745, "step": 1618 }, { "epoch": 0.3853156422919022, "grad_norm": 0.397582778645666, "learning_rate": 9.32352605504987e-06, "loss": 0.4028, "step": 1619 }, { "epoch": 0.385553638364967, "grad_norm": 0.3879589456647183, "learning_rate": 9.322557542459061e-06, "loss": 0.3442, "step": 1620 }, { "epoch": 0.38579163443803177, "grad_norm": 0.3734347689407556, "learning_rate": 9.321588387429266e-06, "loss": 0.3492, "step": 1621 }, { "epoch": 0.38602963051109657, "grad_norm": 0.38372821085957537, "learning_rate": 9.320618590104525e-06, "loss": 0.4057, "step": 1622 }, { "epoch": 0.38626762658416136, "grad_norm": 0.39004512408218667, "learning_rate": 9.319648150628978e-06, "loss": 0.3837, "step": 1623 }, { "epoch": 0.38650562265722616, "grad_norm": 0.43356887512118736, "learning_rate": 9.318677069146848e-06, "loss": 0.3141, "step": 1624 }, { "epoch": 0.38674361873029095, "grad_norm": 0.4399012491269116, "learning_rate": 9.31770534580247e-06, "loss": 0.4022, "step": 1625 }, { "epoch": 0.38698161480335574, "grad_norm": 0.40492521303478335, "learning_rate": 9.316732980740262e-06, "loss": 0.419, "step": 1626 }, { "epoch": 0.38721961087642054, "grad_norm": 0.4061119104072092, "learning_rate": 9.315759974104741e-06, "loss": 0.3166, "step": 1627 }, { "epoch": 0.38745760694948533, "grad_norm": 0.3909427341832737, "learning_rate": 9.314786326040523e-06, "loss": 0.3332, "step": 1628 }, { "epoch": 0.38769560302255013, "grad_norm": 0.42034544526119255, "learning_rate": 9.313812036692314e-06, "loss": 0.3961, "step": 1629 }, { "epoch": 0.3879335990956149, "grad_norm": 0.43657367145331466, "learning_rate": 9.312837106204916e-06, "loss": 0.373, "step": 1630 }, { "epoch": 0.3881715951686797, "grad_norm": 0.37018141911486274, "learning_rate": 9.31186153472323e-06, "loss": 0.3114, "step": 1631 }, { "epoch": 0.3884095912417445, "grad_norm": 0.38503056353803755, "learning_rate": 9.31088532239225e-06, "loss": 0.386, "step": 1632 }, { "epoch": 0.3886475873148093, "grad_norm": 0.39233949025594983, "learning_rate": 9.309908469357067e-06, "loss": 0.416, "step": 1633 }, { "epoch": 0.3888855833878741, "grad_norm": 0.4433267496831673, "learning_rate": 9.308930975762862e-06, "loss": 0.3621, "step": 1634 }, { "epoch": 0.3891235794609389, "grad_norm": 0.3952777289255298, "learning_rate": 9.307952841754916e-06, "loss": 0.3255, "step": 1635 }, { "epoch": 0.3893615755340037, "grad_norm": 0.4022306208679625, "learning_rate": 9.306974067478602e-06, "loss": 0.3977, "step": 1636 }, { "epoch": 0.3895995716070685, "grad_norm": 0.41753431253514406, "learning_rate": 9.305994653079396e-06, "loss": 0.4056, "step": 1637 }, { "epoch": 0.3898375676801333, "grad_norm": 0.40246752404914526, "learning_rate": 9.305014598702857e-06, "loss": 0.3407, "step": 1638 }, { "epoch": 0.3900755637531981, "grad_norm": 0.400365444870483, "learning_rate": 9.304033904494649e-06, "loss": 0.3742, "step": 1639 }, { "epoch": 0.39031355982626287, "grad_norm": 0.4157295616816406, "learning_rate": 9.303052570600524e-06, "loss": 0.4111, "step": 1640 }, { "epoch": 0.39055155589932766, "grad_norm": 0.4525080007717071, "learning_rate": 9.302070597166337e-06, "loss": 0.3378, "step": 1641 }, { "epoch": 0.39078955197239246, "grad_norm": 0.3854211007538121, "learning_rate": 9.301087984338029e-06, "loss": 0.3288, "step": 1642 }, { "epoch": 0.39102754804545725, "grad_norm": 0.4832368919472511, "learning_rate": 9.300104732261645e-06, "loss": 0.3952, "step": 1643 }, { "epoch": 0.39126554411852205, "grad_norm": 0.42640640785474865, "learning_rate": 9.299120841083317e-06, "loss": 0.4237, "step": 1644 }, { "epoch": 0.39150354019158684, "grad_norm": 0.41078488904477306, "learning_rate": 9.298136310949278e-06, "loss": 0.3299, "step": 1645 }, { "epoch": 0.39174153626465164, "grad_norm": 0.3496318552591008, "learning_rate": 9.297151142005852e-06, "loss": 0.3274, "step": 1646 }, { "epoch": 0.39197953233771643, "grad_norm": 0.45341271456511995, "learning_rate": 9.296165334399458e-06, "loss": 0.4078, "step": 1647 }, { "epoch": 0.3922175284107812, "grad_norm": 0.4210029643414087, "learning_rate": 9.295178888276615e-06, "loss": 0.3651, "step": 1648 }, { "epoch": 0.392455524483846, "grad_norm": 0.38599905665403594, "learning_rate": 9.294191803783931e-06, "loss": 0.3477, "step": 1649 }, { "epoch": 0.3926935205569108, "grad_norm": 0.4189352579336025, "learning_rate": 9.293204081068113e-06, "loss": 0.3932, "step": 1650 }, { "epoch": 0.3929315166299756, "grad_norm": 0.43257183707928304, "learning_rate": 9.292215720275959e-06, "loss": 0.422, "step": 1651 }, { "epoch": 0.3931695127030404, "grad_norm": 0.4838474825444332, "learning_rate": 9.291226721554364e-06, "loss": 0.3335, "step": 1652 }, { "epoch": 0.3934075087761052, "grad_norm": 0.36325017737845905, "learning_rate": 9.290237085050318e-06, "loss": 0.3438, "step": 1653 }, { "epoch": 0.39364550484917, "grad_norm": 0.412707690424985, "learning_rate": 9.289246810910909e-06, "loss": 0.4129, "step": 1654 }, { "epoch": 0.3938835009222348, "grad_norm": 0.41393730782449073, "learning_rate": 9.288255899283309e-06, "loss": 0.4021, "step": 1655 }, { "epoch": 0.3941214969952996, "grad_norm": 0.4386727788302991, "learning_rate": 9.287264350314797e-06, "loss": 0.3621, "step": 1656 }, { "epoch": 0.3943594930683644, "grad_norm": 0.3681925197909611, "learning_rate": 9.286272164152744e-06, "loss": 0.3769, "step": 1657 }, { "epoch": 0.39459748914142917, "grad_norm": 0.4635910656535038, "learning_rate": 9.285279340944607e-06, "loss": 0.4101, "step": 1658 }, { "epoch": 0.39483548521449396, "grad_norm": 0.4598290354163402, "learning_rate": 9.284285880837947e-06, "loss": 0.3532, "step": 1659 }, { "epoch": 0.39507348128755876, "grad_norm": 0.3933380949916356, "learning_rate": 9.283291783980418e-06, "loss": 0.3303, "step": 1660 }, { "epoch": 0.39531147736062355, "grad_norm": 0.39884801926811836, "learning_rate": 9.282297050519767e-06, "loss": 0.3897, "step": 1661 }, { "epoch": 0.39554947343368835, "grad_norm": 0.5083918488666362, "learning_rate": 9.281301680603834e-06, "loss": 0.4365, "step": 1662 }, { "epoch": 0.39578746950675314, "grad_norm": 0.5222377151000596, "learning_rate": 9.280305674380558e-06, "loss": 0.3347, "step": 1663 }, { "epoch": 0.39602546557981794, "grad_norm": 0.40550870332567346, "learning_rate": 9.279309031997968e-06, "loss": 0.3609, "step": 1664 }, { "epoch": 0.39626346165288273, "grad_norm": 0.4047710761598781, "learning_rate": 9.278311753604192e-06, "loss": 0.437, "step": 1665 }, { "epoch": 0.3965014577259475, "grad_norm": 0.5151918734857975, "learning_rate": 9.277313839347449e-06, "loss": 0.3752, "step": 1666 }, { "epoch": 0.3967394537990123, "grad_norm": 0.4697431600744339, "learning_rate": 9.276315289376052e-06, "loss": 0.3242, "step": 1667 }, { "epoch": 0.3969774498720771, "grad_norm": 0.43709669423640757, "learning_rate": 9.275316103838414e-06, "loss": 0.3797, "step": 1668 }, { "epoch": 0.3972154459451419, "grad_norm": 0.4192015191302519, "learning_rate": 9.274316282883037e-06, "loss": 0.4335, "step": 1669 }, { "epoch": 0.3974534420182067, "grad_norm": 0.4117716004967471, "learning_rate": 9.273315826658518e-06, "loss": 0.3522, "step": 1670 }, { "epoch": 0.3976914380912715, "grad_norm": 0.42633382432239425, "learning_rate": 9.27231473531355e-06, "loss": 0.3689, "step": 1671 }, { "epoch": 0.3979294341643363, "grad_norm": 0.38633621983522715, "learning_rate": 9.271313008996922e-06, "loss": 0.4201, "step": 1672 }, { "epoch": 0.3981674302374011, "grad_norm": 0.42827859761102866, "learning_rate": 9.270310647857513e-06, "loss": 0.3973, "step": 1673 }, { "epoch": 0.3984054263104659, "grad_norm": 0.5003093170938062, "learning_rate": 9.269307652044298e-06, "loss": 0.326, "step": 1674 }, { "epoch": 0.3986434223835307, "grad_norm": 0.43808182708918314, "learning_rate": 9.26830402170635e-06, "loss": 0.3888, "step": 1675 }, { "epoch": 0.39888141845659547, "grad_norm": 0.41053386389897284, "learning_rate": 9.267299756992829e-06, "loss": 0.4315, "step": 1676 }, { "epoch": 0.39911941452966027, "grad_norm": 0.4230397692191716, "learning_rate": 9.266294858052998e-06, "loss": 0.3346, "step": 1677 }, { "epoch": 0.39935741060272506, "grad_norm": 0.431898235869182, "learning_rate": 9.265289325036209e-06, "loss": 0.3241, "step": 1678 }, { "epoch": 0.39959540667578985, "grad_norm": 0.39729706567685696, "learning_rate": 9.264283158091909e-06, "loss": 0.4009, "step": 1679 }, { "epoch": 0.39983340274885465, "grad_norm": 0.4004504040408847, "learning_rate": 9.263276357369635e-06, "loss": 0.3942, "step": 1680 }, { "epoch": 0.40007139882191944, "grad_norm": 0.4300597724152983, "learning_rate": 9.262268923019028e-06, "loss": 0.3161, "step": 1681 }, { "epoch": 0.40030939489498424, "grad_norm": 0.5008687287891455, "learning_rate": 9.261260855189815e-06, "loss": 0.3539, "step": 1682 }, { "epoch": 0.40054739096804903, "grad_norm": 0.36119133039165174, "learning_rate": 9.26025215403182e-06, "loss": 0.4348, "step": 1683 }, { "epoch": 0.4007853870411138, "grad_norm": 0.42846844104954446, "learning_rate": 9.259242819694963e-06, "loss": 0.3417, "step": 1684 }, { "epoch": 0.4010233831141786, "grad_norm": 0.4165944881715832, "learning_rate": 9.258232852329253e-06, "loss": 0.317, "step": 1685 }, { "epoch": 0.4012613791872434, "grad_norm": 0.3890173772649005, "learning_rate": 9.257222252084798e-06, "loss": 0.3775, "step": 1686 }, { "epoch": 0.4014993752603082, "grad_norm": 0.38942060350209845, "learning_rate": 9.256211019111799e-06, "loss": 0.4156, "step": 1687 }, { "epoch": 0.401737371333373, "grad_norm": 0.4134669751208099, "learning_rate": 9.255199153560546e-06, "loss": 0.3203, "step": 1688 }, { "epoch": 0.4019753674064378, "grad_norm": 0.40855285972159666, "learning_rate": 9.254186655581431e-06, "loss": 0.3485, "step": 1689 }, { "epoch": 0.4022133634795026, "grad_norm": 0.4066844623707453, "learning_rate": 9.253173525324937e-06, "loss": 0.448, "step": 1690 }, { "epoch": 0.4024513595525674, "grad_norm": 0.40579610697102486, "learning_rate": 9.252159762941638e-06, "loss": 0.3493, "step": 1691 }, { "epoch": 0.4026893556256322, "grad_norm": 0.4612752035521552, "learning_rate": 9.251145368582204e-06, "loss": 0.314, "step": 1692 }, { "epoch": 0.402927351698697, "grad_norm": 0.38917703961071853, "learning_rate": 9.2501303423974e-06, "loss": 0.3809, "step": 1693 }, { "epoch": 0.4031653477717618, "grad_norm": 0.42688170513775625, "learning_rate": 9.249114684538087e-06, "loss": 0.439, "step": 1694 }, { "epoch": 0.40340334384482657, "grad_norm": 0.4270682354405054, "learning_rate": 9.248098395155212e-06, "loss": 0.3197, "step": 1695 }, { "epoch": 0.40364133991789136, "grad_norm": 0.38427191838232466, "learning_rate": 9.247081474399821e-06, "loss": 0.3653, "step": 1696 }, { "epoch": 0.40387933599095616, "grad_norm": 0.3635373879235468, "learning_rate": 9.246063922423057e-06, "loss": 0.3965, "step": 1697 }, { "epoch": 0.40411733206402095, "grad_norm": 0.36800213725949676, "learning_rate": 9.24504573937615e-06, "loss": 0.3629, "step": 1698 }, { "epoch": 0.40435532813708575, "grad_norm": 0.46951813533096415, "learning_rate": 9.24402692541043e-06, "loss": 0.3301, "step": 1699 }, { "epoch": 0.40459332421015054, "grad_norm": 0.40731159678572715, "learning_rate": 9.243007480677317e-06, "loss": 0.3892, "step": 1700 }, { "epoch": 0.40483132028321533, "grad_norm": 0.39470797179107836, "learning_rate": 9.241987405328325e-06, "loss": 0.4199, "step": 1701 }, { "epoch": 0.40506931635628013, "grad_norm": 0.379892047471591, "learning_rate": 9.240966699515062e-06, "loss": 0.339, "step": 1702 }, { "epoch": 0.4053073124293449, "grad_norm": 0.4299870200864259, "learning_rate": 9.239945363389233e-06, "loss": 0.3226, "step": 1703 }, { "epoch": 0.4055453085024097, "grad_norm": 0.38016909458014425, "learning_rate": 9.238923397102629e-06, "loss": 0.4155, "step": 1704 }, { "epoch": 0.4057833045754745, "grad_norm": 0.41162849886265007, "learning_rate": 9.237900800807144e-06, "loss": 0.3961, "step": 1705 }, { "epoch": 0.4060213006485393, "grad_norm": 0.41727989058008336, "learning_rate": 9.23687757465476e-06, "loss": 0.3234, "step": 1706 }, { "epoch": 0.4062592967216041, "grad_norm": 0.4257234199843476, "learning_rate": 9.235853718797552e-06, "loss": 0.3652, "step": 1707 }, { "epoch": 0.4064972927946689, "grad_norm": 0.38093238010454294, "learning_rate": 9.234829233387692e-06, "loss": 0.4513, "step": 1708 }, { "epoch": 0.4067352888677337, "grad_norm": 0.42134937392720456, "learning_rate": 9.233804118577442e-06, "loss": 0.3595, "step": 1709 }, { "epoch": 0.4069732849407985, "grad_norm": 0.36011012728718184, "learning_rate": 9.232778374519162e-06, "loss": 0.3139, "step": 1710 }, { "epoch": 0.4072112810138633, "grad_norm": 0.3848429350956219, "learning_rate": 9.231752001365301e-06, "loss": 0.3874, "step": 1711 }, { "epoch": 0.4074492770869281, "grad_norm": 0.42989698919103714, "learning_rate": 9.230724999268405e-06, "loss": 0.4137, "step": 1712 }, { "epoch": 0.40768727315999287, "grad_norm": 0.40356043597179625, "learning_rate": 9.22969736838111e-06, "loss": 0.3497, "step": 1713 }, { "epoch": 0.40792526923305766, "grad_norm": 0.38088846927432707, "learning_rate": 9.22866910885615e-06, "loss": 0.3511, "step": 1714 }, { "epoch": 0.40816326530612246, "grad_norm": 0.36991122947115884, "learning_rate": 9.227640220846345e-06, "loss": 0.4034, "step": 1715 }, { "epoch": 0.40840126137918725, "grad_norm": 0.4121306552946537, "learning_rate": 9.226610704504619e-06, "loss": 0.3843, "step": 1716 }, { "epoch": 0.40863925745225205, "grad_norm": 0.39375147468162974, "learning_rate": 9.225580559983982e-06, "loss": 0.3329, "step": 1717 }, { "epoch": 0.40887725352531684, "grad_norm": 0.39350805991360777, "learning_rate": 9.224549787437536e-06, "loss": 0.398, "step": 1718 }, { "epoch": 0.40911524959838164, "grad_norm": 0.40232745688418725, "learning_rate": 9.223518387018481e-06, "loss": 0.4178, "step": 1719 }, { "epoch": 0.40935324567144643, "grad_norm": 0.39632952276294475, "learning_rate": 9.22248635888011e-06, "loss": 0.3531, "step": 1720 }, { "epoch": 0.4095912417445112, "grad_norm": 0.38179777362704265, "learning_rate": 9.221453703175805e-06, "loss": 0.3252, "step": 1721 }, { "epoch": 0.409829237817576, "grad_norm": 0.3717944610724434, "learning_rate": 9.220420420059048e-06, "loss": 0.4133, "step": 1722 }, { "epoch": 0.4100672338906408, "grad_norm": 0.3996883821281656, "learning_rate": 9.219386509683408e-06, "loss": 0.38, "step": 1723 }, { "epoch": 0.4103052299637056, "grad_norm": 0.47689973154409665, "learning_rate": 9.21835197220255e-06, "loss": 0.3705, "step": 1724 }, { "epoch": 0.4105432260367704, "grad_norm": 0.40061030822431853, "learning_rate": 9.217316807770232e-06, "loss": 0.3798, "step": 1725 }, { "epoch": 0.4107812221098352, "grad_norm": 0.3891661943835348, "learning_rate": 9.216281016540305e-06, "loss": 0.413, "step": 1726 }, { "epoch": 0.4110192181829, "grad_norm": 0.40874575169057586, "learning_rate": 9.215244598666712e-06, "loss": 0.3299, "step": 1727 }, { "epoch": 0.4112572142559648, "grad_norm": 0.4192275317369746, "learning_rate": 9.214207554303492e-06, "loss": 0.3232, "step": 1728 }, { "epoch": 0.4114952103290296, "grad_norm": 0.39895282478246985, "learning_rate": 9.213169883604776e-06, "loss": 0.4003, "step": 1729 }, { "epoch": 0.4117332064020944, "grad_norm": 0.4354043645900782, "learning_rate": 9.212131586724787e-06, "loss": 0.3975, "step": 1730 }, { "epoch": 0.41197120247515917, "grad_norm": 0.4120078987709819, "learning_rate": 9.211092663817839e-06, "loss": 0.3097, "step": 1731 }, { "epoch": 0.41220919854822397, "grad_norm": 0.389265114227728, "learning_rate": 9.210053115038345e-06, "loss": 0.3601, "step": 1732 }, { "epoch": 0.41244719462128876, "grad_norm": 0.5583703836161701, "learning_rate": 9.209012940540806e-06, "loss": 0.4214, "step": 1733 }, { "epoch": 0.41268519069435355, "grad_norm": 0.3795249796311343, "learning_rate": 9.207972140479817e-06, "loss": 0.3467, "step": 1734 }, { "epoch": 0.41292318676741835, "grad_norm": 0.37887544114873867, "learning_rate": 9.206930715010069e-06, "loss": 0.3176, "step": 1735 }, { "epoch": 0.41316118284048314, "grad_norm": 0.3908429675593901, "learning_rate": 9.205888664286343e-06, "loss": 0.3866, "step": 1736 }, { "epoch": 0.41339917891354794, "grad_norm": 0.4053593309349944, "learning_rate": 9.20484598846351e-06, "loss": 0.4207, "step": 1737 }, { "epoch": 0.41363717498661273, "grad_norm": 0.40431227924333274, "learning_rate": 9.203802687696543e-06, "loss": 0.2908, "step": 1738 }, { "epoch": 0.4138751710596775, "grad_norm": 0.41350971082279164, "learning_rate": 9.2027587621405e-06, "loss": 0.3968, "step": 1739 }, { "epoch": 0.4141131671327423, "grad_norm": 0.37481649448861415, "learning_rate": 9.201714211950532e-06, "loss": 0.4003, "step": 1740 }, { "epoch": 0.4143511632058071, "grad_norm": 0.3800874685713264, "learning_rate": 9.200669037281888e-06, "loss": 0.3346, "step": 1741 }, { "epoch": 0.4145891592788719, "grad_norm": 0.4080865424809329, "learning_rate": 9.199623238289903e-06, "loss": 0.3263, "step": 1742 }, { "epoch": 0.4148271553519367, "grad_norm": 0.38620720186828605, "learning_rate": 9.198576815130013e-06, "loss": 0.3869, "step": 1743 }, { "epoch": 0.4150651514250015, "grad_norm": 0.42012676664637677, "learning_rate": 9.197529767957742e-06, "loss": 0.4334, "step": 1744 }, { "epoch": 0.4153031474980663, "grad_norm": 0.44141968095141493, "learning_rate": 9.196482096928702e-06, "loss": 0.3451, "step": 1745 }, { "epoch": 0.4155411435711311, "grad_norm": 0.3868284133888196, "learning_rate": 9.19543380219861e-06, "loss": 0.3473, "step": 1746 }, { "epoch": 0.4157791396441959, "grad_norm": 0.3881735751224557, "learning_rate": 9.194384883923262e-06, "loss": 0.4139, "step": 1747 }, { "epoch": 0.4160171357172607, "grad_norm": 0.4022882687609258, "learning_rate": 9.193335342258558e-06, "loss": 0.3668, "step": 1748 }, { "epoch": 0.41625513179032547, "grad_norm": 0.36310020776516483, "learning_rate": 9.192285177360482e-06, "loss": 0.3155, "step": 1749 }, { "epoch": 0.41649312786339027, "grad_norm": 0.3999886426578984, "learning_rate": 9.191234389385119e-06, "loss": 0.3557, "step": 1750 }, { "epoch": 0.41673112393645506, "grad_norm": 0.3616937980400372, "learning_rate": 9.19018297848864e-06, "loss": 0.4089, "step": 1751 }, { "epoch": 0.41696912000951986, "grad_norm": 0.3834384131011674, "learning_rate": 9.189130944827308e-06, "loss": 0.3247, "step": 1752 }, { "epoch": 0.41720711608258465, "grad_norm": 0.38320709923324137, "learning_rate": 9.188078288557485e-06, "loss": 0.3301, "step": 1753 }, { "epoch": 0.41744511215564944, "grad_norm": 0.38222131872575443, "learning_rate": 9.18702500983562e-06, "loss": 0.4085, "step": 1754 }, { "epoch": 0.41768310822871424, "grad_norm": 0.3824160404741556, "learning_rate": 9.185971108818254e-06, "loss": 0.4017, "step": 1755 }, { "epoch": 0.41792110430177903, "grad_norm": 0.36169585842835983, "learning_rate": 9.184916585662029e-06, "loss": 0.3161, "step": 1756 }, { "epoch": 0.41815910037484383, "grad_norm": 0.372151927298045, "learning_rate": 9.183861440523667e-06, "loss": 0.386, "step": 1757 }, { "epoch": 0.4183970964479086, "grad_norm": 0.39912606436044784, "learning_rate": 9.182805673559993e-06, "loss": 0.4162, "step": 1758 }, { "epoch": 0.4186350925209734, "grad_norm": 0.4088578633797585, "learning_rate": 9.181749284927917e-06, "loss": 0.3041, "step": 1759 }, { "epoch": 0.4188730885940382, "grad_norm": 0.3872367130929281, "learning_rate": 9.180692274784445e-06, "loss": 0.3263, "step": 1760 }, { "epoch": 0.419111084667103, "grad_norm": 0.3909524702284084, "learning_rate": 9.179634643286677e-06, "loss": 0.4072, "step": 1761 }, { "epoch": 0.4193490807401678, "grad_norm": 0.39296104928130404, "learning_rate": 9.178576390591803e-06, "loss": 0.4156, "step": 1762 }, { "epoch": 0.4195870768132326, "grad_norm": 0.39592073252457005, "learning_rate": 9.177517516857102e-06, "loss": 0.3354, "step": 1763 }, { "epoch": 0.4198250728862974, "grad_norm": 0.4058493679855248, "learning_rate": 9.176458022239954e-06, "loss": 0.3626, "step": 1764 }, { "epoch": 0.4200630689593622, "grad_norm": 0.3971663194653234, "learning_rate": 9.175397906897821e-06, "loss": 0.4147, "step": 1765 }, { "epoch": 0.420301065032427, "grad_norm": 0.39799197552647586, "learning_rate": 9.174337170988265e-06, "loss": 0.3773, "step": 1766 }, { "epoch": 0.4205390611054918, "grad_norm": 0.3674682210725563, "learning_rate": 9.173275814668937e-06, "loss": 0.3324, "step": 1767 }, { "epoch": 0.42077705717855657, "grad_norm": 0.4437292514829072, "learning_rate": 9.17221383809758e-06, "loss": 0.3865, "step": 1768 }, { "epoch": 0.42101505325162136, "grad_norm": 0.3802556994977754, "learning_rate": 9.171151241432034e-06, "loss": 0.4228, "step": 1769 }, { "epoch": 0.42125304932468616, "grad_norm": 0.4701590991382848, "learning_rate": 9.170088024830223e-06, "loss": 0.3363, "step": 1770 }, { "epoch": 0.42149104539775095, "grad_norm": 0.40536949953318796, "learning_rate": 9.169024188450169e-06, "loss": 0.3709, "step": 1771 }, { "epoch": 0.42172904147081575, "grad_norm": 0.36959719177247113, "learning_rate": 9.167959732449983e-06, "loss": 0.407, "step": 1772 }, { "epoch": 0.42196703754388054, "grad_norm": 0.4019427295156592, "learning_rate": 9.16689465698787e-06, "loss": 0.387, "step": 1773 }, { "epoch": 0.42220503361694534, "grad_norm": 0.3795424586658338, "learning_rate": 9.165828962222128e-06, "loss": 0.3037, "step": 1774 }, { "epoch": 0.42244302969001013, "grad_norm": 0.41841783370483765, "learning_rate": 9.164762648311142e-06, "loss": 0.3775, "step": 1775 }, { "epoch": 0.4226810257630749, "grad_norm": 0.3622382328620447, "learning_rate": 9.163695715413399e-06, "loss": 0.3942, "step": 1776 }, { "epoch": 0.4229190218361397, "grad_norm": 0.36992410907411066, "learning_rate": 9.162628163687466e-06, "loss": 0.3483, "step": 1777 }, { "epoch": 0.4231570179092045, "grad_norm": 0.3847751360001051, "learning_rate": 9.16155999329201e-06, "loss": 0.3104, "step": 1778 }, { "epoch": 0.4233950139822693, "grad_norm": 0.395246830815519, "learning_rate": 9.160491204385786e-06, "loss": 0.3947, "step": 1779 }, { "epoch": 0.4236330100553341, "grad_norm": 0.38146783859148997, "learning_rate": 9.159421797127643e-06, "loss": 0.4296, "step": 1780 }, { "epoch": 0.4238710061283989, "grad_norm": 0.4112992635868304, "learning_rate": 9.158351771676523e-06, "loss": 0.3382, "step": 1781 }, { "epoch": 0.4241090022014637, "grad_norm": 0.36930776652809816, "learning_rate": 9.157281128191458e-06, "loss": 0.3593, "step": 1782 }, { "epoch": 0.4243469982745285, "grad_norm": 0.3908100344030319, "learning_rate": 9.156209866831568e-06, "loss": 0.3928, "step": 1783 }, { "epoch": 0.4245849943475933, "grad_norm": 0.36549619200757544, "learning_rate": 9.155137987756075e-06, "loss": 0.3465, "step": 1784 }, { "epoch": 0.4248229904206581, "grad_norm": 0.3849426680408924, "learning_rate": 9.154065491124284e-06, "loss": 0.2999, "step": 1785 }, { "epoch": 0.42506098649372287, "grad_norm": 0.3782937649081525, "learning_rate": 9.152992377095594e-06, "loss": 0.3892, "step": 1786 }, { "epoch": 0.42529898256678766, "grad_norm": 0.4181419981500838, "learning_rate": 9.151918645829495e-06, "loss": 0.4155, "step": 1787 }, { "epoch": 0.42553697863985246, "grad_norm": 0.40443588328082936, "learning_rate": 9.150844297485573e-06, "loss": 0.3487, "step": 1788 }, { "epoch": 0.42577497471291725, "grad_norm": 0.4037548245270624, "learning_rate": 9.149769332223502e-06, "loss": 0.3502, "step": 1789 }, { "epoch": 0.42601297078598205, "grad_norm": 0.37518033203980755, "learning_rate": 9.148693750203046e-06, "loss": 0.4035, "step": 1790 }, { "epoch": 0.42625096685904684, "grad_norm": 0.4112116522406348, "learning_rate": 9.147617551584066e-06, "loss": 0.3702, "step": 1791 }, { "epoch": 0.42648896293211164, "grad_norm": 0.403896574510639, "learning_rate": 9.14654073652651e-06, "loss": 0.3408, "step": 1792 }, { "epoch": 0.42672695900517643, "grad_norm": 0.3757284298168529, "learning_rate": 9.145463305190422e-06, "loss": 0.3561, "step": 1793 }, { "epoch": 0.4269649550782412, "grad_norm": 0.40025611305787007, "learning_rate": 9.144385257735931e-06, "loss": 0.407, "step": 1794 }, { "epoch": 0.427202951151306, "grad_norm": 0.3825822712921448, "learning_rate": 9.143306594323265e-06, "loss": 0.3361, "step": 1795 }, { "epoch": 0.4274409472243708, "grad_norm": 0.3979473641423701, "learning_rate": 9.142227315112739e-06, "loss": 0.3523, "step": 1796 }, { "epoch": 0.4276789432974356, "grad_norm": 0.37047270991043285, "learning_rate": 9.141147420264758e-06, "loss": 0.3966, "step": 1797 }, { "epoch": 0.4279169393705004, "grad_norm": 0.45550159986727345, "learning_rate": 9.140066909939824e-06, "loss": 0.3841, "step": 1798 }, { "epoch": 0.4281549354435652, "grad_norm": 0.4044470476748523, "learning_rate": 9.138985784298528e-06, "loss": 0.3111, "step": 1799 }, { "epoch": 0.42839293151663, "grad_norm": 0.41668001069625243, "learning_rate": 9.13790404350155e-06, "loss": 0.3681, "step": 1800 }, { "epoch": 0.4286309275896948, "grad_norm": 0.36553533604315613, "learning_rate": 9.136821687709664e-06, "loss": 0.4334, "step": 1801 }, { "epoch": 0.4288689236627596, "grad_norm": 0.37044586892206033, "learning_rate": 9.135738717083738e-06, "loss": 0.3309, "step": 1802 }, { "epoch": 0.4291069197358244, "grad_norm": 0.40087470340481024, "learning_rate": 9.134655131784723e-06, "loss": 0.3188, "step": 1803 }, { "epoch": 0.42934491580888917, "grad_norm": 0.35972133195166445, "learning_rate": 9.133570931973668e-06, "loss": 0.4269, "step": 1804 }, { "epoch": 0.42958291188195397, "grad_norm": 0.4047830499287121, "learning_rate": 9.132486117811715e-06, "loss": 0.3887, "step": 1805 }, { "epoch": 0.42982090795501876, "grad_norm": 0.372292118382629, "learning_rate": 9.131400689460091e-06, "loss": 0.3166, "step": 1806 }, { "epoch": 0.43005890402808356, "grad_norm": 0.3801968581448093, "learning_rate": 9.130314647080118e-06, "loss": 0.3544, "step": 1807 }, { "epoch": 0.43029690010114835, "grad_norm": 0.41390771036953183, "learning_rate": 9.129227990833212e-06, "loss": 0.4201, "step": 1808 }, { "epoch": 0.43053489617421314, "grad_norm": 0.37114026857947074, "learning_rate": 9.128140720880872e-06, "loss": 0.3168, "step": 1809 }, { "epoch": 0.43077289224727794, "grad_norm": 0.37943566827382635, "learning_rate": 9.127052837384696e-06, "loss": 0.3383, "step": 1810 }, { "epoch": 0.43101088832034273, "grad_norm": 0.37039949864005134, "learning_rate": 9.125964340506371e-06, "loss": 0.3921, "step": 1811 }, { "epoch": 0.43124888439340753, "grad_norm": 0.4009046077092876, "learning_rate": 9.12487523040767e-06, "loss": 0.4055, "step": 1812 }, { "epoch": 0.4314868804664723, "grad_norm": 0.415891186146942, "learning_rate": 9.12378550725047e-06, "loss": 0.3205, "step": 1813 }, { "epoch": 0.4317248765395371, "grad_norm": 0.39458867165479744, "learning_rate": 9.122695171196724e-06, "loss": 0.3616, "step": 1814 }, { "epoch": 0.4319628726126019, "grad_norm": 0.36931772090998816, "learning_rate": 9.121604222408484e-06, "loss": 0.4181, "step": 1815 }, { "epoch": 0.4322008686856667, "grad_norm": 0.4095231673961699, "learning_rate": 9.120512661047895e-06, "loss": 0.3514, "step": 1816 }, { "epoch": 0.4324388647587315, "grad_norm": 0.4938169837089172, "learning_rate": 9.119420487277186e-06, "loss": 0.3169, "step": 1817 }, { "epoch": 0.4326768608317963, "grad_norm": 0.41916255433374716, "learning_rate": 9.118327701258685e-06, "loss": 0.3944, "step": 1818 }, { "epoch": 0.4329148569048611, "grad_norm": 0.40908642674760587, "learning_rate": 9.117234303154806e-06, "loss": 0.4355, "step": 1819 }, { "epoch": 0.4331528529779259, "grad_norm": 0.3857608329887888, "learning_rate": 9.11614029312805e-06, "loss": 0.3259, "step": 1820 }, { "epoch": 0.4333908490509907, "grad_norm": 0.4067680142939106, "learning_rate": 9.11504567134102e-06, "loss": 0.3557, "step": 1821 }, { "epoch": 0.4336288451240555, "grad_norm": 0.37531284613422866, "learning_rate": 9.113950437956403e-06, "loss": 0.4032, "step": 1822 }, { "epoch": 0.43386684119712027, "grad_norm": 0.39254495182058463, "learning_rate": 9.112854593136976e-06, "loss": 0.3923, "step": 1823 }, { "epoch": 0.43410483727018506, "grad_norm": 0.4058157753281052, "learning_rate": 9.111758137045609e-06, "loss": 0.3266, "step": 1824 }, { "epoch": 0.43434283334324986, "grad_norm": 0.36694701438635535, "learning_rate": 9.110661069845263e-06, "loss": 0.3684, "step": 1825 }, { "epoch": 0.43458082941631465, "grad_norm": 0.36461266098587286, "learning_rate": 9.10956339169899e-06, "loss": 0.4108, "step": 1826 }, { "epoch": 0.43481882548937945, "grad_norm": 0.39631841763000103, "learning_rate": 9.10846510276993e-06, "loss": 0.337, "step": 1827 }, { "epoch": 0.43505682156244424, "grad_norm": 0.40343286423290825, "learning_rate": 9.107366203221318e-06, "loss": 0.344, "step": 1828 }, { "epoch": 0.43529481763550903, "grad_norm": 0.36284461096995285, "learning_rate": 9.106266693216477e-06, "loss": 0.3818, "step": 1829 }, { "epoch": 0.43553281370857383, "grad_norm": 0.42037283100640777, "learning_rate": 9.10516657291882e-06, "loss": 0.4134, "step": 1830 }, { "epoch": 0.4357708097816386, "grad_norm": 0.4135556119364222, "learning_rate": 9.104065842491854e-06, "loss": 0.3355, "step": 1831 }, { "epoch": 0.4360088058547034, "grad_norm": 0.5512601557394179, "learning_rate": 9.102964502099175e-06, "loss": 0.3644, "step": 1832 }, { "epoch": 0.4362468019277682, "grad_norm": 0.3726460220199828, "learning_rate": 9.101862551904467e-06, "loss": 0.4127, "step": 1833 }, { "epoch": 0.436484798000833, "grad_norm": 0.3889721147844331, "learning_rate": 9.100759992071509e-06, "loss": 0.3612, "step": 1834 }, { "epoch": 0.4367227940738978, "grad_norm": 0.3827603318719862, "learning_rate": 9.099656822764169e-06, "loss": 0.2957, "step": 1835 }, { "epoch": 0.4369607901469626, "grad_norm": 0.3637594579871201, "learning_rate": 9.098553044146404e-06, "loss": 0.3518, "step": 1836 }, { "epoch": 0.4371987862200274, "grad_norm": 0.41257520091473093, "learning_rate": 9.097448656382263e-06, "loss": 0.4387, "step": 1837 }, { "epoch": 0.4374367822930922, "grad_norm": 0.43864336163698847, "learning_rate": 9.096343659635887e-06, "loss": 0.3276, "step": 1838 }, { "epoch": 0.437674778366157, "grad_norm": 0.3841715752329595, "learning_rate": 9.095238054071505e-06, "loss": 0.3416, "step": 1839 }, { "epoch": 0.4379127744392218, "grad_norm": 0.370655417628842, "learning_rate": 9.094131839853435e-06, "loss": 0.3964, "step": 1840 }, { "epoch": 0.43815077051228657, "grad_norm": 0.3513470462073168, "learning_rate": 9.093025017146089e-06, "loss": 0.3362, "step": 1841 }, { "epoch": 0.43838876658535136, "grad_norm": 0.3802770131312354, "learning_rate": 9.09191758611397e-06, "loss": 0.3301, "step": 1842 }, { "epoch": 0.43862676265841616, "grad_norm": 0.4226138218606067, "learning_rate": 9.09080954692167e-06, "loss": 0.3848, "step": 1843 }, { "epoch": 0.43886475873148095, "grad_norm": 0.4535187719262108, "learning_rate": 9.089700899733867e-06, "loss": 0.4167, "step": 1844 }, { "epoch": 0.43910275480454575, "grad_norm": 0.3702844760500393, "learning_rate": 9.088591644715338e-06, "loss": 0.3273, "step": 1845 }, { "epoch": 0.43934075087761054, "grad_norm": 0.4038109651897949, "learning_rate": 9.087481782030943e-06, "loss": 0.3614, "step": 1846 }, { "epoch": 0.43957874695067534, "grad_norm": 0.40604586580793806, "learning_rate": 9.086371311845636e-06, "loss": 0.4053, "step": 1847 }, { "epoch": 0.43981674302374013, "grad_norm": 0.421721246391553, "learning_rate": 9.08526023432446e-06, "loss": 0.3641, "step": 1848 }, { "epoch": 0.4400547390968049, "grad_norm": 0.38810442228840714, "learning_rate": 9.084148549632547e-06, "loss": 0.3291, "step": 1849 }, { "epoch": 0.4402927351698697, "grad_norm": 0.4074211523419894, "learning_rate": 9.083036257935125e-06, "loss": 0.3818, "step": 1850 }, { "epoch": 0.4405307312429345, "grad_norm": 0.3979947415081816, "learning_rate": 9.081923359397504e-06, "loss": 0.4428, "step": 1851 }, { "epoch": 0.4407687273159993, "grad_norm": 0.4180778911918767, "learning_rate": 9.080809854185091e-06, "loss": 0.3094, "step": 1852 }, { "epoch": 0.4410067233890641, "grad_norm": 0.44372997553941645, "learning_rate": 9.07969574246338e-06, "loss": 0.3535, "step": 1853 }, { "epoch": 0.4412447194621289, "grad_norm": 0.41225588433239624, "learning_rate": 9.078581024397952e-06, "loss": 0.4067, "step": 1854 }, { "epoch": 0.4414827155351937, "grad_norm": 0.4001859550814012, "learning_rate": 9.077465700154487e-06, "loss": 0.433, "step": 1855 }, { "epoch": 0.4417207116082585, "grad_norm": 0.4167341678602161, "learning_rate": 9.076349769898746e-06, "loss": 0.3567, "step": 1856 }, { "epoch": 0.4419587076813233, "grad_norm": 0.40392402299201724, "learning_rate": 9.075233233796585e-06, "loss": 0.3763, "step": 1857 }, { "epoch": 0.4421967037543881, "grad_norm": 0.362779786516785, "learning_rate": 9.074116092013952e-06, "loss": 0.3903, "step": 1858 }, { "epoch": 0.44243469982745287, "grad_norm": 0.3781623110186848, "learning_rate": 9.072998344716875e-06, "loss": 0.3343, "step": 1859 }, { "epoch": 0.44267269590051767, "grad_norm": 0.38836103267593736, "learning_rate": 9.071879992071484e-06, "loss": 0.3055, "step": 1860 }, { "epoch": 0.44291069197358246, "grad_norm": 0.37564508597971813, "learning_rate": 9.070761034243995e-06, "loss": 0.4162, "step": 1861 }, { "epoch": 0.44314868804664725, "grad_norm": 0.381826072613129, "learning_rate": 9.069641471400707e-06, "loss": 0.3939, "step": 1862 }, { "epoch": 0.44338668411971205, "grad_norm": 0.41067999322559867, "learning_rate": 9.06852130370802e-06, "loss": 0.314, "step": 1863 }, { "epoch": 0.44362468019277684, "grad_norm": 0.4382314242050576, "learning_rate": 9.067400531332418e-06, "loss": 0.3397, "step": 1864 }, { "epoch": 0.44386267626584164, "grad_norm": 0.3671784649407471, "learning_rate": 9.066279154440474e-06, "loss": 0.4134, "step": 1865 }, { "epoch": 0.44410067233890643, "grad_norm": 0.38775612781990293, "learning_rate": 9.065157173198852e-06, "loss": 0.3365, "step": 1866 }, { "epoch": 0.4443386684119712, "grad_norm": 0.399921745750612, "learning_rate": 9.064034587774307e-06, "loss": 0.3262, "step": 1867 }, { "epoch": 0.444576664485036, "grad_norm": 0.3883120453391914, "learning_rate": 9.062911398333682e-06, "loss": 0.3783, "step": 1868 }, { "epoch": 0.4448146605581008, "grad_norm": 0.42591617134823834, "learning_rate": 9.061787605043913e-06, "loss": 0.4329, "step": 1869 }, { "epoch": 0.4450526566311656, "grad_norm": 0.4145202625576308, "learning_rate": 9.060663208072022e-06, "loss": 0.319, "step": 1870 }, { "epoch": 0.4452906527042304, "grad_norm": 0.4256230711123469, "learning_rate": 9.059538207585123e-06, "loss": 0.3498, "step": 1871 }, { "epoch": 0.4455286487772952, "grad_norm": 0.36200201893045636, "learning_rate": 9.058412603750417e-06, "loss": 0.4051, "step": 1872 }, { "epoch": 0.44576664485036, "grad_norm": 0.4088077633298198, "learning_rate": 9.057286396735198e-06, "loss": 0.352, "step": 1873 }, { "epoch": 0.4460046409234248, "grad_norm": 0.411767781342609, "learning_rate": 9.056159586706847e-06, "loss": 0.3159, "step": 1874 }, { "epoch": 0.4462426369964896, "grad_norm": 0.38770515767105623, "learning_rate": 9.055032173832838e-06, "loss": 0.3435, "step": 1875 }, { "epoch": 0.4464806330695544, "grad_norm": 0.4063020777640675, "learning_rate": 9.053904158280731e-06, "loss": 0.448, "step": 1876 }, { "epoch": 0.4467186291426192, "grad_norm": 0.4108973315393291, "learning_rate": 9.052775540218178e-06, "loss": 0.3257, "step": 1877 }, { "epoch": 0.44695662521568397, "grad_norm": 0.3962899590466375, "learning_rate": 9.05164631981292e-06, "loss": 0.3428, "step": 1878 }, { "epoch": 0.44719462128874876, "grad_norm": 0.39154457859950453, "learning_rate": 9.050516497232783e-06, "loss": 0.4113, "step": 1879 }, { "epoch": 0.44743261736181356, "grad_norm": 0.40521710300411384, "learning_rate": 9.049386072645691e-06, "loss": 0.3697, "step": 1880 }, { "epoch": 0.44767061343487835, "grad_norm": 0.41448812938566526, "learning_rate": 9.048255046219652e-06, "loss": 0.3128, "step": 1881 }, { "epoch": 0.44790860950794315, "grad_norm": 0.3732301223230006, "learning_rate": 9.047123418122762e-06, "loss": 0.3486, "step": 1882 }, { "epoch": 0.44814660558100794, "grad_norm": 0.37645904480775877, "learning_rate": 9.045991188523213e-06, "loss": 0.4007, "step": 1883 }, { "epoch": 0.44838460165407273, "grad_norm": 0.5589491366235092, "learning_rate": 9.044858357589281e-06, "loss": 0.3507, "step": 1884 }, { "epoch": 0.44862259772713753, "grad_norm": 0.41677787585285414, "learning_rate": 9.043724925489332e-06, "loss": 0.3181, "step": 1885 }, { "epoch": 0.4488605938002023, "grad_norm": 0.41414556643068295, "learning_rate": 9.04259089239182e-06, "loss": 0.4107, "step": 1886 }, { "epoch": 0.4490985898732671, "grad_norm": 0.43327968208641654, "learning_rate": 9.041456258465295e-06, "loss": 0.4257, "step": 1887 }, { "epoch": 0.4493365859463319, "grad_norm": 0.4245883014008134, "learning_rate": 9.040321023878387e-06, "loss": 0.3388, "step": 1888 }, { "epoch": 0.4495745820193967, "grad_norm": 0.4421642723801354, "learning_rate": 9.039185188799824e-06, "loss": 0.3743, "step": 1889 }, { "epoch": 0.4498125780924615, "grad_norm": 0.41567634181571017, "learning_rate": 9.038048753398417e-06, "loss": 0.4091, "step": 1890 }, { "epoch": 0.4500505741655263, "grad_norm": 0.4133338214658035, "learning_rate": 9.036911717843067e-06, "loss": 0.3367, "step": 1891 }, { "epoch": 0.4502885702385911, "grad_norm": 0.41679245676442966, "learning_rate": 9.035774082302769e-06, "loss": 0.3222, "step": 1892 }, { "epoch": 0.4505265663116559, "grad_norm": 0.38537142529458723, "learning_rate": 9.034635846946603e-06, "loss": 0.3554, "step": 1893 }, { "epoch": 0.4507645623847207, "grad_norm": 0.4135663142912014, "learning_rate": 9.033497011943735e-06, "loss": 0.4212, "step": 1894 }, { "epoch": 0.4510025584577855, "grad_norm": 0.39185654824504884, "learning_rate": 9.032357577463429e-06, "loss": 0.3207, "step": 1895 }, { "epoch": 0.45124055453085027, "grad_norm": 0.39385062485453165, "learning_rate": 9.031217543675032e-06, "loss": 0.33, "step": 1896 }, { "epoch": 0.45147855060391506, "grad_norm": 0.41350024044695805, "learning_rate": 9.03007691074798e-06, "loss": 0.395, "step": 1897 }, { "epoch": 0.4517165466769798, "grad_norm": 0.4048615581588889, "learning_rate": 9.028935678851798e-06, "loss": 0.3942, "step": 1898 }, { "epoch": 0.4519545427500446, "grad_norm": 0.43192810625467914, "learning_rate": 9.027793848156106e-06, "loss": 0.2972, "step": 1899 }, { "epoch": 0.4521925388231094, "grad_norm": 0.3638638789244606, "learning_rate": 9.026651418830603e-06, "loss": 0.3737, "step": 1900 }, { "epoch": 0.4524305348961742, "grad_norm": 0.4178434063849281, "learning_rate": 9.025508391045087e-06, "loss": 0.4463, "step": 1901 }, { "epoch": 0.452668530969239, "grad_norm": 0.43266874725956067, "learning_rate": 9.024364764969435e-06, "loss": 0.354, "step": 1902 }, { "epoch": 0.4529065270423038, "grad_norm": 0.4386170116690222, "learning_rate": 9.023220540773621e-06, "loss": 0.3436, "step": 1903 }, { "epoch": 0.45314452311536857, "grad_norm": 0.4154738684824685, "learning_rate": 9.022075718627707e-06, "loss": 0.3934, "step": 1904 }, { "epoch": 0.45338251918843336, "grad_norm": 0.4336014698946849, "learning_rate": 9.02093029870184e-06, "loss": 0.376, "step": 1905 }, { "epoch": 0.45362051526149816, "grad_norm": 0.4496211440899709, "learning_rate": 9.019784281166255e-06, "loss": 0.3017, "step": 1906 }, { "epoch": 0.45385851133456295, "grad_norm": 0.438757831396024, "learning_rate": 9.018637666191284e-06, "loss": 0.3907, "step": 1907 }, { "epoch": 0.45409650740762775, "grad_norm": 0.38217794195897165, "learning_rate": 9.017490453947337e-06, "loss": 0.4368, "step": 1908 }, { "epoch": 0.45433450348069254, "grad_norm": 0.4045541745421685, "learning_rate": 9.016342644604923e-06, "loss": 0.3241, "step": 1909 }, { "epoch": 0.45457249955375734, "grad_norm": 0.4749105178045131, "learning_rate": 9.01519423833463e-06, "loss": 0.3236, "step": 1910 }, { "epoch": 0.45481049562682213, "grad_norm": 0.39441695061972637, "learning_rate": 9.014045235307144e-06, "loss": 0.3832, "step": 1911 }, { "epoch": 0.4550484916998869, "grad_norm": 0.4006478226656663, "learning_rate": 9.012895635693232e-06, "loss": 0.3885, "step": 1912 }, { "epoch": 0.4552864877729517, "grad_norm": 0.42235636070301646, "learning_rate": 9.011745439663756e-06, "loss": 0.3053, "step": 1913 }, { "epoch": 0.4555244838460165, "grad_norm": 0.4330833366027642, "learning_rate": 9.010594647389662e-06, "loss": 0.3426, "step": 1914 }, { "epoch": 0.4557624799190813, "grad_norm": 0.3935945827408916, "learning_rate": 9.009443259041984e-06, "loss": 0.438, "step": 1915 }, { "epoch": 0.4560004759921461, "grad_norm": 0.4573862811142753, "learning_rate": 9.008291274791849e-06, "loss": 0.3511, "step": 1916 }, { "epoch": 0.4562384720652109, "grad_norm": 0.4191822242792251, "learning_rate": 9.00713869481047e-06, "loss": 0.3424, "step": 1917 }, { "epoch": 0.4564764681382757, "grad_norm": 0.3714154576618009, "learning_rate": 9.005985519269151e-06, "loss": 0.3758, "step": 1918 }, { "epoch": 0.4567144642113405, "grad_norm": 0.3968866803688529, "learning_rate": 9.00483174833928e-06, "loss": 0.4399, "step": 1919 }, { "epoch": 0.4569524602844053, "grad_norm": 0.4381863859987972, "learning_rate": 9.003677382192337e-06, "loss": 0.3317, "step": 1920 }, { "epoch": 0.4571904563574701, "grad_norm": 0.3668542092189725, "learning_rate": 9.002522420999887e-06, "loss": 0.3302, "step": 1921 }, { "epoch": 0.45742845243053487, "grad_norm": 0.40073715252923153, "learning_rate": 9.00136686493359e-06, "loss": 0.3946, "step": 1922 }, { "epoch": 0.45766644850359967, "grad_norm": 0.422377185704182, "learning_rate": 9.000210714165185e-06, "loss": 0.3721, "step": 1923 }, { "epoch": 0.45790444457666446, "grad_norm": 0.39892724787175154, "learning_rate": 8.999053968866509e-06, "loss": 0.3285, "step": 1924 }, { "epoch": 0.45814244064972925, "grad_norm": 0.39420537454241245, "learning_rate": 8.997896629209482e-06, "loss": 0.3954, "step": 1925 }, { "epoch": 0.45838043672279405, "grad_norm": 0.3875725349434963, "learning_rate": 8.996738695366111e-06, "loss": 0.4209, "step": 1926 }, { "epoch": 0.45861843279585884, "grad_norm": 0.4320566083850361, "learning_rate": 8.995580167508495e-06, "loss": 0.3111, "step": 1927 }, { "epoch": 0.45885642886892364, "grad_norm": 0.39786821137659234, "learning_rate": 8.994421045808821e-06, "loss": 0.3344, "step": 1928 }, { "epoch": 0.45909442494198843, "grad_norm": 0.3864397258868153, "learning_rate": 8.993261330439365e-06, "loss": 0.4114, "step": 1929 }, { "epoch": 0.4593324210150532, "grad_norm": 0.44366727708416975, "learning_rate": 8.992101021572483e-06, "loss": 0.3847, "step": 1930 }, { "epoch": 0.459570417088118, "grad_norm": 0.42879150308085556, "learning_rate": 8.990940119380632e-06, "loss": 0.3251, "step": 1931 }, { "epoch": 0.4598084131611828, "grad_norm": 0.4013669319373018, "learning_rate": 8.989778624036346e-06, "loss": 0.3751, "step": 1932 }, { "epoch": 0.4600464092342476, "grad_norm": 0.38329016484024186, "learning_rate": 8.988616535712255e-06, "loss": 0.4322, "step": 1933 }, { "epoch": 0.4602844053073124, "grad_norm": 0.4593810052561611, "learning_rate": 8.987453854581074e-06, "loss": 0.351, "step": 1934 }, { "epoch": 0.4605224013803772, "grad_norm": 0.4290257106099543, "learning_rate": 8.986290580815605e-06, "loss": 0.2975, "step": 1935 }, { "epoch": 0.460760397453442, "grad_norm": 0.38235899911292076, "learning_rate": 8.985126714588739e-06, "loss": 0.4136, "step": 1936 }, { "epoch": 0.4609983935265068, "grad_norm": 0.38940541074163637, "learning_rate": 8.983962256073457e-06, "loss": 0.4249, "step": 1937 }, { "epoch": 0.4612363895995716, "grad_norm": 0.3599022107051358, "learning_rate": 8.982797205442823e-06, "loss": 0.3004, "step": 1938 }, { "epoch": 0.4614743856726364, "grad_norm": 0.3859999702393283, "learning_rate": 8.981631562869997e-06, "loss": 0.3493, "step": 1939 }, { "epoch": 0.4617123817457012, "grad_norm": 0.38132124586744975, "learning_rate": 8.98046532852822e-06, "loss": 0.4082, "step": 1940 }, { "epoch": 0.46195037781876597, "grad_norm": 0.40954534803570225, "learning_rate": 8.979298502590821e-06, "loss": 0.3792, "step": 1941 }, { "epoch": 0.46218837389183076, "grad_norm": 0.3763058419790421, "learning_rate": 8.978131085231223e-06, "loss": 0.314, "step": 1942 }, { "epoch": 0.46242636996489556, "grad_norm": 0.3801916014322026, "learning_rate": 8.976963076622932e-06, "loss": 0.3608, "step": 1943 }, { "epoch": 0.46266436603796035, "grad_norm": 0.3754880817697566, "learning_rate": 8.975794476939541e-06, "loss": 0.4133, "step": 1944 }, { "epoch": 0.46290236211102515, "grad_norm": 0.4458270346361261, "learning_rate": 8.974625286354735e-06, "loss": 0.3359, "step": 1945 }, { "epoch": 0.46314035818408994, "grad_norm": 0.40596951932507613, "learning_rate": 8.973455505042285e-06, "loss": 0.36, "step": 1946 }, { "epoch": 0.46337835425715473, "grad_norm": 0.3777591671479931, "learning_rate": 8.972285133176047e-06, "loss": 0.4053, "step": 1947 }, { "epoch": 0.46361635033021953, "grad_norm": 0.381883104188769, "learning_rate": 8.97111417092997e-06, "loss": 0.3788, "step": 1948 }, { "epoch": 0.4638543464032843, "grad_norm": 0.41602474134267337, "learning_rate": 8.969942618478085e-06, "loss": 0.3184, "step": 1949 }, { "epoch": 0.4640923424763491, "grad_norm": 0.3838946466607663, "learning_rate": 8.968770475994514e-06, "loss": 0.3573, "step": 1950 }, { "epoch": 0.4643303385494139, "grad_norm": 0.3860312821497556, "learning_rate": 8.967597743653471e-06, "loss": 0.4234, "step": 1951 }, { "epoch": 0.4645683346224787, "grad_norm": 0.36977923888724, "learning_rate": 8.966424421629247e-06, "loss": 0.3242, "step": 1952 }, { "epoch": 0.4648063306955435, "grad_norm": 0.3937693643527784, "learning_rate": 8.965250510096231e-06, "loss": 0.3117, "step": 1953 }, { "epoch": 0.4650443267686083, "grad_norm": 0.38742510667851454, "learning_rate": 8.964076009228892e-06, "loss": 0.3899, "step": 1954 }, { "epoch": 0.4652823228416731, "grad_norm": 0.381510778099069, "learning_rate": 8.962900919201793e-06, "loss": 0.363, "step": 1955 }, { "epoch": 0.4655203189147379, "grad_norm": 0.4266911809851305, "learning_rate": 8.96172524018958e-06, "loss": 0.3424, "step": 1956 }, { "epoch": 0.4657583149878027, "grad_norm": 0.39273456552864333, "learning_rate": 8.960548972366987e-06, "loss": 0.3623, "step": 1957 }, { "epoch": 0.4659963110608675, "grad_norm": 0.39550307752848174, "learning_rate": 8.959372115908838e-06, "loss": 0.4073, "step": 1958 }, { "epoch": 0.46623430713393227, "grad_norm": 0.3939424995482473, "learning_rate": 8.958194670990043e-06, "loss": 0.3466, "step": 1959 }, { "epoch": 0.46647230320699706, "grad_norm": 0.4163291327720836, "learning_rate": 8.957016637785599e-06, "loss": 0.3167, "step": 1960 }, { "epoch": 0.46671029928006186, "grad_norm": 0.3744209884685097, "learning_rate": 8.95583801647059e-06, "loss": 0.3617, "step": 1961 }, { "epoch": 0.46694829535312665, "grad_norm": 0.36438599515039355, "learning_rate": 8.954658807220189e-06, "loss": 0.3979, "step": 1962 }, { "epoch": 0.46718629142619145, "grad_norm": 0.36878304664624034, "learning_rate": 8.953479010209655e-06, "loss": 0.3402, "step": 1963 }, { "epoch": 0.46742428749925624, "grad_norm": 0.38784861609124194, "learning_rate": 8.952298625614335e-06, "loss": 0.3465, "step": 1964 }, { "epoch": 0.46766228357232104, "grad_norm": 0.36193075171069583, "learning_rate": 8.951117653609666e-06, "loss": 0.4162, "step": 1965 }, { "epoch": 0.46790027964538583, "grad_norm": 0.4273533447324452, "learning_rate": 8.949936094371168e-06, "loss": 0.357, "step": 1966 }, { "epoch": 0.4681382757184506, "grad_norm": 0.3748626683477407, "learning_rate": 8.948753948074448e-06, "loss": 0.3286, "step": 1967 }, { "epoch": 0.4683762717915154, "grad_norm": 0.3615946348473227, "learning_rate": 8.947571214895206e-06, "loss": 0.374, "step": 1968 }, { "epoch": 0.4686142678645802, "grad_norm": 0.40199712164294776, "learning_rate": 8.946387895009221e-06, "loss": 0.4383, "step": 1969 }, { "epoch": 0.468852263937645, "grad_norm": 0.40189899947360347, "learning_rate": 8.945203988592368e-06, "loss": 0.3137, "step": 1970 }, { "epoch": 0.4690902600107098, "grad_norm": 0.42304082101866364, "learning_rate": 8.944019495820602e-06, "loss": 0.3607, "step": 1971 }, { "epoch": 0.4693282560837746, "grad_norm": 0.3915717145865347, "learning_rate": 8.942834416869967e-06, "loss": 0.4186, "step": 1972 }, { "epoch": 0.4695662521568394, "grad_norm": 0.3699303309540853, "learning_rate": 8.941648751916598e-06, "loss": 0.3479, "step": 1973 }, { "epoch": 0.4698042482299042, "grad_norm": 0.3931300817695143, "learning_rate": 8.940462501136712e-06, "loss": 0.3416, "step": 1974 }, { "epoch": 0.470042244302969, "grad_norm": 0.4174108723423135, "learning_rate": 8.939275664706618e-06, "loss": 0.3809, "step": 1975 }, { "epoch": 0.4702802403760338, "grad_norm": 0.4017321399357895, "learning_rate": 8.938088242802705e-06, "loss": 0.3999, "step": 1976 }, { "epoch": 0.47051823644909857, "grad_norm": 0.3796367988353256, "learning_rate": 8.936900235601456e-06, "loss": 0.3103, "step": 1977 }, { "epoch": 0.47075623252216336, "grad_norm": 0.4341261574035814, "learning_rate": 8.93571164327944e-06, "loss": 0.3357, "step": 1978 }, { "epoch": 0.47099422859522816, "grad_norm": 0.4070316455404249, "learning_rate": 8.934522466013305e-06, "loss": 0.3898, "step": 1979 }, { "epoch": 0.47123222466829295, "grad_norm": 0.3974826629955441, "learning_rate": 8.933332703979798e-06, "loss": 0.3888, "step": 1980 }, { "epoch": 0.47147022074135775, "grad_norm": 0.4184039008610896, "learning_rate": 8.932142357355747e-06, "loss": 0.3078, "step": 1981 }, { "epoch": 0.47170821681442254, "grad_norm": 0.41469276127770316, "learning_rate": 8.930951426318061e-06, "loss": 0.346, "step": 1982 }, { "epoch": 0.47194621288748734, "grad_norm": 0.3679140205317907, "learning_rate": 8.929759911043749e-06, "loss": 0.4249, "step": 1983 }, { "epoch": 0.47218420896055213, "grad_norm": 0.38233800356637293, "learning_rate": 8.928567811709897e-06, "loss": 0.3664, "step": 1984 }, { "epoch": 0.4724222050336169, "grad_norm": 0.3860957949442366, "learning_rate": 8.927375128493679e-06, "loss": 0.3278, "step": 1985 }, { "epoch": 0.4726602011066817, "grad_norm": 0.42016968648143477, "learning_rate": 8.92618186157236e-06, "loss": 0.4028, "step": 1986 }, { "epoch": 0.4728981971797465, "grad_norm": 0.3932672809367911, "learning_rate": 8.924988011123286e-06, "loss": 0.4194, "step": 1987 }, { "epoch": 0.4731361932528113, "grad_norm": 0.3914761416304697, "learning_rate": 8.923793577323894e-06, "loss": 0.289, "step": 1988 }, { "epoch": 0.4733741893258761, "grad_norm": 0.41886930971894176, "learning_rate": 8.922598560351705e-06, "loss": 0.3386, "step": 1989 }, { "epoch": 0.4736121853989409, "grad_norm": 0.40054887442025705, "learning_rate": 8.92140296038433e-06, "loss": 0.4222, "step": 1990 }, { "epoch": 0.4738501814720057, "grad_norm": 0.37901621734635876, "learning_rate": 8.920206777599467e-06, "loss": 0.3587, "step": 1991 }, { "epoch": 0.4740881775450705, "grad_norm": 0.37289856531710025, "learning_rate": 8.919010012174894e-06, "loss": 0.3589, "step": 1992 }, { "epoch": 0.4743261736181353, "grad_norm": 0.37673235179772124, "learning_rate": 8.917812664288481e-06, "loss": 0.3493, "step": 1993 }, { "epoch": 0.4745641696912001, "grad_norm": 0.40946971737432014, "learning_rate": 8.916614734118184e-06, "loss": 0.4314, "step": 1994 }, { "epoch": 0.47480216576426487, "grad_norm": 0.3663866235640273, "learning_rate": 8.915416221842045e-06, "loss": 0.3188, "step": 1995 }, { "epoch": 0.47504016183732967, "grad_norm": 0.3729349793322217, "learning_rate": 8.914217127638194e-06, "loss": 0.33, "step": 1996 }, { "epoch": 0.47527815791039446, "grad_norm": 0.37819004145885465, "learning_rate": 8.913017451684845e-06, "loss": 0.3864, "step": 1997 }, { "epoch": 0.47551615398345926, "grad_norm": 0.3816038357588018, "learning_rate": 8.911817194160297e-06, "loss": 0.3707, "step": 1998 }, { "epoch": 0.47575415005652405, "grad_norm": 0.381594564688525, "learning_rate": 8.910616355242943e-06, "loss": 0.2955, "step": 1999 }, { "epoch": 0.47599214612958884, "grad_norm": 0.4144362212607731, "learning_rate": 8.909414935111251e-06, "loss": 0.3695, "step": 2000 }, { "epoch": 0.47623014220265364, "grad_norm": 0.40541513918237165, "learning_rate": 8.908212933943788e-06, "loss": 0.4287, "step": 2001 }, { "epoch": 0.47646813827571843, "grad_norm": 0.4010091276525552, "learning_rate": 8.907010351919198e-06, "loss": 0.352, "step": 2002 }, { "epoch": 0.47670613434878323, "grad_norm": 0.37436400202401043, "learning_rate": 8.905807189216216e-06, "loss": 0.3316, "step": 2003 }, { "epoch": 0.476944130421848, "grad_norm": 0.3610601587615899, "learning_rate": 8.90460344601366e-06, "loss": 0.3977, "step": 2004 }, { "epoch": 0.4771821264949128, "grad_norm": 0.40390327913148766, "learning_rate": 8.903399122490436e-06, "loss": 0.4071, "step": 2005 }, { "epoch": 0.4774201225679776, "grad_norm": 0.39707419630426943, "learning_rate": 8.902194218825537e-06, "loss": 0.3261, "step": 2006 }, { "epoch": 0.4776581186410424, "grad_norm": 0.4064601483285377, "learning_rate": 8.900988735198043e-06, "loss": 0.373, "step": 2007 }, { "epoch": 0.4778961147141072, "grad_norm": 0.3921311614892764, "learning_rate": 8.899782671787114e-06, "loss": 0.402, "step": 2008 }, { "epoch": 0.478134110787172, "grad_norm": 0.38829304720202296, "learning_rate": 8.898576028772006e-06, "loss": 0.3733, "step": 2009 }, { "epoch": 0.4783721068602368, "grad_norm": 0.423689833869032, "learning_rate": 8.897368806332053e-06, "loss": 0.3363, "step": 2010 }, { "epoch": 0.4786101029333016, "grad_norm": 0.40741104435951725, "learning_rate": 8.896161004646682e-06, "loss": 0.3923, "step": 2011 }, { "epoch": 0.4788480990063664, "grad_norm": 0.38648441274354406, "learning_rate": 8.894952623895396e-06, "loss": 0.4054, "step": 2012 }, { "epoch": 0.4790860950794312, "grad_norm": 0.3899353786856898, "learning_rate": 8.893743664257796e-06, "loss": 0.3402, "step": 2013 }, { "epoch": 0.47932409115249597, "grad_norm": 0.37220325831387135, "learning_rate": 8.892534125913558e-06, "loss": 0.3757, "step": 2014 }, { "epoch": 0.47956208722556076, "grad_norm": 0.37816661979461397, "learning_rate": 8.891324009042456e-06, "loss": 0.4359, "step": 2015 }, { "epoch": 0.47980008329862556, "grad_norm": 0.3787006467971716, "learning_rate": 8.890113313824339e-06, "loss": 0.366, "step": 2016 }, { "epoch": 0.48003807937169035, "grad_norm": 0.36458447262183397, "learning_rate": 8.888902040439145e-06, "loss": 0.3146, "step": 2017 }, { "epoch": 0.48027607544475515, "grad_norm": 0.39938985399119364, "learning_rate": 8.887690189066899e-06, "loss": 0.3725, "step": 2018 }, { "epoch": 0.48051407151781994, "grad_norm": 0.3591918893002723, "learning_rate": 8.886477759887717e-06, "loss": 0.3943, "step": 2019 }, { "epoch": 0.48075206759088474, "grad_norm": 0.36999609920363924, "learning_rate": 8.885264753081794e-06, "loss": 0.3297, "step": 2020 }, { "epoch": 0.48099006366394953, "grad_norm": 0.407098460962921, "learning_rate": 8.884051168829409e-06, "loss": 0.3605, "step": 2021 }, { "epoch": 0.4812280597370143, "grad_norm": 0.3847503739932104, "learning_rate": 8.882837007310936e-06, "loss": 0.4087, "step": 2022 }, { "epoch": 0.4814660558100791, "grad_norm": 0.3953664392850304, "learning_rate": 8.881622268706825e-06, "loss": 0.3865, "step": 2023 }, { "epoch": 0.4817040518831439, "grad_norm": 0.4139311567463376, "learning_rate": 8.88040695319762e-06, "loss": 0.3024, "step": 2024 }, { "epoch": 0.4819420479562087, "grad_norm": 0.3915339203989994, "learning_rate": 8.879191060963943e-06, "loss": 0.3784, "step": 2025 }, { "epoch": 0.4821800440292735, "grad_norm": 0.3600119497807205, "learning_rate": 8.87797459218651e-06, "loss": 0.4668, "step": 2026 }, { "epoch": 0.4824180401023383, "grad_norm": 0.4115620298847174, "learning_rate": 8.876757547046116e-06, "loss": 0.3102, "step": 2027 }, { "epoch": 0.4826560361754031, "grad_norm": 0.4194742305434396, "learning_rate": 8.875539925723641e-06, "loss": 0.3474, "step": 2028 }, { "epoch": 0.4828940322484679, "grad_norm": 0.37165614518858103, "learning_rate": 8.874321728400059e-06, "loss": 0.3922, "step": 2029 }, { "epoch": 0.4831320283215327, "grad_norm": 0.37833468745165144, "learning_rate": 8.873102955256423e-06, "loss": 0.3875, "step": 2030 }, { "epoch": 0.4833700243945975, "grad_norm": 0.4131846841190066, "learning_rate": 8.871883606473871e-06, "loss": 0.3283, "step": 2031 }, { "epoch": 0.48360802046766227, "grad_norm": 0.38823207775132323, "learning_rate": 8.87066368223363e-06, "loss": 0.3778, "step": 2032 }, { "epoch": 0.48384601654072706, "grad_norm": 0.3691014202938941, "learning_rate": 8.869443182717009e-06, "loss": 0.4339, "step": 2033 }, { "epoch": 0.48408401261379186, "grad_norm": 0.39215841748503116, "learning_rate": 8.868222108105407e-06, "loss": 0.3159, "step": 2034 }, { "epoch": 0.48432200868685665, "grad_norm": 0.3882981946824124, "learning_rate": 8.867000458580302e-06, "loss": 0.3129, "step": 2035 }, { "epoch": 0.48456000475992145, "grad_norm": 0.3918438336109168, "learning_rate": 8.865778234323266e-06, "loss": 0.4061, "step": 2036 }, { "epoch": 0.48479800083298624, "grad_norm": 0.36841655891359437, "learning_rate": 8.864555435515949e-06, "loss": 0.408, "step": 2037 }, { "epoch": 0.48503599690605104, "grad_norm": 0.37655472476118107, "learning_rate": 8.863332062340091e-06, "loss": 0.297, "step": 2038 }, { "epoch": 0.48527399297911583, "grad_norm": 0.3798702816409488, "learning_rate": 8.862108114977512e-06, "loss": 0.3575, "step": 2039 }, { "epoch": 0.4855119890521806, "grad_norm": 0.36545896885427936, "learning_rate": 8.860883593610126e-06, "loss": 0.4035, "step": 2040 }, { "epoch": 0.4857499851252454, "grad_norm": 0.35102870721288, "learning_rate": 8.859658498419922e-06, "loss": 0.3564, "step": 2041 }, { "epoch": 0.4859879811983102, "grad_norm": 0.3576865709579289, "learning_rate": 8.858432829588984e-06, "loss": 0.296, "step": 2042 }, { "epoch": 0.486225977271375, "grad_norm": 0.36034938518371656, "learning_rate": 8.857206587299471e-06, "loss": 0.3711, "step": 2043 }, { "epoch": 0.4864639733444398, "grad_norm": 0.4065171435929195, "learning_rate": 8.85597977173364e-06, "loss": 0.4387, "step": 2044 }, { "epoch": 0.4867019694175046, "grad_norm": 0.3834118830811794, "learning_rate": 8.85475238307382e-06, "loss": 0.2858, "step": 2045 }, { "epoch": 0.4869399654905694, "grad_norm": 0.3943529386040461, "learning_rate": 8.853524421502436e-06, "loss": 0.3485, "step": 2046 }, { "epoch": 0.4871779615636342, "grad_norm": 0.4010452585395006, "learning_rate": 8.852295887201988e-06, "loss": 0.4022, "step": 2047 }, { "epoch": 0.487415957636699, "grad_norm": 0.4092837500931114, "learning_rate": 8.851066780355074e-06, "loss": 0.3798, "step": 2048 }, { "epoch": 0.4876539537097638, "grad_norm": 0.41343430994676683, "learning_rate": 8.849837101144363e-06, "loss": 0.3158, "step": 2049 }, { "epoch": 0.48789194978282857, "grad_norm": 0.3737441299895588, "learning_rate": 8.84860684975262e-06, "loss": 0.3551, "step": 2050 }, { "epoch": 0.48812994585589337, "grad_norm": 0.3745215140069202, "learning_rate": 8.847376026362688e-06, "loss": 0.4366, "step": 2051 }, { "epoch": 0.48836794192895816, "grad_norm": 0.42064142614172584, "learning_rate": 8.8461446311575e-06, "loss": 0.3181, "step": 2052 }, { "epoch": 0.48860593800202295, "grad_norm": 0.37945037835962714, "learning_rate": 8.844912664320072e-06, "loss": 0.3606, "step": 2053 }, { "epoch": 0.48884393407508775, "grad_norm": 0.38167249606508846, "learning_rate": 8.8436801260335e-06, "loss": 0.3845, "step": 2054 }, { "epoch": 0.48908193014815254, "grad_norm": 0.3746464998144936, "learning_rate": 8.842447016480975e-06, "loss": 0.3902, "step": 2055 }, { "epoch": 0.48931992622121734, "grad_norm": 0.39703764026174426, "learning_rate": 8.841213335845767e-06, "loss": 0.3346, "step": 2056 }, { "epoch": 0.48955792229428213, "grad_norm": 0.391301094421723, "learning_rate": 8.839979084311228e-06, "loss": 0.3585, "step": 2057 }, { "epoch": 0.4897959183673469, "grad_norm": 0.4083430548241745, "learning_rate": 8.8387442620608e-06, "loss": 0.4185, "step": 2058 }, { "epoch": 0.4900339144404117, "grad_norm": 0.37140532210327554, "learning_rate": 8.837508869278011e-06, "loss": 0.3295, "step": 2059 }, { "epoch": 0.4902719105134765, "grad_norm": 0.43155618842343063, "learning_rate": 8.836272906146467e-06, "loss": 0.3387, "step": 2060 }, { "epoch": 0.4905099065865413, "grad_norm": 0.3865957545475916, "learning_rate": 8.835036372849867e-06, "loss": 0.3753, "step": 2061 }, { "epoch": 0.4907479026596061, "grad_norm": 0.38780511078052327, "learning_rate": 8.833799269571985e-06, "loss": 0.397, "step": 2062 }, { "epoch": 0.4909858987326709, "grad_norm": 0.4141760815744359, "learning_rate": 8.832561596496689e-06, "loss": 0.3248, "step": 2063 }, { "epoch": 0.4912238948057357, "grad_norm": 0.4008766368624388, "learning_rate": 8.831323353807928e-06, "loss": 0.342, "step": 2064 }, { "epoch": 0.4914618908788005, "grad_norm": 0.39406895654952484, "learning_rate": 8.830084541689731e-06, "loss": 0.4475, "step": 2065 }, { "epoch": 0.4916998869518653, "grad_norm": 0.399495170165796, "learning_rate": 8.828845160326222e-06, "loss": 0.3681, "step": 2066 }, { "epoch": 0.4919378830249301, "grad_norm": 0.3868671762065268, "learning_rate": 8.827605209901602e-06, "loss": 0.3054, "step": 2067 }, { "epoch": 0.4921758790979949, "grad_norm": 0.38625669927979583, "learning_rate": 8.826364690600155e-06, "loss": 0.3585, "step": 2068 }, { "epoch": 0.49241387517105967, "grad_norm": 0.4117055914580924, "learning_rate": 8.825123602606256e-06, "loss": 0.4328, "step": 2069 }, { "epoch": 0.49265187124412446, "grad_norm": 0.3711886777989222, "learning_rate": 8.82388194610436e-06, "loss": 0.3273, "step": 2070 }, { "epoch": 0.49288986731718926, "grad_norm": 0.4050709073259558, "learning_rate": 8.82263972127901e-06, "loss": 0.341, "step": 2071 }, { "epoch": 0.49312786339025405, "grad_norm": 0.39297282044656545, "learning_rate": 8.82139692831483e-06, "loss": 0.3942, "step": 2072 }, { "epoch": 0.49336585946331885, "grad_norm": 0.4006082152861021, "learning_rate": 8.820153567396528e-06, "loss": 0.3502, "step": 2073 }, { "epoch": 0.49360385553638364, "grad_norm": 0.4641634293550907, "learning_rate": 8.818909638708901e-06, "loss": 0.3107, "step": 2074 }, { "epoch": 0.49384185160944843, "grad_norm": 0.3822372933111529, "learning_rate": 8.817665142436826e-06, "loss": 0.3832, "step": 2075 }, { "epoch": 0.49407984768251323, "grad_norm": 0.36126502781069214, "learning_rate": 8.816420078765267e-06, "loss": 0.4281, "step": 2076 }, { "epoch": 0.494317843755578, "grad_norm": 0.3805792912065209, "learning_rate": 8.81517444787927e-06, "loss": 0.326, "step": 2077 }, { "epoch": 0.4945558398286428, "grad_norm": 0.3938968001852603, "learning_rate": 8.813928249963967e-06, "loss": 0.348, "step": 2078 }, { "epoch": 0.4947938359017076, "grad_norm": 0.38229600062789804, "learning_rate": 8.812681485204575e-06, "loss": 0.4155, "step": 2079 }, { "epoch": 0.4950318319747724, "grad_norm": 0.4196935982542361, "learning_rate": 8.811434153786392e-06, "loss": 0.377, "step": 2080 }, { "epoch": 0.4952698280478372, "grad_norm": 0.4143016393191996, "learning_rate": 8.810186255894804e-06, "loss": 0.3329, "step": 2081 }, { "epoch": 0.495507824120902, "grad_norm": 0.3843063609766728, "learning_rate": 8.808937791715278e-06, "loss": 0.3756, "step": 2082 }, { "epoch": 0.4957458201939668, "grad_norm": 0.36885161283443524, "learning_rate": 8.807688761433369e-06, "loss": 0.433, "step": 2083 }, { "epoch": 0.4959838162670316, "grad_norm": 0.36443450880109934, "learning_rate": 8.806439165234711e-06, "loss": 0.3567, "step": 2084 }, { "epoch": 0.4962218123400964, "grad_norm": 0.38276313847917565, "learning_rate": 8.805189003305026e-06, "loss": 0.3166, "step": 2085 }, { "epoch": 0.4964598084131612, "grad_norm": 0.38135336290107213, "learning_rate": 8.803938275830122e-06, "loss": 0.3852, "step": 2086 }, { "epoch": 0.49669780448622597, "grad_norm": 0.3876532972271958, "learning_rate": 8.802686982995882e-06, "loss": 0.394, "step": 2087 }, { "epoch": 0.49693580055929076, "grad_norm": 0.42237733267854094, "learning_rate": 8.801435124988284e-06, "loss": 0.3224, "step": 2088 }, { "epoch": 0.49717379663235556, "grad_norm": 0.36357998941592223, "learning_rate": 8.800182701993383e-06, "loss": 0.3489, "step": 2089 }, { "epoch": 0.49741179270542035, "grad_norm": 0.3776997755369579, "learning_rate": 8.798929714197321e-06, "loss": 0.4065, "step": 2090 }, { "epoch": 0.49764978877848515, "grad_norm": 0.39974248449207966, "learning_rate": 8.797676161786322e-06, "loss": 0.3716, "step": 2091 }, { "epoch": 0.49788778485154994, "grad_norm": 0.3512586392644621, "learning_rate": 8.796422044946697e-06, "loss": 0.2985, "step": 2092 }, { "epoch": 0.49812578092461474, "grad_norm": 0.37530246376072357, "learning_rate": 8.795167363864835e-06, "loss": 0.3765, "step": 2093 }, { "epoch": 0.49836377699767953, "grad_norm": 0.403359701606089, "learning_rate": 8.793912118727214e-06, "loss": 0.4233, "step": 2094 }, { "epoch": 0.4986017730707443, "grad_norm": 0.39277782417073076, "learning_rate": 8.792656309720398e-06, "loss": 0.323, "step": 2095 }, { "epoch": 0.4988397691438091, "grad_norm": 0.3901545968666047, "learning_rate": 8.791399937031027e-06, "loss": 0.3246, "step": 2096 }, { "epoch": 0.4990777652168739, "grad_norm": 0.4108640260500558, "learning_rate": 8.790143000845832e-06, "loss": 0.4035, "step": 2097 }, { "epoch": 0.4993157612899387, "grad_norm": 0.43483507105211894, "learning_rate": 8.788885501351622e-06, "loss": 0.3644, "step": 2098 }, { "epoch": 0.4995537573630035, "grad_norm": 0.3989358061906033, "learning_rate": 8.787627438735295e-06, "loss": 0.3092, "step": 2099 }, { "epoch": 0.4997917534360683, "grad_norm": 0.39812360364772853, "learning_rate": 8.786368813183829e-06, "loss": 0.3558, "step": 2100 }, { "epoch": 0.5000297495091331, "grad_norm": 0.404777603959223, "learning_rate": 8.785109624884287e-06, "loss": 0.4303, "step": 2101 }, { "epoch": 0.5002677455821979, "grad_norm": 0.40005648641359376, "learning_rate": 8.783849874023816e-06, "loss": 0.3249, "step": 2102 }, { "epoch": 0.5005057416552627, "grad_norm": 0.4387837922924149, "learning_rate": 8.782589560789645e-06, "loss": 0.3341, "step": 2103 }, { "epoch": 0.5007437377283275, "grad_norm": 0.36939020016413393, "learning_rate": 8.781328685369088e-06, "loss": 0.3658, "step": 2104 }, { "epoch": 0.5009817338013923, "grad_norm": 0.42034598447699306, "learning_rate": 8.780067247949545e-06, "loss": 0.4267, "step": 2105 }, { "epoch": 0.5012197298744571, "grad_norm": 0.3744391349615731, "learning_rate": 8.778805248718492e-06, "loss": 0.3169, "step": 2106 }, { "epoch": 0.5014577259475219, "grad_norm": 0.39455712888275285, "learning_rate": 8.777542687863498e-06, "loss": 0.385, "step": 2107 }, { "epoch": 0.5016957220205867, "grad_norm": 0.4107559343031647, "learning_rate": 8.776279565572208e-06, "loss": 0.4194, "step": 2108 }, { "epoch": 0.5019337180936515, "grad_norm": 0.376450072712637, "learning_rate": 8.775015882032355e-06, "loss": 0.3303, "step": 2109 }, { "epoch": 0.5021717141667162, "grad_norm": 0.39192436430391, "learning_rate": 8.77375163743175e-06, "loss": 0.3186, "step": 2110 }, { "epoch": 0.5024097102397811, "grad_norm": 0.4103061508772666, "learning_rate": 8.772486831958293e-06, "loss": 0.3535, "step": 2111 }, { "epoch": 0.5026477063128458, "grad_norm": 0.4061911120479169, "learning_rate": 8.771221465799968e-06, "loss": 0.4365, "step": 2112 }, { "epoch": 0.5028857023859107, "grad_norm": 0.36093912967050273, "learning_rate": 8.769955539144839e-06, "loss": 0.311, "step": 2113 }, { "epoch": 0.5031236984589754, "grad_norm": 0.431661016075779, "learning_rate": 8.768689052181051e-06, "loss": 0.3709, "step": 2114 }, { "epoch": 0.5033616945320403, "grad_norm": 0.37241191111075006, "learning_rate": 8.767422005096838e-06, "loss": 0.4208, "step": 2115 }, { "epoch": 0.503599690605105, "grad_norm": 0.3900038040150187, "learning_rate": 8.766154398080511e-06, "loss": 0.3656, "step": 2116 }, { "epoch": 0.5038376866781699, "grad_norm": 0.38734311341707733, "learning_rate": 8.764886231320473e-06, "loss": 0.315, "step": 2117 }, { "epoch": 0.5040756827512346, "grad_norm": 0.39624067824100434, "learning_rate": 8.7636175050052e-06, "loss": 0.3679, "step": 2118 }, { "epoch": 0.5043136788242994, "grad_norm": 0.3786762562077862, "learning_rate": 8.76234821932326e-06, "loss": 0.421, "step": 2119 }, { "epoch": 0.5045516748973642, "grad_norm": 0.3658684215497542, "learning_rate": 8.7610783744633e-06, "loss": 0.3373, "step": 2120 }, { "epoch": 0.504789670970429, "grad_norm": 0.371242399448406, "learning_rate": 8.759807970614044e-06, "loss": 0.3295, "step": 2121 }, { "epoch": 0.5050276670434938, "grad_norm": 0.40052728933299747, "learning_rate": 8.758537007964314e-06, "loss": 0.4182, "step": 2122 }, { "epoch": 0.5052656631165586, "grad_norm": 0.39743565216070303, "learning_rate": 8.757265486703001e-06, "loss": 0.3706, "step": 2123 }, { "epoch": 0.5055036591896234, "grad_norm": 0.36470933499650765, "learning_rate": 8.75599340701909e-06, "loss": 0.2961, "step": 2124 }, { "epoch": 0.5057416552626882, "grad_norm": 0.37485254459171946, "learning_rate": 8.754720769101636e-06, "loss": 0.3761, "step": 2125 }, { "epoch": 0.505979651335753, "grad_norm": 0.37277864735486177, "learning_rate": 8.75344757313979e-06, "loss": 0.4196, "step": 2126 }, { "epoch": 0.5062176474088178, "grad_norm": 0.39747924989282063, "learning_rate": 8.75217381932278e-06, "loss": 0.3059, "step": 2127 }, { "epoch": 0.5064556434818825, "grad_norm": 0.3715732881453211, "learning_rate": 8.750899507839913e-06, "loss": 0.322, "step": 2128 }, { "epoch": 0.5066936395549474, "grad_norm": 0.4053341652611045, "learning_rate": 8.74962463888059e-06, "loss": 0.4358, "step": 2129 }, { "epoch": 0.5069316356280121, "grad_norm": 0.40118580003340715, "learning_rate": 8.748349212634284e-06, "loss": 0.3766, "step": 2130 }, { "epoch": 0.507169631701077, "grad_norm": 0.4019091388621105, "learning_rate": 8.747073229290552e-06, "loss": 0.3041, "step": 2131 }, { "epoch": 0.5074076277741417, "grad_norm": 0.4304176264560313, "learning_rate": 8.745796689039043e-06, "loss": 0.3581, "step": 2132 }, { "epoch": 0.5076456238472066, "grad_norm": 0.4099748631186, "learning_rate": 8.744519592069479e-06, "loss": 0.4268, "step": 2133 }, { "epoch": 0.5078836199202713, "grad_norm": 0.40848377251157103, "learning_rate": 8.743241938571667e-06, "loss": 0.3395, "step": 2134 }, { "epoch": 0.5081216159933362, "grad_norm": 0.38784109641240894, "learning_rate": 8.741963728735502e-06, "loss": 0.3247, "step": 2135 }, { "epoch": 0.5083596120664009, "grad_norm": 0.387056172080891, "learning_rate": 8.740684962750953e-06, "loss": 0.4079, "step": 2136 }, { "epoch": 0.5085976081394658, "grad_norm": 0.43968856979286486, "learning_rate": 8.73940564080808e-06, "loss": 0.4165, "step": 2137 }, { "epoch": 0.5088356042125305, "grad_norm": 0.4069695223863342, "learning_rate": 8.738125763097019e-06, "loss": 0.3109, "step": 2138 }, { "epoch": 0.5090736002855953, "grad_norm": 0.390822454142334, "learning_rate": 8.736845329807994e-06, "loss": 0.3342, "step": 2139 }, { "epoch": 0.5093115963586601, "grad_norm": 0.3791678415289624, "learning_rate": 8.735564341131308e-06, "loss": 0.4129, "step": 2140 }, { "epoch": 0.5095495924317249, "grad_norm": 0.3842175369404934, "learning_rate": 8.734282797257347e-06, "loss": 0.3665, "step": 2141 }, { "epoch": 0.5097875885047897, "grad_norm": 0.39913401398744197, "learning_rate": 8.733000698376579e-06, "loss": 0.3109, "step": 2142 }, { "epoch": 0.5100255845778545, "grad_norm": 0.3937291920519653, "learning_rate": 8.73171804467956e-06, "loss": 0.3699, "step": 2143 }, { "epoch": 0.5102635806509193, "grad_norm": 0.3710499687158456, "learning_rate": 8.73043483635692e-06, "loss": 0.409, "step": 2144 }, { "epoch": 0.5105015767239841, "grad_norm": 0.4346758984087426, "learning_rate": 8.729151073599376e-06, "loss": 0.3292, "step": 2145 }, { "epoch": 0.5107395727970488, "grad_norm": 0.40591530956978267, "learning_rate": 8.72786675659773e-06, "loss": 0.3347, "step": 2146 }, { "epoch": 0.5109775688701137, "grad_norm": 0.37110322190450645, "learning_rate": 8.72658188554286e-06, "loss": 0.431, "step": 2147 }, { "epoch": 0.5112155649431784, "grad_norm": 0.3762833501337982, "learning_rate": 8.725296460625729e-06, "loss": 0.3465, "step": 2148 }, { "epoch": 0.5114535610162433, "grad_norm": 0.42166675669869735, "learning_rate": 8.724010482037386e-06, "loss": 0.3255, "step": 2149 }, { "epoch": 0.511691557089308, "grad_norm": 0.4134995142541755, "learning_rate": 8.722723949968958e-06, "loss": 0.3746, "step": 2150 }, { "epoch": 0.5119295531623729, "grad_norm": 0.38354990683388107, "learning_rate": 8.721436864611653e-06, "loss": 0.4031, "step": 2151 }, { "epoch": 0.5121675492354376, "grad_norm": 0.4329904072710047, "learning_rate": 8.720149226156769e-06, "loss": 0.3331, "step": 2152 }, { "epoch": 0.5124055453085025, "grad_norm": 0.4137030908720267, "learning_rate": 8.718861034795677e-06, "loss": 0.3028, "step": 2153 }, { "epoch": 0.5126435413815672, "grad_norm": 0.3654824931248918, "learning_rate": 8.717572290719835e-06, "loss": 0.3846, "step": 2154 }, { "epoch": 0.512881537454632, "grad_norm": 0.422834765499284, "learning_rate": 8.716282994120782e-06, "loss": 0.3844, "step": 2155 }, { "epoch": 0.5131195335276968, "grad_norm": 0.3922197545354826, "learning_rate": 8.71499314519014e-06, "loss": 0.3464, "step": 2156 }, { "epoch": 0.5133575296007616, "grad_norm": 0.38918598237414415, "learning_rate": 8.713702744119613e-06, "loss": 0.3361, "step": 2157 }, { "epoch": 0.5135955256738264, "grad_norm": 0.4107866717596137, "learning_rate": 8.712411791100983e-06, "loss": 0.4021, "step": 2158 }, { "epoch": 0.5138335217468912, "grad_norm": 0.3901161988399392, "learning_rate": 8.711120286326122e-06, "loss": 0.3225, "step": 2159 }, { "epoch": 0.514071517819956, "grad_norm": 0.413635276999212, "learning_rate": 8.709828229986978e-06, "loss": 0.3094, "step": 2160 }, { "epoch": 0.5143095138930208, "grad_norm": 0.37538329913818896, "learning_rate": 8.708535622275581e-06, "loss": 0.4063, "step": 2161 }, { "epoch": 0.5145475099660856, "grad_norm": 0.37400221595282845, "learning_rate": 8.707242463384046e-06, "loss": 0.4596, "step": 2162 }, { "epoch": 0.5147855060391504, "grad_norm": 0.40427219364856926, "learning_rate": 8.705948753504569e-06, "loss": 0.3199, "step": 2163 }, { "epoch": 0.5150235021122151, "grad_norm": 0.3803126661404316, "learning_rate": 8.704654492829428e-06, "loss": 0.3466, "step": 2164 }, { "epoch": 0.51526149818528, "grad_norm": 0.3755852465058205, "learning_rate": 8.703359681550978e-06, "loss": 0.3883, "step": 2165 }, { "epoch": 0.5154994942583447, "grad_norm": 0.3908086804440207, "learning_rate": 8.702064319861663e-06, "loss": 0.3674, "step": 2166 }, { "epoch": 0.5157374903314096, "grad_norm": 0.3771686738573496, "learning_rate": 8.700768407954007e-06, "loss": 0.315, "step": 2167 }, { "epoch": 0.5159754864044743, "grad_norm": 0.370557512425135, "learning_rate": 8.699471946020612e-06, "loss": 0.3404, "step": 2168 }, { "epoch": 0.5162134824775391, "grad_norm": 0.3752377056778907, "learning_rate": 8.698174934254164e-06, "loss": 0.4201, "step": 2169 }, { "epoch": 0.5164514785506039, "grad_norm": 0.4184085294213279, "learning_rate": 8.696877372847434e-06, "loss": 0.3294, "step": 2170 }, { "epoch": 0.5166894746236687, "grad_norm": 0.388912569698524, "learning_rate": 8.69557926199327e-06, "loss": 0.359, "step": 2171 }, { "epoch": 0.5169274706967335, "grad_norm": 0.365614463397713, "learning_rate": 8.694280601884603e-06, "loss": 0.4009, "step": 2172 }, { "epoch": 0.5171654667697982, "grad_norm": 0.42621501649673044, "learning_rate": 8.692981392714445e-06, "loss": 0.36, "step": 2173 }, { "epoch": 0.5174034628428631, "grad_norm": 0.41391982376423003, "learning_rate": 8.691681634675895e-06, "loss": 0.3199, "step": 2174 }, { "epoch": 0.5176414589159278, "grad_norm": 0.41365783101678866, "learning_rate": 8.690381327962125e-06, "loss": 0.3919, "step": 2175 }, { "epoch": 0.5178794549889927, "grad_norm": 0.37354876476127546, "learning_rate": 8.689080472766393e-06, "loss": 0.4031, "step": 2176 }, { "epoch": 0.5181174510620574, "grad_norm": 0.4203856959561625, "learning_rate": 8.687779069282041e-06, "loss": 0.3208, "step": 2177 }, { "epoch": 0.5183554471351223, "grad_norm": 0.40423490700726816, "learning_rate": 8.686477117702488e-06, "loss": 0.3334, "step": 2178 }, { "epoch": 0.518593443208187, "grad_norm": 0.3509157300766899, "learning_rate": 8.685174618221235e-06, "loss": 0.4177, "step": 2179 }, { "epoch": 0.5188314392812519, "grad_norm": 0.40079035375275884, "learning_rate": 8.683871571031867e-06, "loss": 0.4139, "step": 2180 }, { "epoch": 0.5190694353543166, "grad_norm": 0.37163196418771477, "learning_rate": 8.68256797632805e-06, "loss": 0.3169, "step": 2181 }, { "epoch": 0.5193074314273814, "grad_norm": 0.4054280532520997, "learning_rate": 8.681263834303528e-06, "loss": 0.4119, "step": 2182 }, { "epoch": 0.5195454275004462, "grad_norm": 0.4273379334199248, "learning_rate": 8.67995914515213e-06, "loss": 0.4099, "step": 2183 }, { "epoch": 0.519783423573511, "grad_norm": 0.38570960886267935, "learning_rate": 8.678653909067767e-06, "loss": 0.3451, "step": 2184 }, { "epoch": 0.5200214196465758, "grad_norm": 0.39683790687415654, "learning_rate": 8.677348126244427e-06, "loss": 0.316, "step": 2185 }, { "epoch": 0.5202594157196406, "grad_norm": 0.38461925483513904, "learning_rate": 8.676041796876183e-06, "loss": 0.3881, "step": 2186 }, { "epoch": 0.5204974117927054, "grad_norm": 0.4254609757724267, "learning_rate": 8.674734921157185e-06, "loss": 0.4075, "step": 2187 }, { "epoch": 0.5207354078657702, "grad_norm": 0.36619327488390596, "learning_rate": 8.67342749928167e-06, "loss": 0.3159, "step": 2188 }, { "epoch": 0.520973403938835, "grad_norm": 0.41853879431263746, "learning_rate": 8.672119531443951e-06, "loss": 0.3249, "step": 2189 }, { "epoch": 0.5212114000118998, "grad_norm": 0.37412197275330067, "learning_rate": 8.67081101783843e-06, "loss": 0.4339, "step": 2190 }, { "epoch": 0.5214493960849645, "grad_norm": 0.3840297484734256, "learning_rate": 8.669501958659576e-06, "loss": 0.3418, "step": 2191 }, { "epoch": 0.5216873921580294, "grad_norm": 0.39214557075111384, "learning_rate": 8.668192354101953e-06, "loss": 0.3065, "step": 2192 }, { "epoch": 0.5219253882310941, "grad_norm": 0.3979212333040952, "learning_rate": 8.666882204360201e-06, "loss": 0.3639, "step": 2193 }, { "epoch": 0.522163384304159, "grad_norm": 0.43111388602684464, "learning_rate": 8.665571509629038e-06, "loss": 0.3999, "step": 2194 }, { "epoch": 0.5224013803772237, "grad_norm": 0.39713249026912617, "learning_rate": 8.664260270103265e-06, "loss": 0.329, "step": 2195 }, { "epoch": 0.5226393764502886, "grad_norm": 0.3899241343272046, "learning_rate": 8.662948485977768e-06, "loss": 0.3436, "step": 2196 }, { "epoch": 0.5228773725233533, "grad_norm": 0.3939001732463367, "learning_rate": 8.661636157447511e-06, "loss": 0.4149, "step": 2197 }, { "epoch": 0.5231153685964182, "grad_norm": 0.3957340016317853, "learning_rate": 8.660323284707535e-06, "loss": 0.3787, "step": 2198 }, { "epoch": 0.5233533646694829, "grad_norm": 0.44601803382908783, "learning_rate": 8.659009867952966e-06, "loss": 0.2919, "step": 2199 }, { "epoch": 0.5235913607425478, "grad_norm": 0.3932173337224158, "learning_rate": 8.657695907379011e-06, "loss": 0.3767, "step": 2200 }, { "epoch": 0.5238293568156125, "grad_norm": 0.408961622615314, "learning_rate": 8.65638140318096e-06, "loss": 0.4167, "step": 2201 }, { "epoch": 0.5240673528886773, "grad_norm": 0.3895863266832368, "learning_rate": 8.655066355554175e-06, "loss": 0.3162, "step": 2202 }, { "epoch": 0.5243053489617421, "grad_norm": 0.385176418198331, "learning_rate": 8.65375076469411e-06, "loss": 0.323, "step": 2203 }, { "epoch": 0.5245433450348069, "grad_norm": 0.36711047121100115, "learning_rate": 8.652434630796288e-06, "loss": 0.4063, "step": 2204 }, { "epoch": 0.5247813411078717, "grad_norm": 0.3996610302929268, "learning_rate": 8.651117954056325e-06, "loss": 0.4035, "step": 2205 }, { "epoch": 0.5250193371809365, "grad_norm": 0.38507884013978433, "learning_rate": 8.649800734669912e-06, "loss": 0.3275, "step": 2206 }, { "epoch": 0.5252573332540013, "grad_norm": 0.3649799910820221, "learning_rate": 8.648482972832815e-06, "loss": 0.3651, "step": 2207 }, { "epoch": 0.5254953293270661, "grad_norm": 0.37617511815362664, "learning_rate": 8.647164668740891e-06, "loss": 0.4303, "step": 2208 }, { "epoch": 0.5257333254001308, "grad_norm": 0.38432072478719503, "learning_rate": 8.64584582259007e-06, "loss": 0.3724, "step": 2209 }, { "epoch": 0.5259713214731957, "grad_norm": 0.42322209539590827, "learning_rate": 8.644526434576365e-06, "loss": 0.323, "step": 2210 }, { "epoch": 0.5262093175462604, "grad_norm": 0.40391854416882916, "learning_rate": 8.64320650489587e-06, "loss": 0.3575, "step": 2211 }, { "epoch": 0.5264473136193253, "grad_norm": 0.3835559240154108, "learning_rate": 8.641886033744762e-06, "loss": 0.426, "step": 2212 }, { "epoch": 0.52668530969239, "grad_norm": 0.3936562166696423, "learning_rate": 8.640565021319293e-06, "loss": 0.3247, "step": 2213 }, { "epoch": 0.5269233057654549, "grad_norm": 0.39202674206549953, "learning_rate": 8.639243467815798e-06, "loss": 0.3597, "step": 2214 }, { "epoch": 0.5271613018385196, "grad_norm": 0.3783225902955775, "learning_rate": 8.637921373430694e-06, "loss": 0.4475, "step": 2215 }, { "epoch": 0.5273992979115845, "grad_norm": 0.3600937320401172, "learning_rate": 8.636598738360476e-06, "loss": 0.3617, "step": 2216 }, { "epoch": 0.5276372939846492, "grad_norm": 0.4160704153133727, "learning_rate": 8.63527556280172e-06, "loss": 0.3446, "step": 2217 }, { "epoch": 0.527875290057714, "grad_norm": 0.3913242338353506, "learning_rate": 8.633951846951081e-06, "loss": 0.3822, "step": 2218 }, { "epoch": 0.5281132861307788, "grad_norm": 0.4035152853862847, "learning_rate": 8.6326275910053e-06, "loss": 0.4297, "step": 2219 }, { "epoch": 0.5283512822038436, "grad_norm": 0.409203322526621, "learning_rate": 8.631302795161192e-06, "loss": 0.3351, "step": 2220 }, { "epoch": 0.5285892782769084, "grad_norm": 0.39436563517242, "learning_rate": 8.629977459615655e-06, "loss": 0.3404, "step": 2221 }, { "epoch": 0.5288272743499732, "grad_norm": 0.3590194240170449, "learning_rate": 8.628651584565665e-06, "loss": 0.3969, "step": 2222 }, { "epoch": 0.529065270423038, "grad_norm": 0.4019072389485261, "learning_rate": 8.627325170208282e-06, "loss": 0.3476, "step": 2223 }, { "epoch": 0.5293032664961028, "grad_norm": 0.37619114031005646, "learning_rate": 8.625998216740643e-06, "loss": 0.3222, "step": 2224 }, { "epoch": 0.5295412625691676, "grad_norm": 0.4034865597690215, "learning_rate": 8.624670724359964e-06, "loss": 0.3545, "step": 2225 }, { "epoch": 0.5297792586422324, "grad_norm": 0.408006482551879, "learning_rate": 8.623342693263549e-06, "loss": 0.4144, "step": 2226 }, { "epoch": 0.5300172547152971, "grad_norm": 0.38382326688221563, "learning_rate": 8.62201412364877e-06, "loss": 0.3342, "step": 2227 }, { "epoch": 0.530255250788362, "grad_norm": 0.41258702064324476, "learning_rate": 8.620685015713089e-06, "loss": 0.3147, "step": 2228 }, { "epoch": 0.5304932468614267, "grad_norm": 0.38204434060333653, "learning_rate": 8.619355369654043e-06, "loss": 0.3818, "step": 2229 }, { "epoch": 0.5307312429344916, "grad_norm": 0.38216547595409855, "learning_rate": 8.61802518566925e-06, "loss": 0.383, "step": 2230 }, { "epoch": 0.5309692390075563, "grad_norm": 0.4029134683361265, "learning_rate": 8.616694463956409e-06, "loss": 0.2774, "step": 2231 }, { "epoch": 0.5312072350806212, "grad_norm": 0.37301434376960896, "learning_rate": 8.615363204713299e-06, "loss": 0.3619, "step": 2232 }, { "epoch": 0.5314452311536859, "grad_norm": 0.38496277632142917, "learning_rate": 8.614031408137775e-06, "loss": 0.4241, "step": 2233 }, { "epoch": 0.5316832272267508, "grad_norm": 0.33800484116848534, "learning_rate": 8.612699074427777e-06, "loss": 0.3205, "step": 2234 }, { "epoch": 0.5319212232998155, "grad_norm": 0.38034462571684113, "learning_rate": 8.611366203781323e-06, "loss": 0.3176, "step": 2235 }, { "epoch": 0.5321592193728804, "grad_norm": 0.41034018337162753, "learning_rate": 8.610032796396513e-06, "loss": 0.3932, "step": 2236 }, { "epoch": 0.5323972154459451, "grad_norm": 0.3624140867849283, "learning_rate": 8.60869885247152e-06, "loss": 0.3799, "step": 2237 }, { "epoch": 0.5326352115190099, "grad_norm": 0.39211384701169555, "learning_rate": 8.607364372204602e-06, "loss": 0.3236, "step": 2238 }, { "epoch": 0.5328732075920747, "grad_norm": 0.3631336275865908, "learning_rate": 8.606029355794095e-06, "loss": 0.3626, "step": 2239 }, { "epoch": 0.5331112036651395, "grad_norm": 0.3499747372288337, "learning_rate": 8.604693803438418e-06, "loss": 0.3962, "step": 2240 }, { "epoch": 0.5333491997382043, "grad_norm": 0.3833522235637884, "learning_rate": 8.603357715336067e-06, "loss": 0.3912, "step": 2241 }, { "epoch": 0.5335871958112691, "grad_norm": 0.40874050154372005, "learning_rate": 8.602021091685615e-06, "loss": 0.3315, "step": 2242 }, { "epoch": 0.5338251918843339, "grad_norm": 0.40195329644632777, "learning_rate": 8.600683932685721e-06, "loss": 0.3904, "step": 2243 }, { "epoch": 0.5340631879573987, "grad_norm": 0.3846575731760141, "learning_rate": 8.599346238535118e-06, "loss": 0.4735, "step": 2244 }, { "epoch": 0.5343011840304634, "grad_norm": 0.36643018837011293, "learning_rate": 8.59800800943262e-06, "loss": 0.3197, "step": 2245 }, { "epoch": 0.5345391801035283, "grad_norm": 0.40617951682169934, "learning_rate": 8.596669245577119e-06, "loss": 0.3588, "step": 2246 }, { "epoch": 0.534777176176593, "grad_norm": 0.3698247677981035, "learning_rate": 8.595329947167593e-06, "loss": 0.4227, "step": 2247 }, { "epoch": 0.5350151722496579, "grad_norm": 0.3755104238478636, "learning_rate": 8.593990114403093e-06, "loss": 0.3951, "step": 2248 }, { "epoch": 0.5352531683227226, "grad_norm": 0.37805284252091315, "learning_rate": 8.59264974748275e-06, "loss": 0.2891, "step": 2249 }, { "epoch": 0.5354911643957875, "grad_norm": 0.37337307896849703, "learning_rate": 8.591308846605777e-06, "loss": 0.3513, "step": 2250 }, { "epoch": 0.5357291604688522, "grad_norm": 0.3460371741397967, "learning_rate": 8.589967411971464e-06, "loss": 0.4251, "step": 2251 }, { "epoch": 0.5359671565419171, "grad_norm": 0.37459542754801345, "learning_rate": 8.588625443779183e-06, "loss": 0.3355, "step": 2252 }, { "epoch": 0.5362051526149818, "grad_norm": 0.40230507957953726, "learning_rate": 8.587282942228382e-06, "loss": 0.34, "step": 2253 }, { "epoch": 0.5364431486880467, "grad_norm": 0.372308451561018, "learning_rate": 8.585939907518591e-06, "loss": 0.3885, "step": 2254 }, { "epoch": 0.5366811447611114, "grad_norm": 0.3689804359627597, "learning_rate": 8.584596339849419e-06, "loss": 0.3928, "step": 2255 }, { "epoch": 0.5369191408341762, "grad_norm": 0.37560524421780966, "learning_rate": 8.583252239420549e-06, "loss": 0.3303, "step": 2256 }, { "epoch": 0.537157136907241, "grad_norm": 0.3842271683641431, "learning_rate": 8.581907606431754e-06, "loss": 0.3536, "step": 2257 }, { "epoch": 0.5373951329803058, "grad_norm": 0.4002665238506151, "learning_rate": 8.580562441082876e-06, "loss": 0.4165, "step": 2258 }, { "epoch": 0.5376331290533706, "grad_norm": 0.38907378275117815, "learning_rate": 8.579216743573839e-06, "loss": 0.2978, "step": 2259 }, { "epoch": 0.5378711251264354, "grad_norm": 0.3956313105170694, "learning_rate": 8.577870514104651e-06, "loss": 0.3106, "step": 2260 }, { "epoch": 0.5381091211995002, "grad_norm": 0.39047993680689397, "learning_rate": 8.57652375287539e-06, "loss": 0.3575, "step": 2261 }, { "epoch": 0.538347117272565, "grad_norm": 0.41768920949511756, "learning_rate": 8.575176460086221e-06, "loss": 0.4167, "step": 2262 }, { "epoch": 0.5385851133456298, "grad_norm": 0.3870552235120542, "learning_rate": 8.573828635937384e-06, "loss": 0.2996, "step": 2263 }, { "epoch": 0.5388231094186946, "grad_norm": 0.4596709809606415, "learning_rate": 8.5724802806292e-06, "loss": 0.373, "step": 2264 }, { "epoch": 0.5390611054917593, "grad_norm": 0.3706967842183925, "learning_rate": 8.571131394362069e-06, "loss": 0.404, "step": 2265 }, { "epoch": 0.5392991015648242, "grad_norm": 0.40181886987790577, "learning_rate": 8.569781977336464e-06, "loss": 0.3562, "step": 2266 }, { "epoch": 0.5395370976378889, "grad_norm": 0.4298167654016625, "learning_rate": 8.568432029752947e-06, "loss": 0.3202, "step": 2267 }, { "epoch": 0.5397750937109538, "grad_norm": 0.39862560057903823, "learning_rate": 8.56708155181215e-06, "loss": 0.3843, "step": 2268 }, { "epoch": 0.5400130897840185, "grad_norm": 0.41266735819620026, "learning_rate": 8.565730543714791e-06, "loss": 0.4155, "step": 2269 }, { "epoch": 0.5402510858570834, "grad_norm": 0.3974166536006753, "learning_rate": 8.564379005661661e-06, "loss": 0.3293, "step": 2270 }, { "epoch": 0.5404890819301481, "grad_norm": 0.35669423871612993, "learning_rate": 8.563026937853633e-06, "loss": 0.3605, "step": 2271 }, { "epoch": 0.540727078003213, "grad_norm": 0.4127273022491468, "learning_rate": 8.561674340491656e-06, "loss": 0.409, "step": 2272 }, { "epoch": 0.5409650740762777, "grad_norm": 0.4680110594563534, "learning_rate": 8.56032121377676e-06, "loss": 0.3709, "step": 2273 }, { "epoch": 0.5412030701493425, "grad_norm": 0.4124449173785681, "learning_rate": 8.558967557910054e-06, "loss": 0.2934, "step": 2274 }, { "epoch": 0.5414410662224073, "grad_norm": 0.4639894069096805, "learning_rate": 8.557613373092724e-06, "loss": 0.3645, "step": 2275 }, { "epoch": 0.5416790622954721, "grad_norm": 0.3845035156200861, "learning_rate": 8.556258659526036e-06, "loss": 0.4354, "step": 2276 }, { "epoch": 0.5419170583685369, "grad_norm": 0.4096705605468606, "learning_rate": 8.554903417411333e-06, "loss": 0.3055, "step": 2277 }, { "epoch": 0.5421550544416017, "grad_norm": 0.3894517106688162, "learning_rate": 8.553547646950037e-06, "loss": 0.3147, "step": 2278 }, { "epoch": 0.5423930505146665, "grad_norm": 0.41144516694641703, "learning_rate": 8.552191348343653e-06, "loss": 0.4009, "step": 2279 }, { "epoch": 0.5426310465877313, "grad_norm": 0.40545518632374106, "learning_rate": 8.550834521793757e-06, "loss": 0.3549, "step": 2280 }, { "epoch": 0.542869042660796, "grad_norm": 0.4536869892584681, "learning_rate": 8.549477167502006e-06, "loss": 0.2982, "step": 2281 }, { "epoch": 0.5431070387338609, "grad_norm": 0.36914461803860876, "learning_rate": 8.54811928567014e-06, "loss": 0.3515, "step": 2282 }, { "epoch": 0.5433450348069256, "grad_norm": 0.38659861108474836, "learning_rate": 8.546760876499968e-06, "loss": 0.4102, "step": 2283 }, { "epoch": 0.5435830308799905, "grad_norm": 0.40982842576957934, "learning_rate": 8.545401940193392e-06, "loss": 0.3362, "step": 2284 }, { "epoch": 0.5438210269530552, "grad_norm": 0.3991434411628737, "learning_rate": 8.544042476952377e-06, "loss": 0.3119, "step": 2285 }, { "epoch": 0.5440590230261201, "grad_norm": 0.3929815486547205, "learning_rate": 8.542682486978973e-06, "loss": 0.3739, "step": 2286 }, { "epoch": 0.5442970190991848, "grad_norm": 0.42239476535343035, "learning_rate": 8.541321970475312e-06, "loss": 0.3884, "step": 2287 }, { "epoch": 0.5445350151722497, "grad_norm": 0.4368412726721034, "learning_rate": 8.539960927643596e-06, "loss": 0.3054, "step": 2288 }, { "epoch": 0.5447730112453144, "grad_norm": 0.39557679784627, "learning_rate": 8.538599358686112e-06, "loss": 0.353, "step": 2289 }, { "epoch": 0.5450110073183793, "grad_norm": 0.39922321971295105, "learning_rate": 8.537237263805225e-06, "loss": 0.4309, "step": 2290 }, { "epoch": 0.545249003391444, "grad_norm": 0.39924479565216386, "learning_rate": 8.53587464320337e-06, "loss": 0.3412, "step": 2291 }, { "epoch": 0.5454869994645088, "grad_norm": 0.39342949827033796, "learning_rate": 8.534511497083073e-06, "loss": 0.2964, "step": 2292 }, { "epoch": 0.5457249955375736, "grad_norm": 0.37825134436273367, "learning_rate": 8.533147825646925e-06, "loss": 0.3704, "step": 2293 }, { "epoch": 0.5459629916106384, "grad_norm": 0.38022616425475375, "learning_rate": 8.531783629097608e-06, "loss": 0.405, "step": 2294 }, { "epoch": 0.5462009876837032, "grad_norm": 0.36260140531944307, "learning_rate": 8.530418907637868e-06, "loss": 0.3217, "step": 2295 }, { "epoch": 0.546438983756768, "grad_norm": 0.37353441118295333, "learning_rate": 8.529053661470542e-06, "loss": 0.3432, "step": 2296 }, { "epoch": 0.5466769798298328, "grad_norm": 0.40946792741351434, "learning_rate": 8.527687890798537e-06, "loss": 0.4199, "step": 2297 }, { "epoch": 0.5469149759028976, "grad_norm": 0.3726039053557827, "learning_rate": 8.52632159582484e-06, "loss": 0.3707, "step": 2298 }, { "epoch": 0.5471529719759624, "grad_norm": 0.3659302138094137, "learning_rate": 8.524954776752516e-06, "loss": 0.338, "step": 2299 }, { "epoch": 0.5473909680490272, "grad_norm": 0.3829909878854894, "learning_rate": 8.52358743378471e-06, "loss": 0.3499, "step": 2300 }, { "epoch": 0.5476289641220919, "grad_norm": 0.3890459899862676, "learning_rate": 8.522219567124643e-06, "loss": 0.4132, "step": 2301 }, { "epoch": 0.5478669601951568, "grad_norm": 0.36286174283101486, "learning_rate": 8.520851176975612e-06, "loss": 0.35, "step": 2302 }, { "epoch": 0.5481049562682215, "grad_norm": 0.38768718004776836, "learning_rate": 8.519482263540994e-06, "loss": 0.3365, "step": 2303 }, { "epoch": 0.5483429523412864, "grad_norm": 0.4160688543785431, "learning_rate": 8.518112827024245e-06, "loss": 0.4039, "step": 2304 }, { "epoch": 0.5485809484143511, "grad_norm": 0.36929499946058547, "learning_rate": 8.516742867628895e-06, "loss": 0.4113, "step": 2305 }, { "epoch": 0.548818944487416, "grad_norm": 0.4434563141145236, "learning_rate": 8.515372385558554e-06, "loss": 0.3342, "step": 2306 }, { "epoch": 0.5490569405604807, "grad_norm": 0.37489035852359814, "learning_rate": 8.514001381016912e-06, "loss": 0.3657, "step": 2307 }, { "epoch": 0.5492949366335456, "grad_norm": 0.35701192424068157, "learning_rate": 8.512629854207733e-06, "loss": 0.4158, "step": 2308 }, { "epoch": 0.5495329327066103, "grad_norm": 0.4554656989283448, "learning_rate": 8.511257805334859e-06, "loss": 0.3355, "step": 2309 }, { "epoch": 0.5497709287796751, "grad_norm": 0.3669973654578771, "learning_rate": 8.509885234602209e-06, "loss": 0.3158, "step": 2310 }, { "epoch": 0.5500089248527399, "grad_norm": 0.391116918799618, "learning_rate": 8.508512142213784e-06, "loss": 0.3917, "step": 2311 }, { "epoch": 0.5502469209258047, "grad_norm": 0.40840152784809935, "learning_rate": 8.507138528373658e-06, "loss": 0.4089, "step": 2312 }, { "epoch": 0.5504849169988695, "grad_norm": 0.39490420844357255, "learning_rate": 8.505764393285985e-06, "loss": 0.3205, "step": 2313 }, { "epoch": 0.5507229130719343, "grad_norm": 0.3845395660774653, "learning_rate": 8.504389737154994e-06, "loss": 0.391, "step": 2314 }, { "epoch": 0.5509609091449991, "grad_norm": 0.3909360241094245, "learning_rate": 8.503014560184994e-06, "loss": 0.4145, "step": 2315 }, { "epoch": 0.5511989052180639, "grad_norm": 0.38762183020768226, "learning_rate": 8.50163886258037e-06, "loss": 0.3754, "step": 2316 }, { "epoch": 0.5514369012911287, "grad_norm": 0.3912728587608663, "learning_rate": 8.500262644545584e-06, "loss": 0.3224, "step": 2317 }, { "epoch": 0.5516748973641935, "grad_norm": 0.37973815027865854, "learning_rate": 8.498885906285177e-06, "loss": 0.3811, "step": 2318 }, { "epoch": 0.5519128934372582, "grad_norm": 0.3579851250106683, "learning_rate": 8.497508648003765e-06, "loss": 0.4085, "step": 2319 }, { "epoch": 0.5521508895103231, "grad_norm": 0.39660478319056564, "learning_rate": 8.496130869906046e-06, "loss": 0.3407, "step": 2320 }, { "epoch": 0.5523888855833878, "grad_norm": 0.47401560455711944, "learning_rate": 8.49475257219679e-06, "loss": 0.3387, "step": 2321 }, { "epoch": 0.5526268816564527, "grad_norm": 0.37354092809224154, "learning_rate": 8.493373755080843e-06, "loss": 0.3914, "step": 2322 }, { "epoch": 0.5528648777295174, "grad_norm": 0.35292148040523397, "learning_rate": 8.491994418763136e-06, "loss": 0.3585, "step": 2323 }, { "epoch": 0.5531028738025823, "grad_norm": 0.5613791127606462, "learning_rate": 8.49061456344867e-06, "loss": 0.2989, "step": 2324 }, { "epoch": 0.553340869875647, "grad_norm": 0.41661056361625665, "learning_rate": 8.489234189342526e-06, "loss": 0.3642, "step": 2325 }, { "epoch": 0.5535788659487119, "grad_norm": 0.3734315892480904, "learning_rate": 8.487853296649861e-06, "loss": 0.4333, "step": 2326 }, { "epoch": 0.5538168620217766, "grad_norm": 0.3674453219548101, "learning_rate": 8.486471885575912e-06, "loss": 0.3408, "step": 2327 }, { "epoch": 0.5540548580948415, "grad_norm": 0.3673572223097309, "learning_rate": 8.48508995632599e-06, "loss": 0.3572, "step": 2328 }, { "epoch": 0.5542928541679062, "grad_norm": 0.4046956426382339, "learning_rate": 8.483707509105483e-06, "loss": 0.3804, "step": 2329 }, { "epoch": 0.554530850240971, "grad_norm": 0.35220241891020276, "learning_rate": 8.482324544119858e-06, "loss": 0.3549, "step": 2330 }, { "epoch": 0.5547688463140358, "grad_norm": 0.4301526345521306, "learning_rate": 8.480941061574656e-06, "loss": 0.3218, "step": 2331 }, { "epoch": 0.5550068423871006, "grad_norm": 0.38794458883354294, "learning_rate": 8.479557061675498e-06, "loss": 0.3864, "step": 2332 }, { "epoch": 0.5552448384601654, "grad_norm": 0.37899037555503806, "learning_rate": 8.478172544628082e-06, "loss": 0.4119, "step": 2333 }, { "epoch": 0.5554828345332302, "grad_norm": 0.4193850656712972, "learning_rate": 8.476787510638179e-06, "loss": 0.362, "step": 2334 }, { "epoch": 0.555720830606295, "grad_norm": 0.37309432031469925, "learning_rate": 8.47540195991164e-06, "loss": 0.3088, "step": 2335 }, { "epoch": 0.5559588266793598, "grad_norm": 0.3870361410415046, "learning_rate": 8.474015892654394e-06, "loss": 0.4003, "step": 2336 }, { "epoch": 0.5561968227524245, "grad_norm": 0.4108399130708448, "learning_rate": 8.472629309072443e-06, "loss": 0.4116, "step": 2337 }, { "epoch": 0.5564348188254894, "grad_norm": 0.38847211210505317, "learning_rate": 8.471242209371867e-06, "loss": 0.3489, "step": 2338 }, { "epoch": 0.5566728148985541, "grad_norm": 0.3632605487642382, "learning_rate": 8.469854593758825e-06, "loss": 0.3579, "step": 2339 }, { "epoch": 0.556910810971619, "grad_norm": 0.36587914178181785, "learning_rate": 8.468466462439549e-06, "loss": 0.4237, "step": 2340 }, { "epoch": 0.5571488070446837, "grad_norm": 0.3955575662234394, "learning_rate": 8.467077815620352e-06, "loss": 0.3263, "step": 2341 }, { "epoch": 0.5573868031177486, "grad_norm": 0.4044906304039566, "learning_rate": 8.46568865350762e-06, "loss": 0.3015, "step": 2342 }, { "epoch": 0.5576247991908133, "grad_norm": 0.3719462975550843, "learning_rate": 8.464298976307816e-06, "loss": 0.3709, "step": 2343 }, { "epoch": 0.5578627952638782, "grad_norm": 0.41922112217020985, "learning_rate": 8.462908784227484e-06, "loss": 0.4194, "step": 2344 }, { "epoch": 0.5581007913369429, "grad_norm": 0.4271019026096489, "learning_rate": 8.461518077473236e-06, "loss": 0.3211, "step": 2345 }, { "epoch": 0.5583387874100078, "grad_norm": 0.401362760699648, "learning_rate": 8.46012685625177e-06, "loss": 0.3347, "step": 2346 }, { "epoch": 0.5585767834830725, "grad_norm": 0.3788197761561941, "learning_rate": 8.458735120769853e-06, "loss": 0.3966, "step": 2347 }, { "epoch": 0.5588147795561373, "grad_norm": 0.37058769198361474, "learning_rate": 8.457342871234331e-06, "loss": 0.3761, "step": 2348 }, { "epoch": 0.5590527756292021, "grad_norm": 0.3701385464501336, "learning_rate": 8.455950107852127e-06, "loss": 0.296, "step": 2349 }, { "epoch": 0.5592907717022669, "grad_norm": 0.39037984018929806, "learning_rate": 8.454556830830242e-06, "loss": 0.3621, "step": 2350 }, { "epoch": 0.5595287677753317, "grad_norm": 0.3678062640318312, "learning_rate": 8.453163040375751e-06, "loss": 0.4257, "step": 2351 }, { "epoch": 0.5597667638483965, "grad_norm": 0.3905948837382555, "learning_rate": 8.451768736695806e-06, "loss": 0.3342, "step": 2352 }, { "epoch": 0.5600047599214613, "grad_norm": 0.4279691685882921, "learning_rate": 8.450373919997633e-06, "loss": 0.3265, "step": 2353 }, { "epoch": 0.5602427559945261, "grad_norm": 0.39184884481824495, "learning_rate": 8.448978590488538e-06, "loss": 0.398, "step": 2354 }, { "epoch": 0.5604807520675908, "grad_norm": 0.3667348181679106, "learning_rate": 8.447582748375899e-06, "loss": 0.3963, "step": 2355 }, { "epoch": 0.5607187481406557, "grad_norm": 0.4044466646021283, "learning_rate": 8.446186393867175e-06, "loss": 0.3226, "step": 2356 }, { "epoch": 0.5609567442137204, "grad_norm": 0.42695375000264973, "learning_rate": 8.444789527169899e-06, "loss": 0.345, "step": 2357 }, { "epoch": 0.5611947402867853, "grad_norm": 0.38004783330852054, "learning_rate": 8.44339214849168e-06, "loss": 0.3978, "step": 2358 }, { "epoch": 0.56143273635985, "grad_norm": 0.3996997964059031, "learning_rate": 8.441994258040202e-06, "loss": 0.3303, "step": 2359 }, { "epoch": 0.5616707324329149, "grad_norm": 0.39510081986039514, "learning_rate": 8.440595856023226e-06, "loss": 0.3358, "step": 2360 }, { "epoch": 0.5619087285059796, "grad_norm": 0.4191208470001434, "learning_rate": 8.439196942648589e-06, "loss": 0.4067, "step": 2361 }, { "epoch": 0.5621467245790445, "grad_norm": 0.4303815244296002, "learning_rate": 8.437797518124205e-06, "loss": 0.4148, "step": 2362 }, { "epoch": 0.5623847206521092, "grad_norm": 0.3989720793075796, "learning_rate": 8.436397582658062e-06, "loss": 0.3123, "step": 2363 }, { "epoch": 0.562622716725174, "grad_norm": 0.3880785671355131, "learning_rate": 8.434997136458227e-06, "loss": 0.3449, "step": 2364 }, { "epoch": 0.5628607127982388, "grad_norm": 0.35392253974214943, "learning_rate": 8.433596179732838e-06, "loss": 0.4147, "step": 2365 }, { "epoch": 0.5630987088713036, "grad_norm": 0.35980860715044094, "learning_rate": 8.432194712690117e-06, "loss": 0.344, "step": 2366 }, { "epoch": 0.5633367049443684, "grad_norm": 0.3585696578371747, "learning_rate": 8.430792735538352e-06, "loss": 0.3067, "step": 2367 }, { "epoch": 0.5635747010174332, "grad_norm": 0.3443163729830074, "learning_rate": 8.429390248485911e-06, "loss": 0.3627, "step": 2368 }, { "epoch": 0.563812697090498, "grad_norm": 0.3649074447807937, "learning_rate": 8.42798725174124e-06, "loss": 0.4325, "step": 2369 }, { "epoch": 0.5640506931635628, "grad_norm": 0.3798587786258275, "learning_rate": 8.426583745512862e-06, "loss": 0.3246, "step": 2370 }, { "epoch": 0.5642886892366276, "grad_norm": 0.40308412350983575, "learning_rate": 8.425179730009368e-06, "loss": 0.3384, "step": 2371 }, { "epoch": 0.5645266853096924, "grad_norm": 0.4049157079324643, "learning_rate": 8.423775205439433e-06, "loss": 0.4236, "step": 2372 }, { "epoch": 0.5647646813827571, "grad_norm": 0.3975137139248059, "learning_rate": 8.4223701720118e-06, "loss": 0.3573, "step": 2373 }, { "epoch": 0.565002677455822, "grad_norm": 0.40160322607563, "learning_rate": 8.420964629935294e-06, "loss": 0.306, "step": 2374 }, { "epoch": 0.5652406735288867, "grad_norm": 0.37308395646020814, "learning_rate": 8.419558579418813e-06, "loss": 0.3918, "step": 2375 }, { "epoch": 0.5654786696019516, "grad_norm": 0.3514852696457898, "learning_rate": 8.418152020671335e-06, "loss": 0.4363, "step": 2376 }, { "epoch": 0.5657166656750163, "grad_norm": 0.6416951229578869, "learning_rate": 8.416744953901904e-06, "loss": 0.3119, "step": 2377 }, { "epoch": 0.5659546617480812, "grad_norm": 0.3436478077704659, "learning_rate": 8.415337379319645e-06, "loss": 0.3405, "step": 2378 }, { "epoch": 0.5661926578211459, "grad_norm": 0.38888307187061655, "learning_rate": 8.41392929713376e-06, "loss": 0.3862, "step": 2379 }, { "epoch": 0.5664306538942108, "grad_norm": 0.4044080349860496, "learning_rate": 8.412520707553527e-06, "loss": 0.4068, "step": 2380 }, { "epoch": 0.5666686499672755, "grad_norm": 0.3738358783673891, "learning_rate": 8.411111610788294e-06, "loss": 0.3103, "step": 2381 }, { "epoch": 0.5669066460403404, "grad_norm": 0.3894619985619245, "learning_rate": 8.40970200704749e-06, "loss": 0.3569, "step": 2382 }, { "epoch": 0.5671446421134051, "grad_norm": 0.37956208132491615, "learning_rate": 8.408291896540613e-06, "loss": 0.4404, "step": 2383 }, { "epoch": 0.56738263818647, "grad_norm": 0.4385341684832731, "learning_rate": 8.406881279477244e-06, "loss": 0.3509, "step": 2384 }, { "epoch": 0.5676206342595347, "grad_norm": 0.43016413455724534, "learning_rate": 8.405470156067038e-06, "loss": 0.3038, "step": 2385 }, { "epoch": 0.5678586303325995, "grad_norm": 0.41401646258681746, "learning_rate": 8.404058526519717e-06, "loss": 0.3866, "step": 2386 }, { "epoch": 0.5680966264056643, "grad_norm": 0.3977676509749422, "learning_rate": 8.402646391045085e-06, "loss": 0.4203, "step": 2387 }, { "epoch": 0.5683346224787291, "grad_norm": 0.47631691258225883, "learning_rate": 8.401233749853024e-06, "loss": 0.3177, "step": 2388 }, { "epoch": 0.5685726185517939, "grad_norm": 0.3878321830383998, "learning_rate": 8.399820603153483e-06, "loss": 0.3613, "step": 2389 }, { "epoch": 0.5688106146248587, "grad_norm": 0.3714565178735775, "learning_rate": 8.398406951156496e-06, "loss": 0.426, "step": 2390 }, { "epoch": 0.5690486106979235, "grad_norm": 0.39492294312602905, "learning_rate": 8.396992794072162e-06, "loss": 0.3286, "step": 2391 }, { "epoch": 0.5692866067709883, "grad_norm": 0.4043641921314243, "learning_rate": 8.395578132110663e-06, "loss": 0.3344, "step": 2392 }, { "epoch": 0.569524602844053, "grad_norm": 0.39639917678624054, "learning_rate": 8.394162965482249e-06, "loss": 0.3757, "step": 2393 }, { "epoch": 0.5697625989171179, "grad_norm": 0.37351452297347937, "learning_rate": 8.39274729439725e-06, "loss": 0.3834, "step": 2394 }, { "epoch": 0.5700005949901826, "grad_norm": 0.4062182778345128, "learning_rate": 8.391331119066071e-06, "loss": 0.304, "step": 2395 }, { "epoch": 0.5702385910632475, "grad_norm": 0.36745005612369885, "learning_rate": 8.389914439699191e-06, "loss": 0.3195, "step": 2396 }, { "epoch": 0.5704765871363122, "grad_norm": 0.3687137278697919, "learning_rate": 8.388497256507163e-06, "loss": 0.3726, "step": 2397 }, { "epoch": 0.5707145832093771, "grad_norm": 0.3792259871749328, "learning_rate": 8.387079569700615e-06, "loss": 0.3311, "step": 2398 }, { "epoch": 0.5709525792824418, "grad_norm": 0.40696921581904394, "learning_rate": 8.38566137949025e-06, "loss": 0.2977, "step": 2399 }, { "epoch": 0.5711905753555067, "grad_norm": 0.3699322815190473, "learning_rate": 8.384242686086848e-06, "loss": 0.3794, "step": 2400 }, { "epoch": 0.5714285714285714, "grad_norm": 0.3826635436528603, "learning_rate": 8.382823489701262e-06, "loss": 0.4304, "step": 2401 }, { "epoch": 0.5716665675016362, "grad_norm": 0.38070938472969035, "learning_rate": 8.381403790544416e-06, "loss": 0.3305, "step": 2402 }, { "epoch": 0.571904563574701, "grad_norm": 0.3868544933608521, "learning_rate": 8.379983588827314e-06, "loss": 0.3067, "step": 2403 }, { "epoch": 0.5721425596477658, "grad_norm": 0.39043334928429774, "learning_rate": 8.378562884761037e-06, "loss": 0.385, "step": 2404 }, { "epoch": 0.5723805557208306, "grad_norm": 0.3953822016500048, "learning_rate": 8.37714167855673e-06, "loss": 0.3968, "step": 2405 }, { "epoch": 0.5726185517938954, "grad_norm": 0.36014258721015646, "learning_rate": 8.375719970425626e-06, "loss": 0.3058, "step": 2406 }, { "epoch": 0.5728565478669602, "grad_norm": 0.38242727883148314, "learning_rate": 8.374297760579024e-06, "loss": 0.3667, "step": 2407 }, { "epoch": 0.573094543940025, "grad_norm": 0.3583190778095351, "learning_rate": 8.372875049228295e-06, "loss": 0.448, "step": 2408 }, { "epoch": 0.5733325400130898, "grad_norm": 0.43781235930710827, "learning_rate": 8.371451836584894e-06, "loss": 0.3132, "step": 2409 }, { "epoch": 0.5735705360861546, "grad_norm": 0.3688130777579906, "learning_rate": 8.370028122860346e-06, "loss": 0.3088, "step": 2410 }, { "epoch": 0.5738085321592193, "grad_norm": 0.398460180753574, "learning_rate": 8.368603908266248e-06, "loss": 0.3805, "step": 2411 }, { "epoch": 0.5740465282322842, "grad_norm": 0.39559802571213665, "learning_rate": 8.367179193014275e-06, "loss": 0.4192, "step": 2412 }, { "epoch": 0.5742845243053489, "grad_norm": 0.41593476553175096, "learning_rate": 8.365753977316171e-06, "loss": 0.3284, "step": 2413 }, { "epoch": 0.5745225203784138, "grad_norm": 0.3718359537773827, "learning_rate": 8.364328261383763e-06, "loss": 0.34, "step": 2414 }, { "epoch": 0.5747605164514785, "grad_norm": 0.3557624684742045, "learning_rate": 8.362902045428945e-06, "loss": 0.4296, "step": 2415 }, { "epoch": 0.5749985125245434, "grad_norm": 0.402514906677194, "learning_rate": 8.36147532966369e-06, "loss": 0.3406, "step": 2416 }, { "epoch": 0.5752365085976081, "grad_norm": 0.39462407875443245, "learning_rate": 8.360048114300041e-06, "loss": 0.3383, "step": 2417 }, { "epoch": 0.575474504670673, "grad_norm": 0.41311640572509384, "learning_rate": 8.35862039955012e-06, "loss": 0.3611, "step": 2418 }, { "epoch": 0.5757125007437377, "grad_norm": 0.3917287001092687, "learning_rate": 8.357192185626118e-06, "loss": 0.4186, "step": 2419 }, { "epoch": 0.5759504968168025, "grad_norm": 0.3467390799072633, "learning_rate": 8.355763472740305e-06, "loss": 0.3304, "step": 2420 }, { "epoch": 0.5761884928898673, "grad_norm": 0.38769794783433975, "learning_rate": 8.354334261105023e-06, "loss": 0.3565, "step": 2421 }, { "epoch": 0.5764264889629321, "grad_norm": 0.4041528840850423, "learning_rate": 8.352904550932687e-06, "loss": 0.4234, "step": 2422 }, { "epoch": 0.5766644850359969, "grad_norm": 0.4100086929644872, "learning_rate": 8.351474342435786e-06, "loss": 0.356, "step": 2423 }, { "epoch": 0.5769024811090617, "grad_norm": 0.36794983188883607, "learning_rate": 8.350043635826888e-06, "loss": 0.3224, "step": 2424 }, { "epoch": 0.5771404771821265, "grad_norm": 0.40729170540747445, "learning_rate": 8.34861243131863e-06, "loss": 0.3484, "step": 2425 }, { "epoch": 0.5773784732551913, "grad_norm": 0.40251376202597505, "learning_rate": 8.347180729123724e-06, "loss": 0.4518, "step": 2426 }, { "epoch": 0.577616469328256, "grad_norm": 0.3852352526845826, "learning_rate": 8.345748529454956e-06, "loss": 0.3455, "step": 2427 }, { "epoch": 0.5778544654013209, "grad_norm": 0.39160730175824005, "learning_rate": 8.344315832525187e-06, "loss": 0.3527, "step": 2428 }, { "epoch": 0.5780924614743856, "grad_norm": 0.38108553823066177, "learning_rate": 8.342882638547351e-06, "loss": 0.4142, "step": 2429 }, { "epoch": 0.5783304575474505, "grad_norm": 0.4092273445218704, "learning_rate": 8.341448947734454e-06, "loss": 0.3947, "step": 2430 }, { "epoch": 0.5785684536205152, "grad_norm": 0.40333249898205964, "learning_rate": 8.340014760299582e-06, "loss": 0.3326, "step": 2431 }, { "epoch": 0.5788064496935801, "grad_norm": 0.3729044861254844, "learning_rate": 8.33858007645589e-06, "loss": 0.3735, "step": 2432 }, { "epoch": 0.5790444457666448, "grad_norm": 0.3671984616197141, "learning_rate": 8.337144896416602e-06, "loss": 0.3966, "step": 2433 }, { "epoch": 0.5792824418397097, "grad_norm": 0.3836501393931116, "learning_rate": 8.335709220395029e-06, "loss": 0.3285, "step": 2434 }, { "epoch": 0.5795204379127744, "grad_norm": 0.35340054131259374, "learning_rate": 8.334273048604541e-06, "loss": 0.3307, "step": 2435 }, { "epoch": 0.5797584339858393, "grad_norm": 0.3439612755818031, "learning_rate": 8.332836381258596e-06, "loss": 0.3899, "step": 2436 }, { "epoch": 0.579996430058904, "grad_norm": 0.3569322611942811, "learning_rate": 8.331399218570711e-06, "loss": 0.4009, "step": 2437 }, { "epoch": 0.5802344261319689, "grad_norm": 0.6662367868543072, "learning_rate": 8.32996156075449e-06, "loss": 0.3147, "step": 2438 }, { "epoch": 0.5804724222050336, "grad_norm": 0.4227515779891762, "learning_rate": 8.328523408023599e-06, "loss": 0.3299, "step": 2439 }, { "epoch": 0.5807104182780984, "grad_norm": 0.37706784919884295, "learning_rate": 8.327084760591788e-06, "loss": 0.4154, "step": 2440 }, { "epoch": 0.5809484143511632, "grad_norm": 0.36959641829833456, "learning_rate": 8.325645618672873e-06, "loss": 0.3566, "step": 2441 }, { "epoch": 0.581186410424228, "grad_norm": 0.38877783129131926, "learning_rate": 8.324205982480747e-06, "loss": 0.3094, "step": 2442 }, { "epoch": 0.5814244064972928, "grad_norm": 0.4093291177503294, "learning_rate": 8.322765852229373e-06, "loss": 0.3788, "step": 2443 }, { "epoch": 0.5816624025703576, "grad_norm": 0.35993564376286113, "learning_rate": 8.321325228132793e-06, "loss": 0.3971, "step": 2444 }, { "epoch": 0.5819003986434224, "grad_norm": 0.37858365267915045, "learning_rate": 8.31988411040512e-06, "loss": 0.3312, "step": 2445 }, { "epoch": 0.5821383947164872, "grad_norm": 0.36232519560188203, "learning_rate": 8.318442499260538e-06, "loss": 0.3209, "step": 2446 }, { "epoch": 0.582376390789552, "grad_norm": 0.37803394847163874, "learning_rate": 8.317000394913304e-06, "loss": 0.4168, "step": 2447 }, { "epoch": 0.5826143868626168, "grad_norm": 0.39433181440916604, "learning_rate": 8.315557797577754e-06, "loss": 0.3812, "step": 2448 }, { "epoch": 0.5828523829356815, "grad_norm": 0.403795746000504, "learning_rate": 8.314114707468293e-06, "loss": 0.2969, "step": 2449 }, { "epoch": 0.5830903790087464, "grad_norm": 0.3611167532562934, "learning_rate": 8.312671124799398e-06, "loss": 0.3553, "step": 2450 }, { "epoch": 0.5833283750818111, "grad_norm": 0.38084432296705717, "learning_rate": 8.311227049785623e-06, "loss": 0.4011, "step": 2451 }, { "epoch": 0.583566371154876, "grad_norm": 0.3655837949285881, "learning_rate": 8.309782482641595e-06, "loss": 0.3379, "step": 2452 }, { "epoch": 0.5838043672279407, "grad_norm": 0.3913423734099842, "learning_rate": 8.308337423582006e-06, "loss": 0.3441, "step": 2453 }, { "epoch": 0.5840423633010056, "grad_norm": 0.39397581704554785, "learning_rate": 8.306891872821635e-06, "loss": 0.3829, "step": 2454 }, { "epoch": 0.5842803593740703, "grad_norm": 0.3843675181792697, "learning_rate": 8.30544583057532e-06, "loss": 0.3929, "step": 2455 }, { "epoch": 0.5845183554471352, "grad_norm": 0.3832844659583936, "learning_rate": 8.303999297057983e-06, "loss": 0.3161, "step": 2456 }, { "epoch": 0.5847563515201999, "grad_norm": 0.39763166006214656, "learning_rate": 8.302552272484613e-06, "loss": 0.3667, "step": 2457 }, { "epoch": 0.5849943475932647, "grad_norm": 0.3585575011269157, "learning_rate": 8.301104757070276e-06, "loss": 0.4027, "step": 2458 }, { "epoch": 0.5852323436663295, "grad_norm": 0.38820380899321066, "learning_rate": 8.299656751030105e-06, "loss": 0.3341, "step": 2459 }, { "epoch": 0.5854703397393943, "grad_norm": 0.3836385113109399, "learning_rate": 8.298208254579311e-06, "loss": 0.3173, "step": 2460 }, { "epoch": 0.5857083358124591, "grad_norm": 0.38574649888799817, "learning_rate": 8.296759267933178e-06, "loss": 0.3862, "step": 2461 }, { "epoch": 0.5859463318855239, "grad_norm": 0.3684542994428625, "learning_rate": 8.29530979130706e-06, "loss": 0.3995, "step": 2462 }, { "epoch": 0.5861843279585887, "grad_norm": 0.39106683323722646, "learning_rate": 8.293859824916383e-06, "loss": 0.3097, "step": 2463 }, { "epoch": 0.5864223240316535, "grad_norm": 0.4653396505467396, "learning_rate": 8.29240936897665e-06, "loss": 0.3342, "step": 2464 }, { "epoch": 0.5866603201047182, "grad_norm": 0.3895719288501651, "learning_rate": 8.290958423703437e-06, "loss": 0.4107, "step": 2465 }, { "epoch": 0.5868983161777831, "grad_norm": 0.4018410614287424, "learning_rate": 8.289506989312386e-06, "loss": 0.3257, "step": 2466 }, { "epoch": 0.5871363122508478, "grad_norm": 0.3944410384951886, "learning_rate": 8.288055066019218e-06, "loss": 0.3238, "step": 2467 }, { "epoch": 0.5873743083239127, "grad_norm": 0.38792256065200875, "learning_rate": 8.286602654039724e-06, "loss": 0.3826, "step": 2468 }, { "epoch": 0.5876123043969774, "grad_norm": 0.38502496740303777, "learning_rate": 8.28514975358977e-06, "loss": 0.3997, "step": 2469 }, { "epoch": 0.5878503004700423, "grad_norm": 0.37703883930285037, "learning_rate": 8.283696364885293e-06, "loss": 0.3356, "step": 2470 }, { "epoch": 0.588088296543107, "grad_norm": 0.3861610449357546, "learning_rate": 8.282242488142299e-06, "loss": 0.3348, "step": 2471 }, { "epoch": 0.5883262926161719, "grad_norm": 0.3780090152003883, "learning_rate": 8.280788123576873e-06, "loss": 0.4045, "step": 2472 }, { "epoch": 0.5885642886892366, "grad_norm": 0.3801772021397056, "learning_rate": 8.279333271405171e-06, "loss": 0.3941, "step": 2473 }, { "epoch": 0.5888022847623015, "grad_norm": 0.3903145537175543, "learning_rate": 8.277877931843417e-06, "loss": 0.32, "step": 2474 }, { "epoch": 0.5890402808353662, "grad_norm": 0.3762448521852173, "learning_rate": 8.276422105107911e-06, "loss": 0.3519, "step": 2475 }, { "epoch": 0.589278276908431, "grad_norm": 0.4024863557944298, "learning_rate": 8.274965791415026e-06, "loss": 0.4126, "step": 2476 }, { "epoch": 0.5895162729814958, "grad_norm": 0.3810042080891356, "learning_rate": 8.273508990981206e-06, "loss": 0.3461, "step": 2477 }, { "epoch": 0.5897542690545606, "grad_norm": 0.39863438603953905, "learning_rate": 8.272051704022965e-06, "loss": 0.3328, "step": 2478 }, { "epoch": 0.5899922651276254, "grad_norm": 0.3867478091098192, "learning_rate": 8.270593930756897e-06, "loss": 0.377, "step": 2479 }, { "epoch": 0.5902302612006902, "grad_norm": 0.3879898329337162, "learning_rate": 8.269135671399659e-06, "loss": 0.3646, "step": 2480 }, { "epoch": 0.590468257273755, "grad_norm": 0.36772292163364306, "learning_rate": 8.267676926167986e-06, "loss": 0.3086, "step": 2481 }, { "epoch": 0.5907062533468198, "grad_norm": 0.3565818145381248, "learning_rate": 8.266217695278682e-06, "loss": 0.355, "step": 2482 }, { "epoch": 0.5909442494198845, "grad_norm": 0.37046723786837815, "learning_rate": 8.264757978948627e-06, "loss": 0.4076, "step": 2483 }, { "epoch": 0.5911822454929494, "grad_norm": 0.4249706612993194, "learning_rate": 8.263297777394772e-06, "loss": 0.3327, "step": 2484 }, { "epoch": 0.5914202415660141, "grad_norm": 0.3951802875165859, "learning_rate": 8.261837090834135e-06, "loss": 0.3376, "step": 2485 }, { "epoch": 0.591658237639079, "grad_norm": 0.3673787092456175, "learning_rate": 8.260375919483812e-06, "loss": 0.3509, "step": 2486 }, { "epoch": 0.5918962337121437, "grad_norm": 0.3896876553874139, "learning_rate": 8.258914263560971e-06, "loss": 0.4089, "step": 2487 }, { "epoch": 0.5921342297852086, "grad_norm": 0.37425201217841664, "learning_rate": 8.257452123282847e-06, "loss": 0.3259, "step": 2488 }, { "epoch": 0.5923722258582733, "grad_norm": 0.3859687694225435, "learning_rate": 8.255989498866754e-06, "loss": 0.3243, "step": 2489 }, { "epoch": 0.5926102219313382, "grad_norm": 0.3708026305032604, "learning_rate": 8.254526390530071e-06, "loss": 0.3954, "step": 2490 }, { "epoch": 0.5928482180044029, "grad_norm": 0.3794998880457793, "learning_rate": 8.253062798490255e-06, "loss": 0.3535, "step": 2491 }, { "epoch": 0.5930862140774678, "grad_norm": 0.4489229665961381, "learning_rate": 8.251598722964828e-06, "loss": 0.3371, "step": 2492 }, { "epoch": 0.5933242101505325, "grad_norm": 0.3631137000424927, "learning_rate": 8.250134164171391e-06, "loss": 0.3718, "step": 2493 }, { "epoch": 0.5935622062235973, "grad_norm": 0.3527479506637244, "learning_rate": 8.248669122327612e-06, "loss": 0.3933, "step": 2494 }, { "epoch": 0.5938002022966621, "grad_norm": 0.3742555835211287, "learning_rate": 8.247203597651234e-06, "loss": 0.3274, "step": 2495 }, { "epoch": 0.5940381983697269, "grad_norm": 0.4308062065966235, "learning_rate": 8.24573759036007e-06, "loss": 0.3666, "step": 2496 }, { "epoch": 0.5942761944427917, "grad_norm": 0.3966932502889181, "learning_rate": 8.244271100672004e-06, "loss": 0.4066, "step": 2497 }, { "epoch": 0.5945141905158565, "grad_norm": 0.35946849534322295, "learning_rate": 8.242804128804993e-06, "loss": 0.3555, "step": 2498 }, { "epoch": 0.5947521865889213, "grad_norm": 0.37828665852870985, "learning_rate": 8.241336674977064e-06, "loss": 0.3126, "step": 2499 }, { "epoch": 0.5949901826619861, "grad_norm": 0.38560613627125817, "learning_rate": 8.23986873940632e-06, "loss": 0.3595, "step": 2500 }, { "epoch": 0.5952281787350509, "grad_norm": 0.3933069848593732, "learning_rate": 8.238400322310931e-06, "loss": 0.4599, "step": 2501 }, { "epoch": 0.5954661748081157, "grad_norm": 0.37906655075786916, "learning_rate": 8.23693142390914e-06, "loss": 0.3189, "step": 2502 }, { "epoch": 0.5957041708811804, "grad_norm": 0.3597221601036792, "learning_rate": 8.23546204441926e-06, "loss": 0.3001, "step": 2503 }, { "epoch": 0.5959421669542453, "grad_norm": 0.36715814207073877, "learning_rate": 8.233992184059681e-06, "loss": 0.3793, "step": 2504 }, { "epoch": 0.59618016302731, "grad_norm": 0.37986642282050925, "learning_rate": 8.232521843048855e-06, "loss": 0.378, "step": 2505 }, { "epoch": 0.5964181591003749, "grad_norm": 0.38641730071730823, "learning_rate": 8.231051021605316e-06, "loss": 0.3321, "step": 2506 }, { "epoch": 0.5966561551734396, "grad_norm": 0.4161340087618939, "learning_rate": 8.229579719947664e-06, "loss": 0.3855, "step": 2507 }, { "epoch": 0.5968941512465045, "grad_norm": 0.3557911482780271, "learning_rate": 8.228107938294568e-06, "loss": 0.3944, "step": 2508 }, { "epoch": 0.5971321473195692, "grad_norm": 0.37982110625589854, "learning_rate": 8.226635676864774e-06, "loss": 0.3175, "step": 2509 }, { "epoch": 0.5973701433926341, "grad_norm": 0.37294853078019546, "learning_rate": 8.225162935877096e-06, "loss": 0.3159, "step": 2510 }, { "epoch": 0.5976081394656988, "grad_norm": 0.3732432601966647, "learning_rate": 8.223689715550417e-06, "loss": 0.4083, "step": 2511 }, { "epoch": 0.5978461355387636, "grad_norm": 0.37372480614911496, "learning_rate": 8.222216016103697e-06, "loss": 0.4262, "step": 2512 }, { "epoch": 0.5980841316118284, "grad_norm": 0.35343278143642487, "learning_rate": 8.220741837755964e-06, "loss": 0.3131, "step": 2513 }, { "epoch": 0.5983221276848932, "grad_norm": 0.36855420142012446, "learning_rate": 8.219267180726315e-06, "loss": 0.3692, "step": 2514 }, { "epoch": 0.598560123757958, "grad_norm": 0.3994967531361665, "learning_rate": 8.217792045233924e-06, "loss": 0.3973, "step": 2515 }, { "epoch": 0.5987981198310228, "grad_norm": 0.4108525006798571, "learning_rate": 8.216316431498028e-06, "loss": 0.3718, "step": 2516 }, { "epoch": 0.5990361159040876, "grad_norm": 0.40662138405492826, "learning_rate": 8.214840339737943e-06, "loss": 0.2975, "step": 2517 }, { "epoch": 0.5992741119771524, "grad_norm": 0.3728366285892858, "learning_rate": 8.213363770173054e-06, "loss": 0.3559, "step": 2518 }, { "epoch": 0.5995121080502172, "grad_norm": 0.37944723614507436, "learning_rate": 8.211886723022814e-06, "loss": 0.4264, "step": 2519 }, { "epoch": 0.599750104123282, "grad_norm": 0.3611408857103594, "learning_rate": 8.210409198506748e-06, "loss": 0.3163, "step": 2520 }, { "epoch": 0.5999881001963467, "grad_norm": 0.3729043334589071, "learning_rate": 8.208931196844453e-06, "loss": 0.3315, "step": 2521 }, { "epoch": 0.6002260962694116, "grad_norm": 0.39851502121087623, "learning_rate": 8.207452718255597e-06, "loss": 0.4049, "step": 2522 }, { "epoch": 0.6004640923424763, "grad_norm": 0.402522005091446, "learning_rate": 8.20597376295992e-06, "loss": 0.3749, "step": 2523 }, { "epoch": 0.6007020884155412, "grad_norm": 0.4137714780671095, "learning_rate": 8.204494331177229e-06, "loss": 0.3014, "step": 2524 }, { "epoch": 0.6009400844886059, "grad_norm": 0.3901604913593569, "learning_rate": 8.203014423127405e-06, "loss": 0.3693, "step": 2525 }, { "epoch": 0.6011780805616708, "grad_norm": 0.3839384353345892, "learning_rate": 8.201534039030398e-06, "loss": 0.4382, "step": 2526 }, { "epoch": 0.6014160766347355, "grad_norm": 0.39209431032725645, "learning_rate": 8.20005317910623e-06, "loss": 0.3409, "step": 2527 }, { "epoch": 0.6016540727078004, "grad_norm": 0.39203793684317206, "learning_rate": 8.198571843574997e-06, "loss": 0.3048, "step": 2528 }, { "epoch": 0.6018920687808651, "grad_norm": 0.3696671599005409, "learning_rate": 8.197090032656858e-06, "loss": 0.4085, "step": 2529 }, { "epoch": 0.60213006485393, "grad_norm": 0.36909421514297636, "learning_rate": 8.195607746572047e-06, "loss": 0.3601, "step": 2530 }, { "epoch": 0.6023680609269947, "grad_norm": 0.4061715297820262, "learning_rate": 8.19412498554087e-06, "loss": 0.3074, "step": 2531 }, { "epoch": 0.6026060570000595, "grad_norm": 0.42169562952568157, "learning_rate": 8.192641749783703e-06, "loss": 0.3869, "step": 2532 }, { "epoch": 0.6028440530731243, "grad_norm": 0.37904313486242136, "learning_rate": 8.191158039520986e-06, "loss": 0.4187, "step": 2533 }, { "epoch": 0.6030820491461891, "grad_norm": 0.3933415457883819, "learning_rate": 8.18967385497324e-06, "loss": 0.3349, "step": 2534 }, { "epoch": 0.6033200452192539, "grad_norm": 0.37746188889384785, "learning_rate": 8.188189196361052e-06, "loss": 0.3126, "step": 2535 }, { "epoch": 0.6035580412923187, "grad_norm": 0.37077728868779936, "learning_rate": 8.186704063905078e-06, "loss": 0.3702, "step": 2536 }, { "epoch": 0.6037960373653835, "grad_norm": 0.384234124457375, "learning_rate": 8.185218457826043e-06, "loss": 0.4062, "step": 2537 }, { "epoch": 0.6040340334384483, "grad_norm": 8.292846660801779, "learning_rate": 8.183732378344747e-06, "loss": 0.3263, "step": 2538 }, { "epoch": 0.604272029511513, "grad_norm": 0.3985849899773034, "learning_rate": 8.18224582568206e-06, "loss": 0.3693, "step": 2539 }, { "epoch": 0.6045100255845779, "grad_norm": 0.36445215405013764, "learning_rate": 8.180758800058914e-06, "loss": 0.3774, "step": 2540 }, { "epoch": 0.6047480216576426, "grad_norm": 0.38537810047531207, "learning_rate": 8.179271301696326e-06, "loss": 0.3585, "step": 2541 }, { "epoch": 0.6049860177307075, "grad_norm": 0.3722062094603994, "learning_rate": 8.17778333081537e-06, "loss": 0.3261, "step": 2542 }, { "epoch": 0.6052240138037722, "grad_norm": 0.43085688766278635, "learning_rate": 8.176294887637195e-06, "loss": 0.3491, "step": 2543 }, { "epoch": 0.6054620098768371, "grad_norm": 0.4080559076822167, "learning_rate": 8.174805972383024e-06, "loss": 0.3951, "step": 2544 }, { "epoch": 0.6057000059499018, "grad_norm": 0.37312331547160477, "learning_rate": 8.173316585274144e-06, "loss": 0.3025, "step": 2545 }, { "epoch": 0.6059380020229667, "grad_norm": 0.3667533329998851, "learning_rate": 8.171826726531916e-06, "loss": 0.3169, "step": 2546 }, { "epoch": 0.6061759980960314, "grad_norm": 0.3893058682249162, "learning_rate": 8.170336396377767e-06, "loss": 0.4053, "step": 2547 }, { "epoch": 0.6064139941690962, "grad_norm": 0.36637391718899653, "learning_rate": 8.168845595033202e-06, "loss": 0.3559, "step": 2548 }, { "epoch": 0.606651990242161, "grad_norm": 0.41529994785116214, "learning_rate": 8.167354322719785e-06, "loss": 0.3117, "step": 2549 }, { "epoch": 0.6068899863152258, "grad_norm": 0.38610454842175557, "learning_rate": 8.165862579659161e-06, "loss": 0.3633, "step": 2550 }, { "epoch": 0.6071279823882906, "grad_norm": 0.3753416629984086, "learning_rate": 8.164370366073038e-06, "loss": 0.4248, "step": 2551 }, { "epoch": 0.6073659784613554, "grad_norm": 0.394779197165333, "learning_rate": 8.162877682183197e-06, "loss": 0.3117, "step": 2552 }, { "epoch": 0.6076039745344202, "grad_norm": 0.4086616702369126, "learning_rate": 8.161384528211485e-06, "loss": 0.3191, "step": 2553 }, { "epoch": 0.607841970607485, "grad_norm": 0.3788225625439413, "learning_rate": 8.159890904379823e-06, "loss": 0.3859, "step": 2554 }, { "epoch": 0.6080799666805498, "grad_norm": 0.38335294969113787, "learning_rate": 8.158396810910201e-06, "loss": 0.3708, "step": 2555 }, { "epoch": 0.6083179627536146, "grad_norm": 0.38664017085300634, "learning_rate": 8.156902248024678e-06, "loss": 0.3318, "step": 2556 }, { "epoch": 0.6085559588266793, "grad_norm": 0.38453529856278706, "learning_rate": 8.155407215945382e-06, "loss": 0.3584, "step": 2557 }, { "epoch": 0.6087939548997442, "grad_norm": 0.42462216171053496, "learning_rate": 8.153911714894513e-06, "loss": 0.4105, "step": 2558 }, { "epoch": 0.6090319509728089, "grad_norm": 0.39687180601172, "learning_rate": 8.152415745094342e-06, "loss": 0.3442, "step": 2559 }, { "epoch": 0.6092699470458738, "grad_norm": 0.4136807644278921, "learning_rate": 8.150919306767202e-06, "loss": 0.3311, "step": 2560 }, { "epoch": 0.6095079431189385, "grad_norm": 0.35777343605244677, "learning_rate": 8.149422400135503e-06, "loss": 0.3723, "step": 2561 }, { "epoch": 0.6097459391920034, "grad_norm": 0.4469538872661931, "learning_rate": 8.14792502542172e-06, "loss": 0.3759, "step": 2562 }, { "epoch": 0.6099839352650681, "grad_norm": 0.39114145249451315, "learning_rate": 8.146427182848407e-06, "loss": 0.2963, "step": 2563 }, { "epoch": 0.610221931338133, "grad_norm": 0.39846643949056665, "learning_rate": 8.144928872638174e-06, "loss": 0.3149, "step": 2564 }, { "epoch": 0.6104599274111977, "grad_norm": 0.39040261177798785, "learning_rate": 8.143430095013706e-06, "loss": 0.3969, "step": 2565 }, { "epoch": 0.6106979234842626, "grad_norm": 0.3735836629849792, "learning_rate": 8.141930850197765e-06, "loss": 0.3382, "step": 2566 }, { "epoch": 0.6109359195573273, "grad_norm": 0.3736061629320796, "learning_rate": 8.14043113841317e-06, "loss": 0.307, "step": 2567 }, { "epoch": 0.6111739156303921, "grad_norm": 0.35064240017216614, "learning_rate": 8.138930959882818e-06, "loss": 0.3508, "step": 2568 }, { "epoch": 0.6114119117034569, "grad_norm": 0.39486353842354627, "learning_rate": 8.137430314829671e-06, "loss": 0.4483, "step": 2569 }, { "epoch": 0.6116499077765217, "grad_norm": 0.3759570099382165, "learning_rate": 8.135929203476764e-06, "loss": 0.343, "step": 2570 }, { "epoch": 0.6118879038495865, "grad_norm": 0.376912992715495, "learning_rate": 8.134427626047198e-06, "loss": 0.3342, "step": 2571 }, { "epoch": 0.6121258999226513, "grad_norm": 0.3675782252622046, "learning_rate": 8.132925582764144e-06, "loss": 0.4009, "step": 2572 }, { "epoch": 0.6123638959957161, "grad_norm": 0.37894873028788706, "learning_rate": 8.131423073850845e-06, "loss": 0.3797, "step": 2573 }, { "epoch": 0.6126018920687809, "grad_norm": 0.41501690842186384, "learning_rate": 8.129920099530608e-06, "loss": 0.3149, "step": 2574 }, { "epoch": 0.6128398881418456, "grad_norm": 0.3901236279329649, "learning_rate": 8.128416660026816e-06, "loss": 0.3559, "step": 2575 }, { "epoch": 0.6130778842149105, "grad_norm": 0.36410270538740547, "learning_rate": 8.126912755562913e-06, "loss": 0.4105, "step": 2576 }, { "epoch": 0.6133158802879752, "grad_norm": 0.40445769613053906, "learning_rate": 8.125408386362419e-06, "loss": 0.3565, "step": 2577 }, { "epoch": 0.6135538763610401, "grad_norm": 0.37585605678511325, "learning_rate": 8.12390355264892e-06, "loss": 0.299, "step": 2578 }, { "epoch": 0.6137918724341048, "grad_norm": 0.35091890724717334, "learning_rate": 8.122398254646071e-06, "loss": 0.3952, "step": 2579 }, { "epoch": 0.6140298685071697, "grad_norm": 0.3858838022367494, "learning_rate": 8.120892492577598e-06, "loss": 0.4277, "step": 2580 }, { "epoch": 0.6142678645802344, "grad_norm": 0.39895986019373647, "learning_rate": 8.119386266667292e-06, "loss": 0.3109, "step": 2581 }, { "epoch": 0.6145058606532993, "grad_norm": 0.4111713762728162, "learning_rate": 8.117879577139019e-06, "loss": 0.3601, "step": 2582 }, { "epoch": 0.614743856726364, "grad_norm": 0.3873631664853874, "learning_rate": 8.116372424216705e-06, "loss": 0.4458, "step": 2583 }, { "epoch": 0.6149818527994289, "grad_norm": 0.3810771917479411, "learning_rate": 8.114864808124356e-06, "loss": 0.296, "step": 2584 }, { "epoch": 0.6152198488724936, "grad_norm": 0.39306780621427023, "learning_rate": 8.113356729086038e-06, "loss": 0.3077, "step": 2585 }, { "epoch": 0.6154578449455584, "grad_norm": 0.3857981143058693, "learning_rate": 8.111848187325889e-06, "loss": 0.3732, "step": 2586 }, { "epoch": 0.6156958410186232, "grad_norm": 0.37904552046137224, "learning_rate": 8.110339183068117e-06, "loss": 0.3811, "step": 2587 }, { "epoch": 0.615933837091688, "grad_norm": 0.35684929770087614, "learning_rate": 8.108829716536993e-06, "loss": 0.3357, "step": 2588 }, { "epoch": 0.6161718331647528, "grad_norm": 0.37821384127046404, "learning_rate": 8.107319787956866e-06, "loss": 0.3508, "step": 2589 }, { "epoch": 0.6164098292378176, "grad_norm": 0.3574697327965667, "learning_rate": 8.105809397552148e-06, "loss": 0.4393, "step": 2590 }, { "epoch": 0.6166478253108824, "grad_norm": 0.39258448327977785, "learning_rate": 8.10429854554732e-06, "loss": 0.3101, "step": 2591 }, { "epoch": 0.6168858213839472, "grad_norm": 0.3888467385192024, "learning_rate": 8.10278723216693e-06, "loss": 0.3177, "step": 2592 }, { "epoch": 0.617123817457012, "grad_norm": 0.3852959961633021, "learning_rate": 8.101275457635601e-06, "loss": 0.3663, "step": 2593 }, { "epoch": 0.6173618135300768, "grad_norm": 0.40121539043640597, "learning_rate": 8.099763222178015e-06, "loss": 0.405, "step": 2594 }, { "epoch": 0.6175998096031415, "grad_norm": 0.4081613317939108, "learning_rate": 8.098250526018927e-06, "loss": 0.3019, "step": 2595 }, { "epoch": 0.6178378056762064, "grad_norm": 0.45909803930292775, "learning_rate": 8.096737369383167e-06, "loss": 0.339, "step": 2596 }, { "epoch": 0.6180758017492711, "grad_norm": 0.39288919774336073, "learning_rate": 8.095223752495625e-06, "loss": 0.3849, "step": 2597 }, { "epoch": 0.618313797822336, "grad_norm": 0.40645028794870114, "learning_rate": 8.09370967558126e-06, "loss": 0.3484, "step": 2598 }, { "epoch": 0.6185517938954007, "grad_norm": 0.36773838171647716, "learning_rate": 8.092195138865102e-06, "loss": 0.3205, "step": 2599 }, { "epoch": 0.6187897899684656, "grad_norm": 0.39094679473606364, "learning_rate": 8.090680142572251e-06, "loss": 0.3719, "step": 2600 }, { "epoch": 0.6190277860415303, "grad_norm": 0.37593142152302145, "learning_rate": 8.089164686927869e-06, "loss": 0.4178, "step": 2601 }, { "epoch": 0.6192657821145952, "grad_norm": 0.3875234390518961, "learning_rate": 8.087648772157193e-06, "loss": 0.3594, "step": 2602 }, { "epoch": 0.6195037781876599, "grad_norm": 0.3937548041329991, "learning_rate": 8.086132398485525e-06, "loss": 0.3272, "step": 2603 }, { "epoch": 0.6197417742607247, "grad_norm": 0.4882360341404345, "learning_rate": 8.084615566138234e-06, "loss": 0.3972, "step": 2604 }, { "epoch": 0.6199797703337895, "grad_norm": 0.3644617243184282, "learning_rate": 8.083098275340762e-06, "loss": 0.3796, "step": 2605 }, { "epoch": 0.6202177664068543, "grad_norm": 0.3464988959256686, "learning_rate": 8.081580526318614e-06, "loss": 0.3145, "step": 2606 }, { "epoch": 0.6204557624799191, "grad_norm": 0.37402621710256423, "learning_rate": 8.080062319297364e-06, "loss": 0.3315, "step": 2607 }, { "epoch": 0.6206937585529839, "grad_norm": 0.40711958296792816, "learning_rate": 8.078543654502656e-06, "loss": 0.4, "step": 2608 }, { "epoch": 0.6209317546260487, "grad_norm": 0.36492939402646146, "learning_rate": 8.077024532160202e-06, "loss": 0.3082, "step": 2609 }, { "epoch": 0.6211697506991135, "grad_norm": 0.37941276134920326, "learning_rate": 8.075504952495781e-06, "loss": 0.3377, "step": 2610 }, { "epoch": 0.6214077467721782, "grad_norm": 0.4020215489655943, "learning_rate": 8.07398491573524e-06, "loss": 0.3639, "step": 2611 }, { "epoch": 0.6216457428452431, "grad_norm": 0.4020370198246242, "learning_rate": 8.072464422104493e-06, "loss": 0.4031, "step": 2612 }, { "epoch": 0.6218837389183078, "grad_norm": 0.4204655833624215, "learning_rate": 8.070943471829524e-06, "loss": 0.2973, "step": 2613 }, { "epoch": 0.6221217349913727, "grad_norm": 0.4006137318758786, "learning_rate": 8.069422065136386e-06, "loss": 0.3611, "step": 2614 }, { "epoch": 0.6223597310644374, "grad_norm": 0.3662514094870652, "learning_rate": 8.067900202251191e-06, "loss": 0.397, "step": 2615 }, { "epoch": 0.6225977271375023, "grad_norm": 0.44999481000949004, "learning_rate": 8.066377883400132e-06, "loss": 0.3546, "step": 2616 }, { "epoch": 0.622835723210567, "grad_norm": 0.364463764085942, "learning_rate": 8.064855108809461e-06, "loss": 0.2948, "step": 2617 }, { "epoch": 0.6230737192836319, "grad_norm": 0.3964833253922973, "learning_rate": 8.063331878705499e-06, "loss": 0.3764, "step": 2618 }, { "epoch": 0.6233117153566966, "grad_norm": 0.3514192516820972, "learning_rate": 8.061808193314638e-06, "loss": 0.4172, "step": 2619 }, { "epoch": 0.6235497114297615, "grad_norm": 0.3548064383030754, "learning_rate": 8.060284052863334e-06, "loss": 0.3202, "step": 2620 }, { "epoch": 0.6237877075028262, "grad_norm": 0.38064126138563015, "learning_rate": 8.05875945757811e-06, "loss": 0.3158, "step": 2621 }, { "epoch": 0.624025703575891, "grad_norm": 0.3849786777328987, "learning_rate": 8.057234407685563e-06, "loss": 0.3875, "step": 2622 }, { "epoch": 0.6242636996489558, "grad_norm": 0.3737306119830634, "learning_rate": 8.05570890341235e-06, "loss": 0.3512, "step": 2623 }, { "epoch": 0.6245016957220206, "grad_norm": 0.4064964030178299, "learning_rate": 8.054182944985198e-06, "loss": 0.3145, "step": 2624 }, { "epoch": 0.6247396917950854, "grad_norm": 0.38133945827871857, "learning_rate": 8.052656532630905e-06, "loss": 0.36, "step": 2625 }, { "epoch": 0.6249776878681502, "grad_norm": 0.34831792324190414, "learning_rate": 8.051129666576331e-06, "loss": 0.4264, "step": 2626 }, { "epoch": 0.625215683941215, "grad_norm": 0.38814660085579894, "learning_rate": 8.049602347048408e-06, "loss": 0.3214, "step": 2627 }, { "epoch": 0.6254536800142798, "grad_norm": 0.4292810547509034, "learning_rate": 8.048074574274132e-06, "loss": 0.3469, "step": 2628 }, { "epoch": 0.6256916760873446, "grad_norm": 0.36341426198547644, "learning_rate": 8.04654634848057e-06, "loss": 0.3922, "step": 2629 }, { "epoch": 0.6259296721604094, "grad_norm": 0.3821109148057445, "learning_rate": 8.045017669894851e-06, "loss": 0.3777, "step": 2630 }, { "epoch": 0.6261676682334741, "grad_norm": 0.37289488804653775, "learning_rate": 8.043488538744177e-06, "loss": 0.2943, "step": 2631 }, { "epoch": 0.626405664306539, "grad_norm": 0.37258093559025074, "learning_rate": 8.041958955255815e-06, "loss": 0.342, "step": 2632 }, { "epoch": 0.6266436603796037, "grad_norm": 0.3486184579114556, "learning_rate": 8.040428919657095e-06, "loss": 0.4144, "step": 2633 }, { "epoch": 0.6268816564526686, "grad_norm": 0.3741035400050582, "learning_rate": 8.038898432175424e-06, "loss": 0.3246, "step": 2634 }, { "epoch": 0.6271196525257333, "grad_norm": 0.40892825888298057, "learning_rate": 8.037367493038265e-06, "loss": 0.3332, "step": 2635 }, { "epoch": 0.6273576485987982, "grad_norm": 0.36522383480533593, "learning_rate": 8.035836102473155e-06, "loss": 0.3566, "step": 2636 }, { "epoch": 0.6275956446718629, "grad_norm": 0.3652591745540261, "learning_rate": 8.0343042607077e-06, "loss": 0.3765, "step": 2637 }, { "epoch": 0.6278336407449278, "grad_norm": 0.35844676767075323, "learning_rate": 8.032771967969566e-06, "loss": 0.3158, "step": 2638 }, { "epoch": 0.6280716368179925, "grad_norm": 0.3931558454911546, "learning_rate": 8.03123922448649e-06, "loss": 0.3568, "step": 2639 }, { "epoch": 0.6283096328910573, "grad_norm": 0.36411863899036223, "learning_rate": 8.029706030486274e-06, "loss": 0.4586, "step": 2640 }, { "epoch": 0.6285476289641221, "grad_norm": 0.3796092362122607, "learning_rate": 8.028172386196794e-06, "loss": 0.372, "step": 2641 }, { "epoch": 0.6287856250371869, "grad_norm": 0.3761030511051456, "learning_rate": 8.026638291845982e-06, "loss": 0.3092, "step": 2642 }, { "epoch": 0.6290236211102517, "grad_norm": 0.3699761519507502, "learning_rate": 8.025103747661844e-06, "loss": 0.3805, "step": 2643 }, { "epoch": 0.6292616171833165, "grad_norm": 0.373140231509292, "learning_rate": 8.023568753872453e-06, "loss": 0.388, "step": 2644 }, { "epoch": 0.6294996132563813, "grad_norm": 0.3637236022266386, "learning_rate": 8.022033310705946e-06, "loss": 0.3184, "step": 2645 }, { "epoch": 0.6297376093294461, "grad_norm": 0.3915776794451119, "learning_rate": 8.020497418390527e-06, "loss": 0.3349, "step": 2646 }, { "epoch": 0.6299756054025109, "grad_norm": 0.36532602213156506, "learning_rate": 8.018961077154468e-06, "loss": 0.3957, "step": 2647 }, { "epoch": 0.6302136014755757, "grad_norm": 0.4140563457038236, "learning_rate": 8.017424287226107e-06, "loss": 0.415, "step": 2648 }, { "epoch": 0.6304515975486404, "grad_norm": 0.3769113334994522, "learning_rate": 8.01588704883385e-06, "loss": 0.3145, "step": 2649 }, { "epoch": 0.6306895936217053, "grad_norm": 1.5368218754503258, "learning_rate": 8.014349362206167e-06, "loss": 0.374, "step": 2650 }, { "epoch": 0.63092758969477, "grad_norm": 0.38350013956172013, "learning_rate": 8.012811227571597e-06, "loss": 0.4003, "step": 2651 }, { "epoch": 0.6311655857678349, "grad_norm": 0.3866132408928916, "learning_rate": 8.011272645158747e-06, "loss": 0.3235, "step": 2652 }, { "epoch": 0.6314035818408996, "grad_norm": 0.40608371668229537, "learning_rate": 8.009733615196287e-06, "loss": 0.3244, "step": 2653 }, { "epoch": 0.6316415779139645, "grad_norm": 0.38096062005900233, "learning_rate": 8.008194137912955e-06, "loss": 0.412, "step": 2654 }, { "epoch": 0.6318795739870292, "grad_norm": 0.3862436056336853, "learning_rate": 8.006654213537553e-06, "loss": 0.3634, "step": 2655 }, { "epoch": 0.6321175700600941, "grad_norm": 0.3976502335701635, "learning_rate": 8.005113842298954e-06, "loss": 0.3234, "step": 2656 }, { "epoch": 0.6323555661331588, "grad_norm": 0.39127711057264086, "learning_rate": 8.003573024426094e-06, "loss": 0.3673, "step": 2657 }, { "epoch": 0.6325935622062236, "grad_norm": 0.3896004400805801, "learning_rate": 8.00203176014798e-06, "loss": 0.4458, "step": 2658 }, { "epoch": 0.6328315582792884, "grad_norm": 0.46398616972299095, "learning_rate": 8.000490049693678e-06, "loss": 0.3429, "step": 2659 }, { "epoch": 0.6330695543523532, "grad_norm": 0.46831349058115457, "learning_rate": 7.998947893292328e-06, "loss": 0.3135, "step": 2660 }, { "epoch": 0.633307550425418, "grad_norm": 0.4258334551178332, "learning_rate": 7.99740529117313e-06, "loss": 0.381, "step": 2661 }, { "epoch": 0.6335455464984828, "grad_norm": 0.413976046137531, "learning_rate": 7.995862243565352e-06, "loss": 0.4138, "step": 2662 }, { "epoch": 0.6337835425715476, "grad_norm": 0.43240881559747, "learning_rate": 7.994318750698333e-06, "loss": 0.3083, "step": 2663 }, { "epoch": 0.6340215386446124, "grad_norm": 0.4246524529086203, "learning_rate": 7.99277481280147e-06, "loss": 0.3523, "step": 2664 }, { "epoch": 0.6342595347176772, "grad_norm": 0.38578881152202554, "learning_rate": 7.991230430104233e-06, "loss": 0.3935, "step": 2665 }, { "epoch": 0.634497530790742, "grad_norm": 0.7982507745714851, "learning_rate": 7.989685602836155e-06, "loss": 0.3685, "step": 2666 }, { "epoch": 0.6347355268638067, "grad_norm": 0.39291885711093316, "learning_rate": 7.988140331226835e-06, "loss": 0.302, "step": 2667 }, { "epoch": 0.6349735229368716, "grad_norm": 0.3774641923130378, "learning_rate": 7.986594615505938e-06, "loss": 0.3455, "step": 2668 }, { "epoch": 0.6352115190099363, "grad_norm": 0.3849751604819079, "learning_rate": 7.985048455903195e-06, "loss": 0.3941, "step": 2669 }, { "epoch": 0.6354495150830012, "grad_norm": 0.4028547987540656, "learning_rate": 7.983501852648408e-06, "loss": 0.3105, "step": 2670 }, { "epoch": 0.6356875111560659, "grad_norm": 0.4175776893284019, "learning_rate": 7.981954805971434e-06, "loss": 0.3202, "step": 2671 }, { "epoch": 0.6359255072291308, "grad_norm": 0.3948825669750941, "learning_rate": 7.98040731610221e-06, "loss": 0.386, "step": 2672 }, { "epoch": 0.6361635033021955, "grad_norm": 0.37485861058760167, "learning_rate": 7.978859383270723e-06, "loss": 0.3553, "step": 2673 }, { "epoch": 0.6364014993752604, "grad_norm": 0.38429792606989394, "learning_rate": 7.97731100770704e-06, "loss": 0.305, "step": 2674 }, { "epoch": 0.6366394954483251, "grad_norm": 0.40243135178828765, "learning_rate": 7.975762189641287e-06, "loss": 0.3782, "step": 2675 }, { "epoch": 0.63687749152139, "grad_norm": 0.3537177295193898, "learning_rate": 7.974212929303655e-06, "loss": 0.4115, "step": 2676 }, { "epoch": 0.6371154875944547, "grad_norm": 0.4063244285595607, "learning_rate": 7.972663226924404e-06, "loss": 0.3373, "step": 2677 }, { "epoch": 0.6373534836675195, "grad_norm": 0.7174554198990031, "learning_rate": 7.971113082733855e-06, "loss": 0.319, "step": 2678 }, { "epoch": 0.6375914797405843, "grad_norm": 0.38291427234154246, "learning_rate": 7.969562496962402e-06, "loss": 0.4063, "step": 2679 }, { "epoch": 0.6378294758136491, "grad_norm": 0.37205693582330984, "learning_rate": 7.968011469840498e-06, "loss": 0.3852, "step": 2680 }, { "epoch": 0.6380674718867139, "grad_norm": 0.37289319443279956, "learning_rate": 7.966460001598666e-06, "loss": 0.3108, "step": 2681 }, { "epoch": 0.6383054679597787, "grad_norm": 0.36566089153086934, "learning_rate": 7.96490809246749e-06, "loss": 0.3616, "step": 2682 }, { "epoch": 0.6385434640328435, "grad_norm": 0.3792165993672128, "learning_rate": 7.963355742677622e-06, "loss": 0.3968, "step": 2683 }, { "epoch": 0.6387814601059083, "grad_norm": 0.3798446258828693, "learning_rate": 7.961802952459782e-06, "loss": 0.3367, "step": 2684 }, { "epoch": 0.639019456178973, "grad_norm": 0.3490183798631681, "learning_rate": 7.96024972204475e-06, "loss": 0.295, "step": 2685 }, { "epoch": 0.6392574522520379, "grad_norm": 0.3964584193841697, "learning_rate": 7.958696051663378e-06, "loss": 0.3989, "step": 2686 }, { "epoch": 0.6394954483251026, "grad_norm": 0.41876375542959604, "learning_rate": 7.957141941546579e-06, "loss": 0.4196, "step": 2687 }, { "epoch": 0.6397334443981675, "grad_norm": 0.3704391565272174, "learning_rate": 7.95558739192533e-06, "loss": 0.3395, "step": 2688 }, { "epoch": 0.6399714404712322, "grad_norm": 0.38705193099976026, "learning_rate": 7.954032403030676e-06, "loss": 0.3671, "step": 2689 }, { "epoch": 0.6402094365442971, "grad_norm": 0.3439386120652561, "learning_rate": 7.952476975093729e-06, "loss": 0.3779, "step": 2690 }, { "epoch": 0.6404474326173618, "grad_norm": 0.40968077374028966, "learning_rate": 7.950921108345663e-06, "loss": 0.3443, "step": 2691 }, { "epoch": 0.6406854286904267, "grad_norm": 0.40943399377681217, "learning_rate": 7.949364803017716e-06, "loss": 0.3285, "step": 2692 }, { "epoch": 0.6409234247634914, "grad_norm": 0.35009359805640566, "learning_rate": 7.947808059341198e-06, "loss": 0.3749, "step": 2693 }, { "epoch": 0.6411614208365563, "grad_norm": 0.41593770811256764, "learning_rate": 7.946250877547477e-06, "loss": 0.4014, "step": 2694 }, { "epoch": 0.641399416909621, "grad_norm": 0.37165753141343205, "learning_rate": 7.944693257867988e-06, "loss": 0.3249, "step": 2695 }, { "epoch": 0.6416374129826858, "grad_norm": 0.36739798172367116, "learning_rate": 7.943135200534231e-06, "loss": 0.3509, "step": 2696 }, { "epoch": 0.6418754090557506, "grad_norm": 0.3724656741195384, "learning_rate": 7.941576705777775e-06, "loss": 0.4075, "step": 2697 }, { "epoch": 0.6421134051288154, "grad_norm": 0.36653430005871285, "learning_rate": 7.940017773830251e-06, "loss": 0.3887, "step": 2698 }, { "epoch": 0.6423514012018802, "grad_norm": 0.3954242148262836, "learning_rate": 7.93845840492335e-06, "loss": 0.2837, "step": 2699 }, { "epoch": 0.642589397274945, "grad_norm": 0.37217288826320916, "learning_rate": 7.936898599288837e-06, "loss": 0.367, "step": 2700 }, { "epoch": 0.6428273933480098, "grad_norm": 0.3433351247909528, "learning_rate": 7.93533835715854e-06, "loss": 0.413, "step": 2701 }, { "epoch": 0.6430653894210746, "grad_norm": 0.3710719613065313, "learning_rate": 7.933777678764342e-06, "loss": 0.3481, "step": 2702 }, { "epoch": 0.6433033854941393, "grad_norm": 0.4051379668160816, "learning_rate": 7.932216564338207e-06, "loss": 0.3487, "step": 2703 }, { "epoch": 0.6435413815672042, "grad_norm": 0.35261923047482774, "learning_rate": 7.930655014112149e-06, "loss": 0.3673, "step": 2704 }, { "epoch": 0.6437793776402689, "grad_norm": 0.37561470708818495, "learning_rate": 7.929093028318254e-06, "loss": 0.3952, "step": 2705 }, { "epoch": 0.6440173737133338, "grad_norm": 0.38085667134847434, "learning_rate": 7.927530607188674e-06, "loss": 0.2997, "step": 2706 }, { "epoch": 0.6442553697863985, "grad_norm": 0.38309188860484494, "learning_rate": 7.925967750955621e-06, "loss": 0.3318, "step": 2707 }, { "epoch": 0.6444933658594634, "grad_norm": 0.35539044141417847, "learning_rate": 7.924404459851376e-06, "loss": 0.4216, "step": 2708 }, { "epoch": 0.6447313619325281, "grad_norm": 0.36545999682638586, "learning_rate": 7.92284073410828e-06, "loss": 0.3565, "step": 2709 }, { "epoch": 0.644969358005593, "grad_norm": 0.3853826020640622, "learning_rate": 7.921276573958747e-06, "loss": 0.3193, "step": 2710 }, { "epoch": 0.6452073540786577, "grad_norm": 0.370265598492799, "learning_rate": 7.919711979635245e-06, "loss": 0.3624, "step": 2711 }, { "epoch": 0.6454453501517224, "grad_norm": 0.38428982530476763, "learning_rate": 7.918146951370312e-06, "loss": 0.368, "step": 2712 }, { "epoch": 0.6456833462247873, "grad_norm": 0.3863324681379504, "learning_rate": 7.916581489396551e-06, "loss": 0.3382, "step": 2713 }, { "epoch": 0.645921342297852, "grad_norm": 0.40476758870600216, "learning_rate": 7.915015593946627e-06, "loss": 0.3452, "step": 2714 }, { "epoch": 0.6461593383709169, "grad_norm": 0.350755584302665, "learning_rate": 7.913449265253272e-06, "loss": 0.3541, "step": 2715 }, { "epoch": 0.6463973344439816, "grad_norm": 0.3917385422279502, "learning_rate": 7.911882503549282e-06, "loss": 0.3615, "step": 2716 }, { "epoch": 0.6466353305170465, "grad_norm": 0.4148508130598785, "learning_rate": 7.910315309067515e-06, "loss": 0.3237, "step": 2717 }, { "epoch": 0.6468733265901112, "grad_norm": 0.3655588510807828, "learning_rate": 7.908747682040893e-06, "loss": 0.3717, "step": 2718 }, { "epoch": 0.6471113226631761, "grad_norm": 0.3786662704253251, "learning_rate": 7.907179622702409e-06, "loss": 0.3995, "step": 2719 }, { "epoch": 0.6473493187362408, "grad_norm": 0.41077148959741994, "learning_rate": 7.905611131285114e-06, "loss": 0.3225, "step": 2720 }, { "epoch": 0.6475873148093056, "grad_norm": 0.3845293510123214, "learning_rate": 7.904042208022121e-06, "loss": 0.3184, "step": 2721 }, { "epoch": 0.6478253108823704, "grad_norm": 0.38026816465993274, "learning_rate": 7.902472853146614e-06, "loss": 0.4047, "step": 2722 }, { "epoch": 0.6480633069554352, "grad_norm": 0.3831104022471865, "learning_rate": 7.90090306689184e-06, "loss": 0.3919, "step": 2723 }, { "epoch": 0.6483013030285, "grad_norm": 0.39343951127809484, "learning_rate": 7.899332849491101e-06, "loss": 0.2942, "step": 2724 }, { "epoch": 0.6485392991015648, "grad_norm": 0.37517985298715045, "learning_rate": 7.897762201177777e-06, "loss": 0.3449, "step": 2725 }, { "epoch": 0.6487772951746296, "grad_norm": 0.36054421682014914, "learning_rate": 7.896191122185302e-06, "loss": 0.3968, "step": 2726 }, { "epoch": 0.6490152912476944, "grad_norm": 0.38545956806454257, "learning_rate": 7.894619612747177e-06, "loss": 0.3261, "step": 2727 }, { "epoch": 0.6492532873207592, "grad_norm": 0.4049278658685888, "learning_rate": 7.89304767309697e-06, "loss": 0.3517, "step": 2728 }, { "epoch": 0.649491283393824, "grad_norm": 0.377732909086466, "learning_rate": 7.891475303468307e-06, "loss": 0.377, "step": 2729 }, { "epoch": 0.6497292794668887, "grad_norm": 0.35831707426820597, "learning_rate": 7.889902504094883e-06, "loss": 0.386, "step": 2730 }, { "epoch": 0.6499672755399536, "grad_norm": 0.38647507461352604, "learning_rate": 7.888329275210454e-06, "loss": 0.3116, "step": 2731 }, { "epoch": 0.6502052716130183, "grad_norm": 0.41222519034063054, "learning_rate": 7.88675561704884e-06, "loss": 0.3485, "step": 2732 }, { "epoch": 0.6504432676860832, "grad_norm": 0.3540005364042782, "learning_rate": 7.885181529843928e-06, "loss": 0.3811, "step": 2733 }, { "epoch": 0.6506812637591479, "grad_norm": 0.3889803639057776, "learning_rate": 7.883607013829664e-06, "loss": 0.3164, "step": 2734 }, { "epoch": 0.6509192598322128, "grad_norm": 0.3825132936787645, "learning_rate": 7.88203206924006e-06, "loss": 0.3355, "step": 2735 }, { "epoch": 0.6511572559052775, "grad_norm": 0.3600797185755466, "learning_rate": 7.880456696309194e-06, "loss": 0.3822, "step": 2736 }, { "epoch": 0.6513952519783424, "grad_norm": 0.36788983076651144, "learning_rate": 7.878880895271203e-06, "loss": 0.4035, "step": 2737 }, { "epoch": 0.6516332480514071, "grad_norm": 0.36838811546682065, "learning_rate": 7.87730466636029e-06, "loss": 0.3201, "step": 2738 }, { "epoch": 0.651871244124472, "grad_norm": 0.4199311085900462, "learning_rate": 7.875728009810723e-06, "loss": 0.3354, "step": 2739 }, { "epoch": 0.6521092401975367, "grad_norm": 0.3700461680317869, "learning_rate": 7.874150925856832e-06, "loss": 0.3907, "step": 2740 }, { "epoch": 0.6523472362706015, "grad_norm": 0.3658208922433521, "learning_rate": 7.87257341473301e-06, "loss": 0.3436, "step": 2741 }, { "epoch": 0.6525852323436663, "grad_norm": 0.3673071582783027, "learning_rate": 7.870995476673716e-06, "loss": 0.3026, "step": 2742 }, { "epoch": 0.6528232284167311, "grad_norm": 0.36996570740087614, "learning_rate": 7.869417111913469e-06, "loss": 0.3494, "step": 2743 }, { "epoch": 0.6530612244897959, "grad_norm": 0.387133905688407, "learning_rate": 7.867838320686852e-06, "loss": 0.4002, "step": 2744 }, { "epoch": 0.6532992205628607, "grad_norm": 0.4136666892086636, "learning_rate": 7.866259103228513e-06, "loss": 0.3187, "step": 2745 }, { "epoch": 0.6535372166359255, "grad_norm": 0.39398446442473684, "learning_rate": 7.864679459773165e-06, "loss": 0.3197, "step": 2746 }, { "epoch": 0.6537752127089903, "grad_norm": 0.3856438918664348, "learning_rate": 7.863099390555579e-06, "loss": 0.3823, "step": 2747 }, { "epoch": 0.654013208782055, "grad_norm": 0.3908108331472674, "learning_rate": 7.861518895810597e-06, "loss": 0.36, "step": 2748 }, { "epoch": 0.6542512048551199, "grad_norm": 0.3906604552323348, "learning_rate": 7.859937975773113e-06, "loss": 0.3083, "step": 2749 }, { "epoch": 0.6544892009281846, "grad_norm": 0.3644262557255445, "learning_rate": 7.858356630678095e-06, "loss": 0.3834, "step": 2750 }, { "epoch": 0.6547271970012495, "grad_norm": 0.3859873524132499, "learning_rate": 7.85677486076057e-06, "loss": 0.4009, "step": 2751 }, { "epoch": 0.6549651930743142, "grad_norm": 0.4012225445856221, "learning_rate": 7.855192666255627e-06, "loss": 0.3174, "step": 2752 }, { "epoch": 0.6552031891473791, "grad_norm": 0.3894743297706602, "learning_rate": 7.853610047398422e-06, "loss": 0.3163, "step": 2753 }, { "epoch": 0.6554411852204438, "grad_norm": 0.36865693864488674, "learning_rate": 7.852027004424166e-06, "loss": 0.4053, "step": 2754 }, { "epoch": 0.6556791812935087, "grad_norm": 0.3871705949523487, "learning_rate": 7.850443537568142e-06, "loss": 0.3511, "step": 2755 }, { "epoch": 0.6559171773665734, "grad_norm": 0.40039522901635954, "learning_rate": 7.848859647065692e-06, "loss": 0.3183, "step": 2756 }, { "epoch": 0.6561551734396383, "grad_norm": 0.3899094558661615, "learning_rate": 7.847275333152222e-06, "loss": 0.3536, "step": 2757 }, { "epoch": 0.656393169512703, "grad_norm": 0.38854298448856595, "learning_rate": 7.845690596063198e-06, "loss": 0.3992, "step": 2758 }, { "epoch": 0.6566311655857678, "grad_norm": 0.37350144359617254, "learning_rate": 7.844105436034156e-06, "loss": 0.3449, "step": 2759 }, { "epoch": 0.6568691616588326, "grad_norm": 0.44114084013334837, "learning_rate": 7.842519853300683e-06, "loss": 0.2977, "step": 2760 }, { "epoch": 0.6571071577318974, "grad_norm": 0.35999935925172966, "learning_rate": 7.840933848098437e-06, "loss": 0.3916, "step": 2761 }, { "epoch": 0.6573451538049622, "grad_norm": 0.39899431523147383, "learning_rate": 7.839347420663143e-06, "loss": 0.4253, "step": 2762 }, { "epoch": 0.657583149878027, "grad_norm": 0.3511330110093876, "learning_rate": 7.837760571230582e-06, "loss": 0.3323, "step": 2763 }, { "epoch": 0.6578211459510918, "grad_norm": 0.4461907551327487, "learning_rate": 7.836173300036594e-06, "loss": 0.3239, "step": 2764 }, { "epoch": 0.6580591420241566, "grad_norm": 0.39054055621536493, "learning_rate": 7.83458560731709e-06, "loss": 0.4205, "step": 2765 }, { "epoch": 0.6582971380972213, "grad_norm": 0.3769385946466861, "learning_rate": 7.832997493308043e-06, "loss": 0.3511, "step": 2766 }, { "epoch": 0.6585351341702862, "grad_norm": 0.3986011878362815, "learning_rate": 7.831408958245483e-06, "loss": 0.296, "step": 2767 }, { "epoch": 0.6587731302433509, "grad_norm": 0.3880140645019253, "learning_rate": 7.829820002365504e-06, "loss": 0.3771, "step": 2768 }, { "epoch": 0.6590111263164158, "grad_norm": 0.38212257026061813, "learning_rate": 7.828230625904269e-06, "loss": 0.423, "step": 2769 }, { "epoch": 0.6592491223894805, "grad_norm": 0.38852972753947673, "learning_rate": 7.826640829097994e-06, "loss": 0.3055, "step": 2770 }, { "epoch": 0.6594871184625454, "grad_norm": 0.392631438114691, "learning_rate": 7.825050612182965e-06, "loss": 0.3385, "step": 2771 }, { "epoch": 0.6597251145356101, "grad_norm": 0.37085742233686686, "learning_rate": 7.823459975395527e-06, "loss": 0.3848, "step": 2772 }, { "epoch": 0.659963110608675, "grad_norm": 0.3834859007180493, "learning_rate": 7.821868918972087e-06, "loss": 0.3313, "step": 2773 }, { "epoch": 0.6602011066817397, "grad_norm": 0.3822005143101434, "learning_rate": 7.820277443149114e-06, "loss": 0.3139, "step": 2774 }, { "epoch": 0.6604391027548046, "grad_norm": 0.3751827860273947, "learning_rate": 7.818685548163144e-06, "loss": 0.3772, "step": 2775 }, { "epoch": 0.6606770988278693, "grad_norm": 0.36208369988094957, "learning_rate": 7.817093234250772e-06, "loss": 0.4038, "step": 2776 }, { "epoch": 0.6609150949009341, "grad_norm": 0.394839034258573, "learning_rate": 7.815500501648654e-06, "loss": 0.3098, "step": 2777 }, { "epoch": 0.6611530909739989, "grad_norm": 0.37324421123913026, "learning_rate": 7.813907350593509e-06, "loss": 0.2987, "step": 2778 }, { "epoch": 0.6613910870470637, "grad_norm": 0.3522307791159532, "learning_rate": 7.812313781322119e-06, "loss": 0.3708, "step": 2779 }, { "epoch": 0.6616290831201285, "grad_norm": 0.38147797701046887, "learning_rate": 7.810719794071326e-06, "loss": 0.3826, "step": 2780 }, { "epoch": 0.6618670791931933, "grad_norm": 0.3818587837039667, "learning_rate": 7.809125389078038e-06, "loss": 0.306, "step": 2781 }, { "epoch": 0.6621050752662581, "grad_norm": 0.4383170403521546, "learning_rate": 7.807530566579225e-06, "loss": 0.3566, "step": 2782 }, { "epoch": 0.6623430713393229, "grad_norm": 0.35732395462798827, "learning_rate": 7.805935326811913e-06, "loss": 0.4335, "step": 2783 }, { "epoch": 0.6625810674123876, "grad_norm": 0.3504216615398166, "learning_rate": 7.804339670013196e-06, "loss": 0.3143, "step": 2784 }, { "epoch": 0.6628190634854525, "grad_norm": 0.3661223386576208, "learning_rate": 7.802743596420228e-06, "loss": 0.3529, "step": 2785 }, { "epoch": 0.6630570595585172, "grad_norm": 0.3723251622104599, "learning_rate": 7.801147106270227e-06, "loss": 0.3883, "step": 2786 }, { "epoch": 0.6632950556315821, "grad_norm": 0.36878452917502985, "learning_rate": 7.799550199800468e-06, "loss": 0.421, "step": 2787 }, { "epoch": 0.6635330517046468, "grad_norm": 0.3618774249811299, "learning_rate": 7.797952877248289e-06, "loss": 0.3128, "step": 2788 }, { "epoch": 0.6637710477777117, "grad_norm": 0.4123556625672267, "learning_rate": 7.796355138851098e-06, "loss": 0.3594, "step": 2789 }, { "epoch": 0.6640090438507764, "grad_norm": 0.335938751220196, "learning_rate": 7.794756984846353e-06, "loss": 0.4006, "step": 2790 }, { "epoch": 0.6642470399238413, "grad_norm": 0.38401019782464624, "learning_rate": 7.793158415471582e-06, "loss": 0.3682, "step": 2791 }, { "epoch": 0.664485035996906, "grad_norm": 0.35133531035355053, "learning_rate": 7.791559430964371e-06, "loss": 0.2978, "step": 2792 }, { "epoch": 0.6647230320699709, "grad_norm": 0.39741836114549955, "learning_rate": 7.789960031562368e-06, "loss": 0.3708, "step": 2793 }, { "epoch": 0.6649610281430356, "grad_norm": 0.3886500686427725, "learning_rate": 7.788360217503284e-06, "loss": 0.4344, "step": 2794 }, { "epoch": 0.6651990242161004, "grad_norm": 0.38181964678908115, "learning_rate": 7.786759989024891e-06, "loss": 0.3091, "step": 2795 }, { "epoch": 0.6654370202891652, "grad_norm": 0.3575252771082044, "learning_rate": 7.785159346365024e-06, "loss": 0.3533, "step": 2796 }, { "epoch": 0.66567501636223, "grad_norm": 0.35735170640215147, "learning_rate": 7.783558289761575e-06, "loss": 0.4113, "step": 2797 }, { "epoch": 0.6659130124352948, "grad_norm": 0.3802244001409177, "learning_rate": 7.781956819452503e-06, "loss": 0.3463, "step": 2798 }, { "epoch": 0.6661510085083596, "grad_norm": 0.3589197801986663, "learning_rate": 7.780354935675824e-06, "loss": 0.3054, "step": 2799 }, { "epoch": 0.6663890045814244, "grad_norm": 0.37241774516966863, "learning_rate": 7.778752638669621e-06, "loss": 0.344, "step": 2800 }, { "epoch": 0.6666270006544892, "grad_norm": 0.38446901953737084, "learning_rate": 7.777149928672032e-06, "loss": 0.389, "step": 2801 }, { "epoch": 0.666864996727554, "grad_norm": 0.3748374371731264, "learning_rate": 7.775546805921259e-06, "loss": 0.2864, "step": 2802 }, { "epoch": 0.6671029928006188, "grad_norm": 0.4043541069673673, "learning_rate": 7.773943270655568e-06, "loss": 0.3045, "step": 2803 }, { "epoch": 0.6673409888736835, "grad_norm": 0.4320616556631244, "learning_rate": 7.772339323113283e-06, "loss": 0.3828, "step": 2804 }, { "epoch": 0.6675789849467484, "grad_norm": 0.3884296375705101, "learning_rate": 7.770734963532791e-06, "loss": 0.3935, "step": 2805 }, { "epoch": 0.6678169810198131, "grad_norm": 0.3947257784010564, "learning_rate": 7.769130192152538e-06, "loss": 0.3206, "step": 2806 }, { "epoch": 0.668054977092878, "grad_norm": 0.4031463873354111, "learning_rate": 7.767525009211032e-06, "loss": 0.3593, "step": 2807 }, { "epoch": 0.6682929731659427, "grad_norm": 0.33474899996590485, "learning_rate": 7.765919414946846e-06, "loss": 0.387, "step": 2808 }, { "epoch": 0.6685309692390076, "grad_norm": 0.3959331136357287, "learning_rate": 7.76431340959861e-06, "loss": 0.3433, "step": 2809 }, { "epoch": 0.6687689653120723, "grad_norm": 0.3963425292776564, "learning_rate": 7.762706993405014e-06, "loss": 0.2888, "step": 2810 }, { "epoch": 0.6690069613851372, "grad_norm": 0.37455109669088693, "learning_rate": 7.761100166604814e-06, "loss": 0.3744, "step": 2811 }, { "epoch": 0.6692449574582019, "grad_norm": 0.37164146720882896, "learning_rate": 7.759492929436821e-06, "loss": 0.3976, "step": 2812 }, { "epoch": 0.6694829535312667, "grad_norm": 0.37019035117029214, "learning_rate": 7.757885282139913e-06, "loss": 0.3273, "step": 2813 }, { "epoch": 0.6697209496043315, "grad_norm": 0.3867936235753999, "learning_rate": 7.756277224953027e-06, "loss": 0.3476, "step": 2814 }, { "epoch": 0.6699589456773963, "grad_norm": 0.37145512873107056, "learning_rate": 7.754668758115157e-06, "loss": 0.3941, "step": 2815 }, { "epoch": 0.6701969417504611, "grad_norm": 0.36270472677005344, "learning_rate": 7.753059881865361e-06, "loss": 0.3415, "step": 2816 }, { "epoch": 0.6704349378235259, "grad_norm": 0.4044012125764732, "learning_rate": 7.751450596442761e-06, "loss": 0.3126, "step": 2817 }, { "epoch": 0.6706729338965907, "grad_norm": 0.4117053779057893, "learning_rate": 7.749840902086534e-06, "loss": 0.3767, "step": 2818 }, { "epoch": 0.6709109299696555, "grad_norm": 0.3709103066934568, "learning_rate": 7.748230799035922e-06, "loss": 0.4183, "step": 2819 }, { "epoch": 0.6711489260427203, "grad_norm": 0.36765736344691075, "learning_rate": 7.746620287530224e-06, "loss": 0.3289, "step": 2820 }, { "epoch": 0.6713869221157851, "grad_norm": 0.39213657203388363, "learning_rate": 7.745009367808805e-06, "loss": 0.3612, "step": 2821 }, { "epoch": 0.6716249181888498, "grad_norm": 0.3694609673330917, "learning_rate": 7.743398040111085e-06, "loss": 0.4092, "step": 2822 }, { "epoch": 0.6718629142619147, "grad_norm": 0.36370204232238, "learning_rate": 7.741786304676546e-06, "loss": 0.3647, "step": 2823 }, { "epoch": 0.6721009103349794, "grad_norm": 0.39103037281531944, "learning_rate": 7.740174161744734e-06, "loss": 0.3049, "step": 2824 }, { "epoch": 0.6723389064080443, "grad_norm": 0.4294014674026649, "learning_rate": 7.738561611555256e-06, "loss": 0.3702, "step": 2825 }, { "epoch": 0.672576902481109, "grad_norm": 0.378252161646541, "learning_rate": 7.736948654347771e-06, "loss": 0.429, "step": 2826 }, { "epoch": 0.6728148985541739, "grad_norm": 0.3733849472381206, "learning_rate": 7.735335290362008e-06, "loss": 0.3083, "step": 2827 }, { "epoch": 0.6730528946272386, "grad_norm": 0.3875790384170856, "learning_rate": 7.733721519837751e-06, "loss": 0.3125, "step": 2828 }, { "epoch": 0.6732908907003035, "grad_norm": 0.357812704637255, "learning_rate": 7.732107343014848e-06, "loss": 0.4005, "step": 2829 }, { "epoch": 0.6735288867733682, "grad_norm": 0.3644020205564977, "learning_rate": 7.730492760133204e-06, "loss": 0.4116, "step": 2830 }, { "epoch": 0.673766882846433, "grad_norm": 0.39330628647179383, "learning_rate": 7.728877771432787e-06, "loss": 0.322, "step": 2831 }, { "epoch": 0.6740048789194978, "grad_norm": 0.39515618942825725, "learning_rate": 7.727262377153625e-06, "loss": 0.3311, "step": 2832 }, { "epoch": 0.6742428749925626, "grad_norm": 0.3826839966849655, "learning_rate": 7.725646577535803e-06, "loss": 0.4147, "step": 2833 }, { "epoch": 0.6744808710656274, "grad_norm": 0.40336985165060796, "learning_rate": 7.724030372819473e-06, "loss": 0.3492, "step": 2834 }, { "epoch": 0.6747188671386922, "grad_norm": 0.40161595680714757, "learning_rate": 7.722413763244837e-06, "loss": 0.3058, "step": 2835 }, { "epoch": 0.674956863211757, "grad_norm": 0.37111456991939507, "learning_rate": 7.720796749052169e-06, "loss": 0.3988, "step": 2836 }, { "epoch": 0.6751948592848218, "grad_norm": 0.4093260967053505, "learning_rate": 7.719179330481791e-06, "loss": 0.3985, "step": 2837 }, { "epoch": 0.6754328553578866, "grad_norm": 0.37469388622058286, "learning_rate": 7.7175615077741e-06, "loss": 0.3206, "step": 2838 }, { "epoch": 0.6756708514309514, "grad_norm": 0.36305043278624066, "learning_rate": 7.715943281169539e-06, "loss": 0.3477, "step": 2839 }, { "epoch": 0.6759088475040161, "grad_norm": 0.36935236818452855, "learning_rate": 7.714324650908615e-06, "loss": 0.3998, "step": 2840 }, { "epoch": 0.676146843577081, "grad_norm": 0.3948705853817137, "learning_rate": 7.7127056172319e-06, "loss": 0.3422, "step": 2841 }, { "epoch": 0.6763848396501457, "grad_norm": 0.3796382485482362, "learning_rate": 7.711086180380021e-06, "loss": 0.3149, "step": 2842 }, { "epoch": 0.6766228357232106, "grad_norm": 0.3616110674940236, "learning_rate": 7.709466340593666e-06, "loss": 0.364, "step": 2843 }, { "epoch": 0.6768608317962753, "grad_norm": 0.3662500931977446, "learning_rate": 7.707846098113583e-06, "loss": 0.3967, "step": 2844 }, { "epoch": 0.6770988278693402, "grad_norm": 0.37281181697106686, "learning_rate": 7.706225453180583e-06, "loss": 0.3279, "step": 2845 }, { "epoch": 0.6773368239424049, "grad_norm": 0.3855893672663316, "learning_rate": 7.704604406035531e-06, "loss": 0.3496, "step": 2846 }, { "epoch": 0.6775748200154698, "grad_norm": 0.3889507681326313, "learning_rate": 7.702982956919356e-06, "loss": 0.4173, "step": 2847 }, { "epoch": 0.6778128160885345, "grad_norm": 0.3971306511498377, "learning_rate": 7.701361106073044e-06, "loss": 0.3444, "step": 2848 }, { "epoch": 0.6780508121615993, "grad_norm": 0.36005635998666913, "learning_rate": 7.699738853737646e-06, "loss": 0.2921, "step": 2849 }, { "epoch": 0.6782888082346641, "grad_norm": 0.38741602947105674, "learning_rate": 7.698116200154262e-06, "loss": 0.3202, "step": 2850 }, { "epoch": 0.6785268043077289, "grad_norm": 0.3676779858892718, "learning_rate": 7.696493145564065e-06, "loss": 0.4059, "step": 2851 }, { "epoch": 0.6787648003807937, "grad_norm": 0.3417732883784283, "learning_rate": 7.694869690208278e-06, "loss": 0.3071, "step": 2852 }, { "epoch": 0.6790027964538585, "grad_norm": 0.3806094615926385, "learning_rate": 7.693245834328186e-06, "loss": 0.3005, "step": 2853 }, { "epoch": 0.6792407925269233, "grad_norm": 0.37415443493343387, "learning_rate": 7.691621578165135e-06, "loss": 0.3775, "step": 2854 }, { "epoch": 0.6794787885999881, "grad_norm": 0.3588357321765243, "learning_rate": 7.689996921960533e-06, "loss": 0.3828, "step": 2855 }, { "epoch": 0.6797167846730529, "grad_norm": 0.3928642880638878, "learning_rate": 7.68837186595584e-06, "loss": 0.3388, "step": 2856 }, { "epoch": 0.6799547807461177, "grad_norm": 0.39713877557243726, "learning_rate": 7.686746410392579e-06, "loss": 0.3547, "step": 2857 }, { "epoch": 0.6801927768191824, "grad_norm": 0.37028184933204206, "learning_rate": 7.685120555512335e-06, "loss": 0.3991, "step": 2858 }, { "epoch": 0.6804307728922473, "grad_norm": 0.44868723676569244, "learning_rate": 7.68349430155675e-06, "loss": 0.2975, "step": 2859 }, { "epoch": 0.680668768965312, "grad_norm": 0.38969868882705583, "learning_rate": 7.681867648767527e-06, "loss": 0.3064, "step": 2860 }, { "epoch": 0.6809067650383769, "grad_norm": 0.38048777908327175, "learning_rate": 7.680240597386423e-06, "loss": 0.3544, "step": 2861 }, { "epoch": 0.6811447611114416, "grad_norm": 0.37492467378771765, "learning_rate": 7.678613147655263e-06, "loss": 0.4007, "step": 2862 }, { "epoch": 0.6813827571845065, "grad_norm": 0.3689709476215601, "learning_rate": 7.676985299815921e-06, "loss": 0.3204, "step": 2863 }, { "epoch": 0.6816207532575712, "grad_norm": 0.4036262585926096, "learning_rate": 7.675357054110337e-06, "loss": 0.3487, "step": 2864 }, { "epoch": 0.6818587493306361, "grad_norm": 0.41388190325234303, "learning_rate": 7.673728410780512e-06, "loss": 0.3926, "step": 2865 }, { "epoch": 0.6820967454037008, "grad_norm": 0.3896246709459209, "learning_rate": 7.6720993700685e-06, "loss": 0.3495, "step": 2866 }, { "epoch": 0.6823347414767656, "grad_norm": 0.38993044742482114, "learning_rate": 7.670469932216416e-06, "loss": 0.2993, "step": 2867 }, { "epoch": 0.6825727375498304, "grad_norm": 0.3709106695995631, "learning_rate": 7.668840097466438e-06, "loss": 0.4017, "step": 2868 }, { "epoch": 0.6828107336228952, "grad_norm": 0.39844192153673497, "learning_rate": 7.667209866060795e-06, "loss": 0.4329, "step": 2869 }, { "epoch": 0.68304872969596, "grad_norm": 0.394396694213486, "learning_rate": 7.665579238241783e-06, "loss": 0.3091, "step": 2870 }, { "epoch": 0.6832867257690248, "grad_norm": 0.39853662948827007, "learning_rate": 7.663948214251754e-06, "loss": 0.336, "step": 2871 }, { "epoch": 0.6835247218420896, "grad_norm": 0.3630956355542585, "learning_rate": 7.662316794333115e-06, "loss": 0.4052, "step": 2872 }, { "epoch": 0.6837627179151544, "grad_norm": 0.3963082232165837, "learning_rate": 7.660684978728341e-06, "loss": 0.3736, "step": 2873 }, { "epoch": 0.6840007139882192, "grad_norm": 0.3977137944295455, "learning_rate": 7.659052767679956e-06, "loss": 0.3113, "step": 2874 }, { "epoch": 0.684238710061284, "grad_norm": 0.4015812355864788, "learning_rate": 7.657420161430548e-06, "loss": 0.3585, "step": 2875 }, { "epoch": 0.6844767061343487, "grad_norm": 0.39210402493400554, "learning_rate": 7.655787160222762e-06, "loss": 0.4295, "step": 2876 }, { "epoch": 0.6847147022074136, "grad_norm": 0.39681491199767854, "learning_rate": 7.654153764299304e-06, "loss": 0.2991, "step": 2877 }, { "epoch": 0.6849526982804783, "grad_norm": 0.4163108651945542, "learning_rate": 7.652519973902935e-06, "loss": 0.3332, "step": 2878 }, { "epoch": 0.6851906943535432, "grad_norm": 0.3748847281909277, "learning_rate": 7.650885789276477e-06, "loss": 0.3468, "step": 2879 }, { "epoch": 0.6854286904266079, "grad_norm": 0.421568928920472, "learning_rate": 7.649251210662812e-06, "loss": 0.373, "step": 2880 }, { "epoch": 0.6856666864996728, "grad_norm": 0.37882377572325376, "learning_rate": 7.647616238304876e-06, "loss": 0.3311, "step": 2881 }, { "epoch": 0.6859046825727375, "grad_norm": 0.37714593364667043, "learning_rate": 7.64598087244567e-06, "loss": 0.3475, "step": 2882 }, { "epoch": 0.6861426786458024, "grad_norm": 0.3855958441067824, "learning_rate": 7.644345113328248e-06, "loss": 0.4108, "step": 2883 }, { "epoch": 0.6863806747188671, "grad_norm": 0.3927803834584747, "learning_rate": 7.642708961195723e-06, "loss": 0.3284, "step": 2884 }, { "epoch": 0.686618670791932, "grad_norm": 0.39284438670389804, "learning_rate": 7.641072416291271e-06, "loss": 0.3087, "step": 2885 }, { "epoch": 0.6868566668649967, "grad_norm": 0.38222011385116755, "learning_rate": 7.639435478858119e-06, "loss": 0.3611, "step": 2886 }, { "epoch": 0.6870946629380615, "grad_norm": 0.35343748065798736, "learning_rate": 7.637798149139559e-06, "loss": 0.4157, "step": 2887 }, { "epoch": 0.6873326590111263, "grad_norm": 0.4761170077236074, "learning_rate": 7.636160427378938e-06, "loss": 0.3342, "step": 2888 }, { "epoch": 0.6875706550841911, "grad_norm": 0.3653390110713056, "learning_rate": 7.634522313819664e-06, "loss": 0.332, "step": 2889 }, { "epoch": 0.6878086511572559, "grad_norm": 0.357610536904034, "learning_rate": 7.632883808705196e-06, "loss": 0.4209, "step": 2890 }, { "epoch": 0.6880466472303207, "grad_norm": 0.35884223240749147, "learning_rate": 7.631244912279061e-06, "loss": 0.3103, "step": 2891 }, { "epoch": 0.6882846433033855, "grad_norm": 0.3932088139549123, "learning_rate": 7.629605624784839e-06, "loss": 0.3257, "step": 2892 }, { "epoch": 0.6885226393764503, "grad_norm": 0.3756997656933469, "learning_rate": 7.627965946466167e-06, "loss": 0.3667, "step": 2893 }, { "epoch": 0.688760635449515, "grad_norm": 0.41363244284700523, "learning_rate": 7.626325877566741e-06, "loss": 0.421, "step": 2894 }, { "epoch": 0.6889986315225799, "grad_norm": 0.38949498606977967, "learning_rate": 7.624685418330319e-06, "loss": 0.3179, "step": 2895 }, { "epoch": 0.6892366275956446, "grad_norm": 0.37724094080660947, "learning_rate": 7.623044569000712e-06, "loss": 0.3282, "step": 2896 }, { "epoch": 0.6894746236687095, "grad_norm": 0.5285214493849093, "learning_rate": 7.621403329821792e-06, "loss": 0.393, "step": 2897 }, { "epoch": 0.6897126197417742, "grad_norm": 0.3726869379004517, "learning_rate": 7.619761701037486e-06, "loss": 0.3831, "step": 2898 }, { "epoch": 0.6899506158148391, "grad_norm": 0.39521137312351157, "learning_rate": 7.618119682891782e-06, "loss": 0.3289, "step": 2899 }, { "epoch": 0.6901886118879038, "grad_norm": 0.4132662165624947, "learning_rate": 7.6164772756287234e-06, "loss": 0.3614, "step": 2900 }, { "epoch": 0.6904266079609687, "grad_norm": 0.3713002154182572, "learning_rate": 7.614834479492413e-06, "loss": 0.3873, "step": 2901 }, { "epoch": 0.6906646040340334, "grad_norm": 0.4066260605116146, "learning_rate": 7.613191294727011e-06, "loss": 0.3409, "step": 2902 }, { "epoch": 0.6909026001070983, "grad_norm": 0.4241202111497487, "learning_rate": 7.611547721576738e-06, "loss": 0.3538, "step": 2903 }, { "epoch": 0.691140596180163, "grad_norm": 0.4686662314847007, "learning_rate": 7.609903760285864e-06, "loss": 0.4259, "step": 2904 }, { "epoch": 0.6913785922532278, "grad_norm": 0.362668095436962, "learning_rate": 7.608259411098725e-06, "loss": 0.3968, "step": 2905 }, { "epoch": 0.6916165883262926, "grad_norm": 0.40773309376570577, "learning_rate": 7.606614674259714e-06, "loss": 0.3047, "step": 2906 }, { "epoch": 0.6918545843993574, "grad_norm": 0.35828275597855175, "learning_rate": 7.6049695500132754e-06, "loss": 0.3743, "step": 2907 }, { "epoch": 0.6920925804724222, "grad_norm": 0.3603447573938342, "learning_rate": 7.603324038603921e-06, "loss": 0.3842, "step": 2908 }, { "epoch": 0.692330576545487, "grad_norm": 0.40350976963719254, "learning_rate": 7.601678140276209e-06, "loss": 0.3228, "step": 2909 }, { "epoch": 0.6925685726185518, "grad_norm": 0.38318140197317035, "learning_rate": 7.600031855274764e-06, "loss": 0.3013, "step": 2910 }, { "epoch": 0.6928065686916166, "grad_norm": 0.3810631038611205, "learning_rate": 7.598385183844263e-06, "loss": 0.3572, "step": 2911 }, { "epoch": 0.6930445647646813, "grad_norm": 0.3784857718340065, "learning_rate": 7.5967381262294435e-06, "loss": 0.3771, "step": 2912 }, { "epoch": 0.6932825608377462, "grad_norm": 0.37559330533248253, "learning_rate": 7.595090682675098e-06, "loss": 0.3135, "step": 2913 }, { "epoch": 0.6935205569108109, "grad_norm": 0.365340499769047, "learning_rate": 7.593442853426077e-06, "loss": 0.337, "step": 2914 }, { "epoch": 0.6937585529838758, "grad_norm": 0.3680511519981457, "learning_rate": 7.59179463872729e-06, "loss": 0.4097, "step": 2915 }, { "epoch": 0.6939965490569405, "grad_norm": 0.3766039547521425, "learning_rate": 7.590146038823702e-06, "loss": 0.332, "step": 2916 }, { "epoch": 0.6942345451300054, "grad_norm": 0.3889666332240844, "learning_rate": 7.588497053960335e-06, "loss": 0.3063, "step": 2917 }, { "epoch": 0.6944725412030701, "grad_norm": 0.377287587936367, "learning_rate": 7.586847684382269e-06, "loss": 0.3576, "step": 2918 }, { "epoch": 0.694710537276135, "grad_norm": 0.3803646716074299, "learning_rate": 7.585197930334642e-06, "loss": 0.3992, "step": 2919 }, { "epoch": 0.6949485333491997, "grad_norm": 0.3630382010768588, "learning_rate": 7.5835477920626474e-06, "loss": 0.3336, "step": 2920 }, { "epoch": 0.6951865294222646, "grad_norm": 0.4145598312089378, "learning_rate": 7.5818972698115375e-06, "loss": 0.3337, "step": 2921 }, { "epoch": 0.6954245254953293, "grad_norm": 0.40683667940888457, "learning_rate": 7.580246363826621e-06, "loss": 0.4267, "step": 2922 }, { "epoch": 0.6956625215683941, "grad_norm": 0.4208382587332909, "learning_rate": 7.578595074353262e-06, "loss": 0.3757, "step": 2923 }, { "epoch": 0.6959005176414589, "grad_norm": 0.35944681590578803, "learning_rate": 7.5769434016368845e-06, "loss": 0.3203, "step": 2924 }, { "epoch": 0.6961385137145237, "grad_norm": 0.3599434098310551, "learning_rate": 7.575291345922966e-06, "loss": 0.3583, "step": 2925 }, { "epoch": 0.6963765097875885, "grad_norm": 0.3756154404949079, "learning_rate": 7.5736389074570425e-06, "loss": 0.408, "step": 2926 }, { "epoch": 0.6966145058606533, "grad_norm": 0.367210127037896, "learning_rate": 7.571986086484711e-06, "loss": 0.3372, "step": 2927 }, { "epoch": 0.6968525019337181, "grad_norm": 0.3875760302017146, "learning_rate": 7.570332883251618e-06, "loss": 0.3293, "step": 2928 }, { "epoch": 0.6970904980067829, "grad_norm": 0.39687946684154773, "learning_rate": 7.568679298003472e-06, "loss": 0.3794, "step": 2929 }, { "epoch": 0.6973284940798476, "grad_norm": 0.3774932208465836, "learning_rate": 7.567025330986035e-06, "loss": 0.3996, "step": 2930 }, { "epoch": 0.6975664901529125, "grad_norm": 0.37825723085597085, "learning_rate": 7.565370982445131e-06, "loss": 0.3063, "step": 2931 }, { "epoch": 0.6978044862259772, "grad_norm": 0.3701480630486983, "learning_rate": 7.563716252626632e-06, "loss": 0.3392, "step": 2932 }, { "epoch": 0.6980424822990421, "grad_norm": 0.363483092582883, "learning_rate": 7.562061141776476e-06, "loss": 0.4165, "step": 2933 }, { "epoch": 0.6982804783721068, "grad_norm": 0.40095465937550545, "learning_rate": 7.560405650140652e-06, "loss": 0.3086, "step": 2934 }, { "epoch": 0.6985184744451717, "grad_norm": 0.5381288739496879, "learning_rate": 7.5587497779652065e-06, "loss": 0.2924, "step": 2935 }, { "epoch": 0.6987564705182364, "grad_norm": 0.3515417031170579, "learning_rate": 7.557093525496245e-06, "loss": 0.389, "step": 2936 }, { "epoch": 0.6989944665913013, "grad_norm": 0.37096216086334566, "learning_rate": 7.555436892979926e-06, "loss": 0.421, "step": 2937 }, { "epoch": 0.699232462664366, "grad_norm": 0.3482586299052201, "learning_rate": 7.553779880662465e-06, "loss": 0.2726, "step": 2938 }, { "epoch": 0.6994704587374309, "grad_norm": 0.4102693276938636, "learning_rate": 7.552122488790136e-06, "loss": 0.3548, "step": 2939 }, { "epoch": 0.6997084548104956, "grad_norm": 0.3581143681257741, "learning_rate": 7.55046471760927e-06, "loss": 0.379, "step": 2940 }, { "epoch": 0.6999464508835604, "grad_norm": 0.4034626868783178, "learning_rate": 7.548806567366251e-06, "loss": 0.376, "step": 2941 }, { "epoch": 0.7001844469566252, "grad_norm": 0.37940003906957404, "learning_rate": 7.547148038307521e-06, "loss": 0.3032, "step": 2942 }, { "epoch": 0.70042244302969, "grad_norm": 0.37237400358199735, "learning_rate": 7.545489130679581e-06, "loss": 0.3705, "step": 2943 }, { "epoch": 0.7006604391027548, "grad_norm": 0.3714730152118366, "learning_rate": 7.543829844728983e-06, "loss": 0.4293, "step": 2944 }, { "epoch": 0.7008984351758196, "grad_norm": 0.35794214306124905, "learning_rate": 7.542170180702337e-06, "loss": 0.2971, "step": 2945 }, { "epoch": 0.7011364312488844, "grad_norm": 0.43530263361830346, "learning_rate": 7.540510138846313e-06, "loss": 0.324, "step": 2946 }, { "epoch": 0.7013744273219492, "grad_norm": 0.38306179336745, "learning_rate": 7.538849719407632e-06, "loss": 0.3798, "step": 2947 }, { "epoch": 0.701612423395014, "grad_norm": 0.39934257512599863, "learning_rate": 7.5371889226330765e-06, "loss": 0.3281, "step": 2948 }, { "epoch": 0.7018504194680788, "grad_norm": 0.36426837248809085, "learning_rate": 7.53552774876948e-06, "loss": 0.3066, "step": 2949 }, { "epoch": 0.7020884155411435, "grad_norm": 0.4139378891941689, "learning_rate": 7.533866198063734e-06, "loss": 0.3819, "step": 2950 }, { "epoch": 0.7023264116142084, "grad_norm": 0.3662560224348692, "learning_rate": 7.532204270762786e-06, "loss": 0.4475, "step": 2951 }, { "epoch": 0.7025644076872731, "grad_norm": 0.3990867976425942, "learning_rate": 7.530541967113639e-06, "loss": 0.3468, "step": 2952 }, { "epoch": 0.702802403760338, "grad_norm": 0.3856725854345983, "learning_rate": 7.528879287363354e-06, "loss": 0.3259, "step": 2953 }, { "epoch": 0.7030403998334027, "grad_norm": 0.37688679209097037, "learning_rate": 7.527216231759045e-06, "loss": 0.3637, "step": 2954 }, { "epoch": 0.7032783959064676, "grad_norm": 0.3667821119467153, "learning_rate": 7.525552800547883e-06, "loss": 0.3878, "step": 2955 }, { "epoch": 0.7035163919795323, "grad_norm": 0.3753572526176831, "learning_rate": 7.523888993977097e-06, "loss": 0.3284, "step": 2956 }, { "epoch": 0.7037543880525972, "grad_norm": 0.38058502140052836, "learning_rate": 7.522224812293968e-06, "loss": 0.3636, "step": 2957 }, { "epoch": 0.7039923841256619, "grad_norm": 0.38834168887671905, "learning_rate": 7.5205602557458345e-06, "loss": 0.4213, "step": 2958 }, { "epoch": 0.7042303801987267, "grad_norm": 0.37141054900070025, "learning_rate": 7.518895324580091e-06, "loss": 0.3236, "step": 2959 }, { "epoch": 0.7044683762717915, "grad_norm": 0.39065100626003746, "learning_rate": 7.517230019044188e-06, "loss": 0.3049, "step": 2960 }, { "epoch": 0.7047063723448563, "grad_norm": 0.3666524852773353, "learning_rate": 7.51556433938563e-06, "loss": 0.3719, "step": 2961 }, { "epoch": 0.7049443684179211, "grad_norm": 0.3817964753531332, "learning_rate": 7.513898285851982e-06, "loss": 0.4357, "step": 2962 }, { "epoch": 0.7051823644909859, "grad_norm": 0.4198232988530167, "learning_rate": 7.512231858690856e-06, "loss": 0.3348, "step": 2963 }, { "epoch": 0.7054203605640507, "grad_norm": 0.3964784823438321, "learning_rate": 7.510565058149927e-06, "loss": 0.3519, "step": 2964 }, { "epoch": 0.7056583566371155, "grad_norm": 0.34675732231223394, "learning_rate": 7.508897884476921e-06, "loss": 0.3907, "step": 2965 }, { "epoch": 0.7058963527101803, "grad_norm": 0.3578070224969587, "learning_rate": 7.507230337919623e-06, "loss": 0.335, "step": 2966 }, { "epoch": 0.7061343487832451, "grad_norm": 0.42191824768221664, "learning_rate": 7.505562418725869e-06, "loss": 0.296, "step": 2967 }, { "epoch": 0.7063723448563098, "grad_norm": 0.3651458752052437, "learning_rate": 7.503894127143558e-06, "loss": 0.3755, "step": 2968 }, { "epoch": 0.7066103409293747, "grad_norm": 0.4570811159042686, "learning_rate": 7.5022254634206345e-06, "loss": 0.3917, "step": 2969 }, { "epoch": 0.7068483370024394, "grad_norm": 0.34715154210823396, "learning_rate": 7.500556427805106e-06, "loss": 0.3153, "step": 2970 }, { "epoch": 0.7070863330755043, "grad_norm": 0.3947211815602096, "learning_rate": 7.498887020545031e-06, "loss": 0.3417, "step": 2971 }, { "epoch": 0.707324329148569, "grad_norm": 0.37334961366076164, "learning_rate": 7.497217241888525e-06, "loss": 0.4165, "step": 2972 }, { "epoch": 0.7075623252216339, "grad_norm": 0.3509931391175888, "learning_rate": 7.495547092083758e-06, "loss": 0.413, "step": 2973 }, { "epoch": 0.7078003212946986, "grad_norm": 0.4205974782723252, "learning_rate": 7.493876571378958e-06, "loss": 0.2983, "step": 2974 }, { "epoch": 0.7080383173677635, "grad_norm": 0.40308557662169947, "learning_rate": 7.492205680022402e-06, "loss": 0.3522, "step": 2975 }, { "epoch": 0.7082763134408282, "grad_norm": 0.3511659283555987, "learning_rate": 7.490534418262429e-06, "loss": 0.4201, "step": 2976 }, { "epoch": 0.708514309513893, "grad_norm": 0.38333575891336563, "learning_rate": 7.488862786347428e-06, "loss": 0.3414, "step": 2977 }, { "epoch": 0.7087523055869578, "grad_norm": 0.38546643322989155, "learning_rate": 7.487190784525847e-06, "loss": 0.3206, "step": 2978 }, { "epoch": 0.7089903016600226, "grad_norm": 0.36230466619537316, "learning_rate": 7.485518413046185e-06, "loss": 0.3884, "step": 2979 }, { "epoch": 0.7092282977330874, "grad_norm": 0.36360795125731105, "learning_rate": 7.4838456721569975e-06, "loss": 0.3583, "step": 2980 }, { "epoch": 0.7094662938061522, "grad_norm": 0.3836337184101506, "learning_rate": 7.482172562106894e-06, "loss": 0.3268, "step": 2981 }, { "epoch": 0.709704289879217, "grad_norm": 0.4106360537544092, "learning_rate": 7.480499083144544e-06, "loss": 0.3347, "step": 2982 }, { "epoch": 0.7099422859522818, "grad_norm": 0.3748286533906386, "learning_rate": 7.478825235518665e-06, "loss": 0.3911, "step": 2983 }, { "epoch": 0.7101802820253466, "grad_norm": 0.42427243151949334, "learning_rate": 7.477151019478033e-06, "loss": 0.3345, "step": 2984 }, { "epoch": 0.7104182780984114, "grad_norm": 0.3954736604508801, "learning_rate": 7.4754764352714775e-06, "loss": 0.3117, "step": 2985 }, { "epoch": 0.7106562741714761, "grad_norm": 0.35115884372592393, "learning_rate": 7.4738014831478825e-06, "loss": 0.3906, "step": 2986 }, { "epoch": 0.710894270244541, "grad_norm": 0.3945125666106555, "learning_rate": 7.472126163356189e-06, "loss": 0.4087, "step": 2987 }, { "epoch": 0.7111322663176057, "grad_norm": 0.36333273981672193, "learning_rate": 7.47045047614539e-06, "loss": 0.3242, "step": 2988 }, { "epoch": 0.7113702623906706, "grad_norm": 0.3914705133621617, "learning_rate": 7.468774421764534e-06, "loss": 0.3465, "step": 2989 }, { "epoch": 0.7116082584637353, "grad_norm": 0.3823529571498665, "learning_rate": 7.467098000462726e-06, "loss": 0.3816, "step": 2990 }, { "epoch": 0.7118462545368002, "grad_norm": 0.3548613233280991, "learning_rate": 7.465421212489121e-06, "loss": 0.3438, "step": 2991 }, { "epoch": 0.7120842506098649, "grad_norm": 0.39262400367886485, "learning_rate": 7.463744058092932e-06, "loss": 0.352, "step": 2992 }, { "epoch": 0.7123222466829298, "grad_norm": 0.367798301154, "learning_rate": 7.462066537523427e-06, "loss": 0.3347, "step": 2993 }, { "epoch": 0.7125602427559945, "grad_norm": 0.38423044064973816, "learning_rate": 7.460388651029925e-06, "loss": 0.4565, "step": 2994 }, { "epoch": 0.7127982388290593, "grad_norm": 0.3640635090647599, "learning_rate": 7.458710398861802e-06, "loss": 0.3073, "step": 2995 }, { "epoch": 0.7130362349021241, "grad_norm": 0.4006113452209261, "learning_rate": 7.457031781268488e-06, "loss": 0.3733, "step": 2996 }, { "epoch": 0.7132742309751889, "grad_norm": 0.3807254297958794, "learning_rate": 7.455352798499468e-06, "loss": 0.4349, "step": 2997 }, { "epoch": 0.7135122270482537, "grad_norm": 0.39126757394311634, "learning_rate": 7.453673450804279e-06, "loss": 0.3575, "step": 2998 }, { "epoch": 0.7137502231213185, "grad_norm": 0.4052451533875098, "learning_rate": 7.451993738432514e-06, "loss": 0.2873, "step": 2999 }, { "epoch": 0.7139882191943833, "grad_norm": 0.39364111352601927, "learning_rate": 7.450313661633821e-06, "loss": 0.3606, "step": 3000 }, { "epoch": 0.7142262152674481, "grad_norm": 0.40280636612509435, "learning_rate": 7.448633220657901e-06, "loss": 0.383, "step": 3001 }, { "epoch": 0.7144642113405129, "grad_norm": 0.4339937070314304, "learning_rate": 7.4469524157545055e-06, "loss": 0.336, "step": 3002 }, { "epoch": 0.7147022074135777, "grad_norm": 0.40489755454617965, "learning_rate": 7.445271247173449e-06, "loss": 0.3121, "step": 3003 }, { "epoch": 0.7149402034866424, "grad_norm": 0.4041470391859187, "learning_rate": 7.44358971516459e-06, "loss": 0.396, "step": 3004 }, { "epoch": 0.7151781995597073, "grad_norm": 0.39500293731975633, "learning_rate": 7.441907819977849e-06, "loss": 0.4046, "step": 3005 }, { "epoch": 0.715416195632772, "grad_norm": 0.41370890008241495, "learning_rate": 7.440225561863197e-06, "loss": 0.3536, "step": 3006 }, { "epoch": 0.7156541917058369, "grad_norm": 0.3939257241455852, "learning_rate": 7.438542941070657e-06, "loss": 0.3451, "step": 3007 }, { "epoch": 0.7158921877789016, "grad_norm": 0.35452897061585553, "learning_rate": 7.436859957850309e-06, "loss": 0.3992, "step": 3008 }, { "epoch": 0.7161301838519665, "grad_norm": 0.3816527448874161, "learning_rate": 7.435176612452286e-06, "loss": 0.3353, "step": 3009 }, { "epoch": 0.7163681799250312, "grad_norm": 0.41454236361169766, "learning_rate": 7.4334929051267755e-06, "loss": 0.3166, "step": 3010 }, { "epoch": 0.7166061759980961, "grad_norm": 0.3602376065948697, "learning_rate": 7.431808836124018e-06, "loss": 0.3797, "step": 3011 }, { "epoch": 0.7168441720711608, "grad_norm": 0.39512210096179917, "learning_rate": 7.4301244056943075e-06, "loss": 0.4029, "step": 3012 }, { "epoch": 0.7170821681442257, "grad_norm": 0.3827942527690594, "learning_rate": 7.42843961408799e-06, "loss": 0.3173, "step": 3013 }, { "epoch": 0.7173201642172904, "grad_norm": 0.39446387386662557, "learning_rate": 7.426754461555471e-06, "loss": 0.3544, "step": 3014 }, { "epoch": 0.7175581602903552, "grad_norm": 0.35472318916385837, "learning_rate": 7.425068948347204e-06, "loss": 0.4169, "step": 3015 }, { "epoch": 0.71779615636342, "grad_norm": 0.38263262613927496, "learning_rate": 7.423383074713697e-06, "loss": 0.3714, "step": 3016 }, { "epoch": 0.7180341524364848, "grad_norm": 0.3808280361027985, "learning_rate": 7.421696840905515e-06, "loss": 0.2928, "step": 3017 }, { "epoch": 0.7182721485095496, "grad_norm": 0.36752434940666945, "learning_rate": 7.4200102471732704e-06, "loss": 0.3481, "step": 3018 }, { "epoch": 0.7185101445826144, "grad_norm": 0.38919742791659145, "learning_rate": 7.4183232937676375e-06, "loss": 0.391, "step": 3019 }, { "epoch": 0.7187481406556792, "grad_norm": 0.37374333756044126, "learning_rate": 7.416635980939335e-06, "loss": 0.348, "step": 3020 }, { "epoch": 0.718986136728744, "grad_norm": 0.36920174596848016, "learning_rate": 7.414948308939141e-06, "loss": 0.3441, "step": 3021 }, { "epoch": 0.7192241328018087, "grad_norm": 0.3899921201145649, "learning_rate": 7.413260278017887e-06, "loss": 0.3838, "step": 3022 }, { "epoch": 0.7194621288748736, "grad_norm": 0.43144843112558834, "learning_rate": 7.411571888426452e-06, "loss": 0.3486, "step": 3023 }, { "epoch": 0.7197001249479383, "grad_norm": 0.3911155710827359, "learning_rate": 7.4098831404157765e-06, "loss": 0.3315, "step": 3024 }, { "epoch": 0.7199381210210032, "grad_norm": 0.42440357958109975, "learning_rate": 7.408194034236849e-06, "loss": 0.3736, "step": 3025 }, { "epoch": 0.7201761170940679, "grad_norm": 0.3680061754320904, "learning_rate": 7.40650457014071e-06, "loss": 0.3912, "step": 3026 }, { "epoch": 0.7204141131671328, "grad_norm": 0.3830697542132779, "learning_rate": 7.404814748378461e-06, "loss": 0.3287, "step": 3027 }, { "epoch": 0.7206521092401975, "grad_norm": 0.37264948361322325, "learning_rate": 7.403124569201246e-06, "loss": 0.3231, "step": 3028 }, { "epoch": 0.7208901053132624, "grad_norm": 0.42134581649859754, "learning_rate": 7.4014340328602685e-06, "loss": 0.3565, "step": 3029 }, { "epoch": 0.7211281013863271, "grad_norm": 0.41130909439310925, "learning_rate": 7.399743139606788e-06, "loss": 0.4445, "step": 3030 }, { "epoch": 0.721366097459392, "grad_norm": 0.4343180622590514, "learning_rate": 7.398051889692108e-06, "loss": 0.3357, "step": 3031 }, { "epoch": 0.7216040935324567, "grad_norm": 0.39112408012774896, "learning_rate": 7.396360283367594e-06, "loss": 0.3501, "step": 3032 }, { "epoch": 0.7218420896055215, "grad_norm": 0.3354333543079656, "learning_rate": 7.394668320884658e-06, "loss": 0.3829, "step": 3033 }, { "epoch": 0.7220800856785863, "grad_norm": 0.34658725353941033, "learning_rate": 7.392976002494768e-06, "loss": 0.3088, "step": 3034 }, { "epoch": 0.7223180817516511, "grad_norm": 0.42267425856173196, "learning_rate": 7.391283328449445e-06, "loss": 0.3284, "step": 3035 }, { "epoch": 0.7225560778247159, "grad_norm": 0.3696528618085508, "learning_rate": 7.389590299000262e-06, "loss": 0.3727, "step": 3036 }, { "epoch": 0.7227940738977807, "grad_norm": 0.37189620964899023, "learning_rate": 7.387896914398845e-06, "loss": 0.4148, "step": 3037 }, { "epoch": 0.7230320699708455, "grad_norm": 0.4135419215724609, "learning_rate": 7.386203174896872e-06, "loss": 0.3078, "step": 3038 }, { "epoch": 0.7232700660439103, "grad_norm": 0.3992204444214033, "learning_rate": 7.384509080746076e-06, "loss": 0.3575, "step": 3039 }, { "epoch": 0.723508062116975, "grad_norm": 0.35922624214255505, "learning_rate": 7.382814632198241e-06, "loss": 0.4262, "step": 3040 }, { "epoch": 0.7237460581900399, "grad_norm": 0.3793102673645999, "learning_rate": 7.381119829505204e-06, "loss": 0.365, "step": 3041 }, { "epoch": 0.7239840542631046, "grad_norm": 0.3944308333847217, "learning_rate": 7.379424672918853e-06, "loss": 0.2977, "step": 3042 }, { "epoch": 0.7242220503361695, "grad_norm": 0.36373682832023985, "learning_rate": 7.377729162691131e-06, "loss": 0.3509, "step": 3043 }, { "epoch": 0.7244600464092342, "grad_norm": 0.3627646085804695, "learning_rate": 7.376033299074035e-06, "loss": 0.4184, "step": 3044 }, { "epoch": 0.7246980424822991, "grad_norm": 0.3871761600279167, "learning_rate": 7.3743370823196096e-06, "loss": 0.3216, "step": 3045 }, { "epoch": 0.7249360385553638, "grad_norm": 0.3798739905567255, "learning_rate": 7.372640512679955e-06, "loss": 0.3456, "step": 3046 }, { "epoch": 0.7251740346284287, "grad_norm": 0.4259458917487435, "learning_rate": 7.370943590407225e-06, "loss": 0.4103, "step": 3047 }, { "epoch": 0.7254120307014934, "grad_norm": 0.3626974255553493, "learning_rate": 7.369246315753623e-06, "loss": 0.3433, "step": 3048 }, { "epoch": 0.7256500267745583, "grad_norm": 0.4435787230045141, "learning_rate": 7.367548688971407e-06, "loss": 0.2972, "step": 3049 }, { "epoch": 0.725888022847623, "grad_norm": 0.3554006057479478, "learning_rate": 7.365850710312883e-06, "loss": 0.3688, "step": 3050 }, { "epoch": 0.7261260189206878, "grad_norm": 0.36442397072863186, "learning_rate": 7.364152380030416e-06, "loss": 0.4098, "step": 3051 }, { "epoch": 0.7263640149937526, "grad_norm": 0.4410087294940679, "learning_rate": 7.3624536983764195e-06, "loss": 0.3272, "step": 3052 }, { "epoch": 0.7266020110668174, "grad_norm": 0.4282754810487593, "learning_rate": 7.3607546656033594e-06, "loss": 0.3241, "step": 3053 }, { "epoch": 0.7268400071398822, "grad_norm": 0.4083398243189906, "learning_rate": 7.359055281963753e-06, "loss": 0.3797, "step": 3054 }, { "epoch": 0.727078003212947, "grad_norm": 0.3965480234646783, "learning_rate": 7.357355547710172e-06, "loss": 0.3904, "step": 3055 }, { "epoch": 0.7273159992860118, "grad_norm": 0.42026409912961626, "learning_rate": 7.355655463095239e-06, "loss": 0.3146, "step": 3056 }, { "epoch": 0.7275539953590766, "grad_norm": 0.3771142416353585, "learning_rate": 7.3539550283716265e-06, "loss": 0.3445, "step": 3057 }, { "epoch": 0.7277919914321413, "grad_norm": 0.3551162990197736, "learning_rate": 7.352254243792064e-06, "loss": 0.4232, "step": 3058 }, { "epoch": 0.7280299875052062, "grad_norm": 0.4085035997854665, "learning_rate": 7.350553109609329e-06, "loss": 0.3106, "step": 3059 }, { "epoch": 0.7282679835782709, "grad_norm": 0.3847757510606681, "learning_rate": 7.348851626076252e-06, "loss": 0.3063, "step": 3060 }, { "epoch": 0.7285059796513358, "grad_norm": 0.36865435960657855, "learning_rate": 7.347149793445715e-06, "loss": 0.3544, "step": 3061 }, { "epoch": 0.7287439757244005, "grad_norm": 0.3922363467897953, "learning_rate": 7.345447611970653e-06, "loss": 0.3822, "step": 3062 }, { "epoch": 0.7289819717974654, "grad_norm": 0.34369944175304884, "learning_rate": 7.3437450819040536e-06, "loss": 0.3206, "step": 3063 }, { "epoch": 0.7292199678705301, "grad_norm": 0.35824405322282227, "learning_rate": 7.342042203498952e-06, "loss": 0.3554, "step": 3064 }, { "epoch": 0.729457963943595, "grad_norm": 0.39512318030878885, "learning_rate": 7.34033897700844e-06, "loss": 0.3839, "step": 3065 }, { "epoch": 0.7296959600166597, "grad_norm": 0.38854090842538846, "learning_rate": 7.338635402685659e-06, "loss": 0.3378, "step": 3066 }, { "epoch": 0.7299339560897246, "grad_norm": 0.38073840280648125, "learning_rate": 7.336931480783801e-06, "loss": 0.3343, "step": 3067 }, { "epoch": 0.7301719521627893, "grad_norm": 0.3869439775508733, "learning_rate": 7.335227211556113e-06, "loss": 0.3474, "step": 3068 }, { "epoch": 0.7304099482358541, "grad_norm": 0.36331918337163177, "learning_rate": 7.3335225952558904e-06, "loss": 0.4093, "step": 3069 }, { "epoch": 0.7306479443089189, "grad_norm": 0.3676892295018712, "learning_rate": 7.3318176321364835e-06, "loss": 0.3082, "step": 3070 }, { "epoch": 0.7308859403819837, "grad_norm": 0.37227813295867007, "learning_rate": 7.330112322451287e-06, "loss": 0.3266, "step": 3071 }, { "epoch": 0.7311239364550485, "grad_norm": 0.37682897636401325, "learning_rate": 7.328406666453757e-06, "loss": 0.4061, "step": 3072 }, { "epoch": 0.7313619325281133, "grad_norm": 0.43099604922414647, "learning_rate": 7.326700664397395e-06, "loss": 0.3787, "step": 3073 }, { "epoch": 0.7315999286011781, "grad_norm": 0.38646511617724494, "learning_rate": 7.324994316535753e-06, "loss": 0.3184, "step": 3074 }, { "epoch": 0.7318379246742429, "grad_norm": 0.3745081198422119, "learning_rate": 7.323287623122439e-06, "loss": 0.3465, "step": 3075 }, { "epoch": 0.7320759207473077, "grad_norm": 0.3868492244329305, "learning_rate": 7.321580584411108e-06, "loss": 0.4354, "step": 3076 }, { "epoch": 0.7323139168203725, "grad_norm": 0.3746511813825389, "learning_rate": 7.31987320065547e-06, "loss": 0.3515, "step": 3077 }, { "epoch": 0.7325519128934372, "grad_norm": 0.46299638156809153, "learning_rate": 7.318165472109282e-06, "loss": 0.3055, "step": 3078 }, { "epoch": 0.7327899089665021, "grad_norm": 0.4037577857446162, "learning_rate": 7.3164573990263574e-06, "loss": 0.3781, "step": 3079 }, { "epoch": 0.7330279050395668, "grad_norm": 0.3996195774993303, "learning_rate": 7.314748981660555e-06, "loss": 0.3987, "step": 3080 }, { "epoch": 0.7332659011126317, "grad_norm": 0.3936618750557137, "learning_rate": 7.313040220265792e-06, "loss": 0.3083, "step": 3081 }, { "epoch": 0.7335038971856964, "grad_norm": 0.38121369149338524, "learning_rate": 7.31133111509603e-06, "loss": 0.3229, "step": 3082 }, { "epoch": 0.7337418932587613, "grad_norm": 0.388825782352307, "learning_rate": 7.309621666405284e-06, "loss": 0.4128, "step": 3083 }, { "epoch": 0.733979889331826, "grad_norm": 0.3715499437212395, "learning_rate": 7.307911874447622e-06, "loss": 0.3383, "step": 3084 }, { "epoch": 0.7342178854048909, "grad_norm": 0.4232766232253306, "learning_rate": 7.306201739477159e-06, "loss": 0.3137, "step": 3085 }, { "epoch": 0.7344558814779556, "grad_norm": 0.345889539039532, "learning_rate": 7.304491261748067e-06, "loss": 0.3484, "step": 3086 }, { "epoch": 0.7346938775510204, "grad_norm": 0.37961353019617056, "learning_rate": 7.302780441514561e-06, "loss": 0.3989, "step": 3087 }, { "epoch": 0.7349318736240852, "grad_norm": 0.3772100128276546, "learning_rate": 7.3010692790309145e-06, "loss": 0.3109, "step": 3088 }, { "epoch": 0.73516986969715, "grad_norm": 0.4148043405953893, "learning_rate": 7.2993577745514475e-06, "loss": 0.339, "step": 3089 }, { "epoch": 0.7354078657702148, "grad_norm": 0.4041915618136653, "learning_rate": 7.2976459283305326e-06, "loss": 0.3987, "step": 3090 }, { "epoch": 0.7356458618432796, "grad_norm": 0.37070530009594416, "learning_rate": 7.2959337406225894e-06, "loss": 0.3478, "step": 3091 }, { "epoch": 0.7358838579163444, "grad_norm": 0.3405354595093119, "learning_rate": 7.294221211682096e-06, "loss": 0.3366, "step": 3092 }, { "epoch": 0.7361218539894092, "grad_norm": 0.36518838895931155, "learning_rate": 7.292508341763574e-06, "loss": 0.3426, "step": 3093 }, { "epoch": 0.736359850062474, "grad_norm": 0.37822344257306156, "learning_rate": 7.290795131121595e-06, "loss": 0.3879, "step": 3094 }, { "epoch": 0.7365978461355388, "grad_norm": 0.3730174664708697, "learning_rate": 7.289081580010792e-06, "loss": 0.3241, "step": 3095 }, { "epoch": 0.7368358422086035, "grad_norm": 0.360220342804819, "learning_rate": 7.287367688685835e-06, "loss": 0.3325, "step": 3096 }, { "epoch": 0.7370738382816684, "grad_norm": 0.36585884890518433, "learning_rate": 7.285653457401453e-06, "loss": 0.3872, "step": 3097 }, { "epoch": 0.7373118343547331, "grad_norm": 0.5429383224292799, "learning_rate": 7.283938886412424e-06, "loss": 0.3578, "step": 3098 }, { "epoch": 0.737549830427798, "grad_norm": 0.41494686957593174, "learning_rate": 7.2822239759735735e-06, "loss": 0.3477, "step": 3099 }, { "epoch": 0.7377878265008627, "grad_norm": 0.3816939297486696, "learning_rate": 7.280508726339781e-06, "loss": 0.3684, "step": 3100 }, { "epoch": 0.7380258225739276, "grad_norm": 0.37029991676249097, "learning_rate": 7.278793137765976e-06, "loss": 0.4007, "step": 3101 }, { "epoch": 0.7382638186469923, "grad_norm": 0.4339643261053269, "learning_rate": 7.277077210507135e-06, "loss": 0.3341, "step": 3102 }, { "epoch": 0.7385018147200572, "grad_norm": 0.37177092531956185, "learning_rate": 7.2753609448182885e-06, "loss": 0.3178, "step": 3103 }, { "epoch": 0.7387398107931219, "grad_norm": 0.36083182065635616, "learning_rate": 7.273644340954515e-06, "loss": 0.3974, "step": 3104 }, { "epoch": 0.7389778068661867, "grad_norm": 0.3742211097385228, "learning_rate": 7.271927399170946e-06, "loss": 0.3764, "step": 3105 }, { "epoch": 0.7392158029392515, "grad_norm": 0.39314424003897525, "learning_rate": 7.270210119722761e-06, "loss": 0.3178, "step": 3106 }, { "epoch": 0.7394537990123163, "grad_norm": 0.387246764459355, "learning_rate": 7.2684925028651875e-06, "loss": 0.3463, "step": 3107 }, { "epoch": 0.7396917950853811, "grad_norm": 0.3455422305424231, "learning_rate": 7.26677454885351e-06, "loss": 0.4018, "step": 3108 }, { "epoch": 0.7399297911584459, "grad_norm": 0.3680423220780498, "learning_rate": 7.265056257943059e-06, "loss": 0.3318, "step": 3109 }, { "epoch": 0.7401677872315107, "grad_norm": 0.4218016718701506, "learning_rate": 7.2633376303892115e-06, "loss": 0.3327, "step": 3110 }, { "epoch": 0.7404057833045755, "grad_norm": 0.4797971091280992, "learning_rate": 7.2616186664474e-06, "loss": 0.3781, "step": 3111 }, { "epoch": 0.7406437793776403, "grad_norm": 0.3656564240573133, "learning_rate": 7.259899366373105e-06, "loss": 0.3589, "step": 3112 }, { "epoch": 0.7408817754507051, "grad_norm": 0.39727650704719514, "learning_rate": 7.258179730421856e-06, "loss": 0.302, "step": 3113 }, { "epoch": 0.7411197715237698, "grad_norm": 0.3854240175463639, "learning_rate": 7.256459758849236e-06, "loss": 0.3257, "step": 3114 }, { "epoch": 0.7413577675968347, "grad_norm": 0.35574522570705447, "learning_rate": 7.254739451910872e-06, "loss": 0.3877, "step": 3115 }, { "epoch": 0.7415957636698994, "grad_norm": 0.4630875140371236, "learning_rate": 7.253018809862448e-06, "loss": 0.3405, "step": 3116 }, { "epoch": 0.7418337597429643, "grad_norm": 0.41595067569227445, "learning_rate": 7.251297832959691e-06, "loss": 0.3055, "step": 3117 }, { "epoch": 0.742071755816029, "grad_norm": 0.35751446775150947, "learning_rate": 7.249576521458381e-06, "loss": 0.3468, "step": 3118 }, { "epoch": 0.7423097518890939, "grad_norm": 0.3782939975585627, "learning_rate": 7.247854875614348e-06, "loss": 0.4108, "step": 3119 }, { "epoch": 0.7425477479621586, "grad_norm": 0.36443090923466487, "learning_rate": 7.246132895683472e-06, "loss": 0.3252, "step": 3120 }, { "epoch": 0.7427857440352235, "grad_norm": 0.3521241114001235, "learning_rate": 7.244410581921679e-06, "loss": 0.3367, "step": 3121 }, { "epoch": 0.7430237401082882, "grad_norm": 0.40051085069078274, "learning_rate": 7.242687934584952e-06, "loss": 0.3951, "step": 3122 }, { "epoch": 0.743261736181353, "grad_norm": 0.44680675909155915, "learning_rate": 7.2409649539293155e-06, "loss": 0.3604, "step": 3123 }, { "epoch": 0.7434997322544178, "grad_norm": 0.41922316103507395, "learning_rate": 7.239241640210849e-06, "loss": 0.3254, "step": 3124 }, { "epoch": 0.7437377283274826, "grad_norm": 0.3777661176299726, "learning_rate": 7.2375179936856775e-06, "loss": 0.371, "step": 3125 }, { "epoch": 0.7439757244005474, "grad_norm": 0.37648805520020007, "learning_rate": 7.235794014609978e-06, "loss": 0.4046, "step": 3126 }, { "epoch": 0.7442137204736122, "grad_norm": 0.410705623333718, "learning_rate": 7.234069703239979e-06, "loss": 0.3264, "step": 3127 }, { "epoch": 0.744451716546677, "grad_norm": 0.36702681685700017, "learning_rate": 7.2323450598319535e-06, "loss": 0.3487, "step": 3128 }, { "epoch": 0.7446897126197418, "grad_norm": 0.3670128177868576, "learning_rate": 7.230620084642226e-06, "loss": 0.3881, "step": 3129 }, { "epoch": 0.7449277086928066, "grad_norm": 0.40207445243333073, "learning_rate": 7.228894777927171e-06, "loss": 0.3942, "step": 3130 }, { "epoch": 0.7451657047658714, "grad_norm": 0.40080124947818796, "learning_rate": 7.227169139943211e-06, "loss": 0.3075, "step": 3131 }, { "epoch": 0.7454037008389361, "grad_norm": 0.39628761333956425, "learning_rate": 7.22544317094682e-06, "loss": 0.3387, "step": 3132 }, { "epoch": 0.745641696912001, "grad_norm": 0.37788202720786135, "learning_rate": 7.223716871194519e-06, "loss": 0.4307, "step": 3133 }, { "epoch": 0.7458796929850657, "grad_norm": 0.37196320757312157, "learning_rate": 7.221990240942878e-06, "loss": 0.3459, "step": 3134 }, { "epoch": 0.7461176890581306, "grad_norm": 0.37988100060665997, "learning_rate": 7.220263280448518e-06, "loss": 0.2937, "step": 3135 }, { "epoch": 0.7463556851311953, "grad_norm": 0.37254514931619725, "learning_rate": 7.21853598996811e-06, "loss": 0.4057, "step": 3136 }, { "epoch": 0.7465936812042602, "grad_norm": 0.3900480382097283, "learning_rate": 7.216808369758368e-06, "loss": 0.381, "step": 3137 }, { "epoch": 0.7468316772773249, "grad_norm": 0.3796183272267863, "learning_rate": 7.215080420076061e-06, "loss": 0.3289, "step": 3138 }, { "epoch": 0.7470696733503898, "grad_norm": 0.3806521308902775, "learning_rate": 7.2133521411780075e-06, "loss": 0.3483, "step": 3139 }, { "epoch": 0.7473076694234545, "grad_norm": 0.38838395980321355, "learning_rate": 7.211623533321067e-06, "loss": 0.3974, "step": 3140 }, { "epoch": 0.7475456654965194, "grad_norm": 0.41947045054774285, "learning_rate": 7.209894596762158e-06, "loss": 0.3447, "step": 3141 }, { "epoch": 0.7477836615695841, "grad_norm": 0.3540371480756213, "learning_rate": 7.2081653317582414e-06, "loss": 0.3406, "step": 3142 }, { "epoch": 0.7480216576426489, "grad_norm": 0.3826391985417264, "learning_rate": 7.20643573856633e-06, "loss": 0.3343, "step": 3143 }, { "epoch": 0.7482596537157137, "grad_norm": 0.3538807128710463, "learning_rate": 7.204705817443483e-06, "loss": 0.4207, "step": 3144 }, { "epoch": 0.7484976497887785, "grad_norm": 0.352020290864496, "learning_rate": 7.202975568646809e-06, "loss": 0.3251, "step": 3145 }, { "epoch": 0.7487356458618433, "grad_norm": 0.39868157169625523, "learning_rate": 7.201244992433466e-06, "loss": 0.3339, "step": 3146 }, { "epoch": 0.7489736419349081, "grad_norm": 0.4023017435952385, "learning_rate": 7.199514089060662e-06, "loss": 0.3981, "step": 3147 }, { "epoch": 0.7492116380079729, "grad_norm": 0.3741576715187835, "learning_rate": 7.19778285878565e-06, "loss": 0.3561, "step": 3148 }, { "epoch": 0.7494496340810377, "grad_norm": 0.4709729162057381, "learning_rate": 7.196051301865736e-06, "loss": 0.3399, "step": 3149 }, { "epoch": 0.7496876301541024, "grad_norm": 0.41862196677750924, "learning_rate": 7.19431941855827e-06, "loss": 0.3235, "step": 3150 }, { "epoch": 0.7499256262271673, "grad_norm": 0.36071859439792425, "learning_rate": 7.192587209120654e-06, "loss": 0.3893, "step": 3151 }, { "epoch": 0.750163622300232, "grad_norm": 0.3699965349983255, "learning_rate": 7.190854673810337e-06, "loss": 0.3109, "step": 3152 }, { "epoch": 0.7504016183732969, "grad_norm": 0.42095108252199576, "learning_rate": 7.189121812884816e-06, "loss": 0.3121, "step": 3153 }, { "epoch": 0.7506396144463616, "grad_norm": 0.37196059546980936, "learning_rate": 7.1873886266016365e-06, "loss": 0.3859, "step": 3154 }, { "epoch": 0.7508776105194265, "grad_norm": 0.3728280714448741, "learning_rate": 7.185655115218395e-06, "loss": 0.3766, "step": 3155 }, { "epoch": 0.7511156065924912, "grad_norm": 0.37533698264848414, "learning_rate": 7.183921278992731e-06, "loss": 0.2943, "step": 3156 }, { "epoch": 0.7513536026655561, "grad_norm": 0.3918689428403605, "learning_rate": 7.18218711818234e-06, "loss": 0.3503, "step": 3157 }, { "epoch": 0.7515915987386208, "grad_norm": 0.42511294665911836, "learning_rate": 7.180452633044958e-06, "loss": 0.4205, "step": 3158 }, { "epoch": 0.7518295948116857, "grad_norm": 0.39558443408700916, "learning_rate": 7.178717823838371e-06, "loss": 0.3445, "step": 3159 }, { "epoch": 0.7520675908847504, "grad_norm": 0.3861088188015497, "learning_rate": 7.176982690820418e-06, "loss": 0.3184, "step": 3160 }, { "epoch": 0.7523055869578152, "grad_norm": 0.3598250325709718, "learning_rate": 7.175247234248979e-06, "loss": 0.3543, "step": 3161 }, { "epoch": 0.75254358303088, "grad_norm": 0.38572071033062194, "learning_rate": 7.173511454381991e-06, "loss": 0.3887, "step": 3162 }, { "epoch": 0.7527815791039448, "grad_norm": 0.3597558768595933, "learning_rate": 7.171775351477429e-06, "loss": 0.3176, "step": 3163 }, { "epoch": 0.7530195751770096, "grad_norm": 0.3519928195184308, "learning_rate": 7.170038925793323e-06, "loss": 0.3598, "step": 3164 }, { "epoch": 0.7532575712500744, "grad_norm": 0.40143875905094994, "learning_rate": 7.16830217758775e-06, "loss": 0.405, "step": 3165 }, { "epoch": 0.7534955673231392, "grad_norm": 0.39193156061116613, "learning_rate": 7.16656510711883e-06, "loss": 0.3403, "step": 3166 }, { "epoch": 0.753733563396204, "grad_norm": 0.38720279747566255, "learning_rate": 7.164827714644738e-06, "loss": 0.292, "step": 3167 }, { "epoch": 0.7539715594692687, "grad_norm": 0.36467759519856513, "learning_rate": 7.163090000423691e-06, "loss": 0.3451, "step": 3168 }, { "epoch": 0.7542095555423336, "grad_norm": 0.41427804995589623, "learning_rate": 7.161351964713959e-06, "loss": 0.4082, "step": 3169 }, { "epoch": 0.7544475516153983, "grad_norm": 0.38192749026353107, "learning_rate": 7.159613607773857e-06, "loss": 0.3061, "step": 3170 }, { "epoch": 0.7546855476884632, "grad_norm": 0.43361143142318026, "learning_rate": 7.157874929861745e-06, "loss": 0.3096, "step": 3171 }, { "epoch": 0.7549235437615279, "grad_norm": 0.3554010401349325, "learning_rate": 7.156135931236034e-06, "loss": 0.4144, "step": 3172 }, { "epoch": 0.7551615398345928, "grad_norm": 0.39011150823679097, "learning_rate": 7.1543966121551845e-06, "loss": 0.3801, "step": 3173 }, { "epoch": 0.7553995359076575, "grad_norm": 0.3817077245262734, "learning_rate": 7.152656972877702e-06, "loss": 0.2873, "step": 3174 }, { "epoch": 0.7556375319807224, "grad_norm": 0.3720557616381678, "learning_rate": 7.150917013662138e-06, "loss": 0.3617, "step": 3175 }, { "epoch": 0.7558755280537871, "grad_norm": 0.34769187060464707, "learning_rate": 7.149176734767095e-06, "loss": 0.4133, "step": 3176 }, { "epoch": 0.756113524126852, "grad_norm": 0.3835724087378952, "learning_rate": 7.147436136451221e-06, "loss": 0.325, "step": 3177 }, { "epoch": 0.7563515201999167, "grad_norm": 0.37749841830040526, "learning_rate": 7.145695218973213e-06, "loss": 0.3102, "step": 3178 }, { "epoch": 0.7565895162729815, "grad_norm": 0.374520542733197, "learning_rate": 7.143953982591813e-06, "loss": 0.3978, "step": 3179 }, { "epoch": 0.7568275123460463, "grad_norm": 0.4023541368854894, "learning_rate": 7.142212427565812e-06, "loss": 0.3807, "step": 3180 }, { "epoch": 0.7570655084191111, "grad_norm": 0.35873098883174603, "learning_rate": 7.140470554154048e-06, "loss": 0.3559, "step": 3181 }, { "epoch": 0.7573035044921759, "grad_norm": 0.3867236526972635, "learning_rate": 7.138728362615408e-06, "loss": 0.3331, "step": 3182 }, { "epoch": 0.7575415005652407, "grad_norm": 0.34735983378932106, "learning_rate": 7.136985853208824e-06, "loss": 0.398, "step": 3183 }, { "epoch": 0.7577794966383055, "grad_norm": 0.38443189061398053, "learning_rate": 7.135243026193275e-06, "loss": 0.3334, "step": 3184 }, { "epoch": 0.7580174927113703, "grad_norm": 0.3821702158023499, "learning_rate": 7.13349988182779e-06, "loss": 0.3129, "step": 3185 }, { "epoch": 0.758255488784435, "grad_norm": 0.3569641423841968, "learning_rate": 7.131756420371441e-06, "loss": 0.38, "step": 3186 }, { "epoch": 0.7584934848574999, "grad_norm": 0.40219483194623007, "learning_rate": 7.130012642083351e-06, "loss": 0.4149, "step": 3187 }, { "epoch": 0.7587314809305646, "grad_norm": 0.3708101360867531, "learning_rate": 7.128268547222688e-06, "loss": 0.3625, "step": 3188 }, { "epoch": 0.7589694770036295, "grad_norm": 0.37603123822726897, "learning_rate": 7.126524136048669e-06, "loss": 0.33, "step": 3189 }, { "epoch": 0.7592074730766942, "grad_norm": 0.3625022362919271, "learning_rate": 7.124779408820555e-06, "loss": 0.4318, "step": 3190 }, { "epoch": 0.7594454691497591, "grad_norm": 0.39739967957247313, "learning_rate": 7.123034365797657e-06, "loss": 0.3272, "step": 3191 }, { "epoch": 0.7596834652228238, "grad_norm": 0.3702784091648422, "learning_rate": 7.121289007239331e-06, "loss": 0.3268, "step": 3192 }, { "epoch": 0.7599214612958887, "grad_norm": 0.3516224416164428, "learning_rate": 7.119543333404981e-06, "loss": 0.3504, "step": 3193 }, { "epoch": 0.7601594573689534, "grad_norm": 0.3559370032742599, "learning_rate": 7.117797344554056e-06, "loss": 0.4026, "step": 3194 }, { "epoch": 0.7603974534420183, "grad_norm": 0.3734187380388612, "learning_rate": 7.116051040946053e-06, "loss": 0.3204, "step": 3195 }, { "epoch": 0.760635449515083, "grad_norm": 0.3579819166208651, "learning_rate": 7.114304422840517e-06, "loss": 0.3277, "step": 3196 }, { "epoch": 0.7608734455881478, "grad_norm": 0.3752870062603591, "learning_rate": 7.112557490497038e-06, "loss": 0.3955, "step": 3197 }, { "epoch": 0.7611114416612126, "grad_norm": 0.3707029313698444, "learning_rate": 7.1108102441752546e-06, "loss": 0.3666, "step": 3198 }, { "epoch": 0.7613494377342774, "grad_norm": 0.3867018314993227, "learning_rate": 7.109062684134851e-06, "loss": 0.3063, "step": 3199 }, { "epoch": 0.7615874338073422, "grad_norm": 0.40876052113414646, "learning_rate": 7.107314810635555e-06, "loss": 0.3568, "step": 3200 }, { "epoch": 0.761825429880407, "grad_norm": 0.4030334876778494, "learning_rate": 7.105566623937145e-06, "loss": 0.4414, "step": 3201 }, { "epoch": 0.7620634259534718, "grad_norm": 0.36214917368846855, "learning_rate": 7.103818124299446e-06, "loss": 0.3108, "step": 3202 }, { "epoch": 0.7623014220265366, "grad_norm": 0.36698016620497026, "learning_rate": 7.102069311982329e-06, "loss": 0.3015, "step": 3203 }, { "epoch": 0.7625394180996014, "grad_norm": 0.368871180085732, "learning_rate": 7.100320187245711e-06, "loss": 0.3934, "step": 3204 }, { "epoch": 0.7627774141726662, "grad_norm": 0.39902520639387057, "learning_rate": 7.098570750349552e-06, "loss": 0.3871, "step": 3205 }, { "epoch": 0.7630154102457309, "grad_norm": 0.37446900240784753, "learning_rate": 7.096821001553863e-06, "loss": 0.3109, "step": 3206 }, { "epoch": 0.7632534063187958, "grad_norm": 0.33881592006301775, "learning_rate": 7.0950709411187e-06, "loss": 0.3471, "step": 3207 }, { "epoch": 0.7634914023918605, "grad_norm": 0.372761489856163, "learning_rate": 7.093320569304168e-06, "loss": 0.4049, "step": 3208 }, { "epoch": 0.7637293984649254, "grad_norm": 0.4017927278263893, "learning_rate": 7.0915698863704094e-06, "loss": 0.3293, "step": 3209 }, { "epoch": 0.7639673945379901, "grad_norm": 0.4054778790918442, "learning_rate": 7.089818892577625e-06, "loss": 0.335, "step": 3210 }, { "epoch": 0.764205390611055, "grad_norm": 0.3850085404080853, "learning_rate": 7.088067588186053e-06, "loss": 0.3492, "step": 3211 }, { "epoch": 0.7644433866841197, "grad_norm": 0.37942848234718374, "learning_rate": 7.086315973455982e-06, "loss": 0.4199, "step": 3212 }, { "epoch": 0.7646813827571846, "grad_norm": 0.4002793707382145, "learning_rate": 7.084564048647742e-06, "loss": 0.3223, "step": 3213 }, { "epoch": 0.7649193788302493, "grad_norm": 0.3758467906510097, "learning_rate": 7.082811814021717e-06, "loss": 0.3411, "step": 3214 }, { "epoch": 0.7651573749033141, "grad_norm": 0.38353128527707686, "learning_rate": 7.08105926983833e-06, "loss": 0.4176, "step": 3215 }, { "epoch": 0.7653953709763789, "grad_norm": 0.41062374283228403, "learning_rate": 7.0793064163580515e-06, "loss": 0.3443, "step": 3216 }, { "epoch": 0.7656333670494437, "grad_norm": 0.38600351854755544, "learning_rate": 7.0775532538414005e-06, "loss": 0.3275, "step": 3217 }, { "epoch": 0.7658713631225085, "grad_norm": 0.3588188716781081, "learning_rate": 7.0757997825489395e-06, "loss": 0.368, "step": 3218 }, { "epoch": 0.7661093591955733, "grad_norm": 0.3582749191881199, "learning_rate": 7.074046002741279e-06, "loss": 0.3938, "step": 3219 }, { "epoch": 0.7663473552686381, "grad_norm": 0.3597466282489841, "learning_rate": 7.072291914679072e-06, "loss": 0.3072, "step": 3220 }, { "epoch": 0.7665853513417029, "grad_norm": 0.3920888152123116, "learning_rate": 7.070537518623022e-06, "loss": 0.2984, "step": 3221 }, { "epoch": 0.7668233474147677, "grad_norm": 0.3641437351355874, "learning_rate": 7.068782814833872e-06, "loss": 0.3744, "step": 3222 }, { "epoch": 0.7670613434878325, "grad_norm": 0.37937338820809674, "learning_rate": 7.067027803572417e-06, "loss": 0.3666, "step": 3223 }, { "epoch": 0.7672993395608972, "grad_norm": 0.40998865963471826, "learning_rate": 7.065272485099496e-06, "loss": 0.2939, "step": 3224 }, { "epoch": 0.7675373356339621, "grad_norm": 0.4023210745015225, "learning_rate": 7.06351685967599e-06, "loss": 0.3818, "step": 3225 }, { "epoch": 0.7677753317070268, "grad_norm": 0.37345454403274114, "learning_rate": 7.061760927562831e-06, "loss": 0.4636, "step": 3226 }, { "epoch": 0.7680133277800917, "grad_norm": 0.4223720881715161, "learning_rate": 7.060004689020991e-06, "loss": 0.3236, "step": 3227 }, { "epoch": 0.7682513238531564, "grad_norm": 0.40982795591831794, "learning_rate": 7.058248144311493e-06, "loss": 0.3324, "step": 3228 }, { "epoch": 0.7684893199262213, "grad_norm": 0.3557070641164775, "learning_rate": 7.056491293695401e-06, "loss": 0.3833, "step": 3229 }, { "epoch": 0.768727315999286, "grad_norm": 0.3797321346430915, "learning_rate": 7.05473413743383e-06, "loss": 0.3552, "step": 3230 }, { "epoch": 0.7689653120723509, "grad_norm": 0.4115569949091365, "learning_rate": 7.052976675787932e-06, "loss": 0.3028, "step": 3231 }, { "epoch": 0.7692033081454156, "grad_norm": 0.42313222429594644, "learning_rate": 7.051218909018913e-06, "loss": 0.36, "step": 3232 }, { "epoch": 0.7694413042184804, "grad_norm": 0.3576077007683352, "learning_rate": 7.04946083738802e-06, "loss": 0.3817, "step": 3233 }, { "epoch": 0.7696793002915452, "grad_norm": 0.4065000621443697, "learning_rate": 7.047702461156545e-06, "loss": 0.3447, "step": 3234 }, { "epoch": 0.76991729636461, "grad_norm": 0.38063058594261784, "learning_rate": 7.045943780585826e-06, "loss": 0.2826, "step": 3235 }, { "epoch": 0.7701552924376748, "grad_norm": 0.410395519650393, "learning_rate": 7.044184795937248e-06, "loss": 0.3782, "step": 3236 }, { "epoch": 0.7703932885107396, "grad_norm": 0.38149692275265434, "learning_rate": 7.042425507472237e-06, "loss": 0.4149, "step": 3237 }, { "epoch": 0.7706312845838044, "grad_norm": 0.3809955756687497, "learning_rate": 7.040665915452269e-06, "loss": 0.3169, "step": 3238 }, { "epoch": 0.7708692806568692, "grad_norm": 0.3697765244878929, "learning_rate": 7.038906020138863e-06, "loss": 0.3496, "step": 3239 }, { "epoch": 0.771107276729934, "grad_norm": 0.4309372907682824, "learning_rate": 7.037145821793582e-06, "loss": 0.3958, "step": 3240 }, { "epoch": 0.7713452728029988, "grad_norm": 0.38099748409784767, "learning_rate": 7.035385320678035e-06, "loss": 0.3601, "step": 3241 }, { "epoch": 0.7715832688760635, "grad_norm": 0.4276643539057699, "learning_rate": 7.033624517053878e-06, "loss": 0.3132, "step": 3242 }, { "epoch": 0.7718212649491284, "grad_norm": 0.3518664176009511, "learning_rate": 7.031863411182806e-06, "loss": 0.3615, "step": 3243 }, { "epoch": 0.7720592610221931, "grad_norm": 0.362211191847796, "learning_rate": 7.0301020033265655e-06, "loss": 0.4409, "step": 3244 }, { "epoch": 0.772297257095258, "grad_norm": 0.41910572530406326, "learning_rate": 7.0283402937469455e-06, "loss": 0.3131, "step": 3245 }, { "epoch": 0.7725352531683227, "grad_norm": 0.4314331090536776, "learning_rate": 7.0265782827057804e-06, "loss": 0.3372, "step": 3246 }, { "epoch": 0.7727732492413876, "grad_norm": 0.3693815905880875, "learning_rate": 7.024815970464947e-06, "loss": 0.4088, "step": 3247 }, { "epoch": 0.7730112453144523, "grad_norm": 0.37981415522062073, "learning_rate": 7.023053357286366e-06, "loss": 0.3587, "step": 3248 }, { "epoch": 0.7732492413875172, "grad_norm": 0.408493661115404, "learning_rate": 7.0212904434320115e-06, "loss": 0.307, "step": 3249 }, { "epoch": 0.7734872374605819, "grad_norm": 0.37233119228531636, "learning_rate": 7.019527229163891e-06, "loss": 0.3307, "step": 3250 }, { "epoch": 0.7737252335336468, "grad_norm": 0.355235494120951, "learning_rate": 7.0177637147440645e-06, "loss": 0.4161, "step": 3251 }, { "epoch": 0.7739632296067115, "grad_norm": 0.41229891884255343, "learning_rate": 7.015999900434632e-06, "loss": 0.3474, "step": 3252 }, { "epoch": 0.7742012256797762, "grad_norm": 0.37602579982370005, "learning_rate": 7.0142357864977425e-06, "loss": 0.3039, "step": 3253 }, { "epoch": 0.7744392217528411, "grad_norm": 0.3958312928917149, "learning_rate": 7.012471373195584e-06, "loss": 0.3848, "step": 3254 }, { "epoch": 0.7746772178259058, "grad_norm": 0.4121169912150407, "learning_rate": 7.010706660790393e-06, "loss": 0.3912, "step": 3255 }, { "epoch": 0.7749152138989707, "grad_norm": 0.3824825922233828, "learning_rate": 7.0089416495444505e-06, "loss": 0.3104, "step": 3256 }, { "epoch": 0.7751532099720354, "grad_norm": 0.3844237240799921, "learning_rate": 7.007176339720079e-06, "loss": 0.3511, "step": 3257 }, { "epoch": 0.7753912060451003, "grad_norm": 0.4050734698257971, "learning_rate": 7.005410731579649e-06, "loss": 0.3956, "step": 3258 }, { "epoch": 0.775629202118165, "grad_norm": 0.38094079518404117, "learning_rate": 7.003644825385574e-06, "loss": 0.3168, "step": 3259 }, { "epoch": 0.7758671981912298, "grad_norm": 0.3756921808647231, "learning_rate": 7.001878621400309e-06, "loss": 0.3121, "step": 3260 }, { "epoch": 0.7761051942642946, "grad_norm": 0.3795064413693575, "learning_rate": 7.000112119886356e-06, "loss": 0.3722, "step": 3261 }, { "epoch": 0.7763431903373594, "grad_norm": 0.38072119281394334, "learning_rate": 6.998345321106264e-06, "loss": 0.3769, "step": 3262 }, { "epoch": 0.7765811864104242, "grad_norm": 0.3521206764121899, "learning_rate": 6.996578225322619e-06, "loss": 0.3074, "step": 3263 }, { "epoch": 0.776819182483489, "grad_norm": 0.3934398383104075, "learning_rate": 6.994810832798056e-06, "loss": 0.329, "step": 3264 }, { "epoch": 0.7770571785565538, "grad_norm": 0.3832785826014656, "learning_rate": 6.993043143795255e-06, "loss": 0.405, "step": 3265 }, { "epoch": 0.7772951746296186, "grad_norm": 0.3642849945216878, "learning_rate": 6.991275158576936e-06, "loss": 0.3423, "step": 3266 }, { "epoch": 0.7775331707026834, "grad_norm": 0.40701200067939586, "learning_rate": 6.989506877405867e-06, "loss": 0.3073, "step": 3267 }, { "epoch": 0.7777711667757482, "grad_norm": 0.38472233590714217, "learning_rate": 6.9877383005448595e-06, "loss": 0.3395, "step": 3268 }, { "epoch": 0.7780091628488129, "grad_norm": 0.3839123853902827, "learning_rate": 6.9859694282567655e-06, "loss": 0.3998, "step": 3269 }, { "epoch": 0.7782471589218778, "grad_norm": 0.35916227359386177, "learning_rate": 6.9842002608044844e-06, "loss": 0.3028, "step": 3270 }, { "epoch": 0.7784851549949425, "grad_norm": 0.37216903957857644, "learning_rate": 6.9824307984509565e-06, "loss": 0.3197, "step": 3271 }, { "epoch": 0.7787231510680074, "grad_norm": 0.401474993410537, "learning_rate": 6.98066104145917e-06, "loss": 0.4028, "step": 3272 }, { "epoch": 0.7789611471410721, "grad_norm": 0.4764009593683862, "learning_rate": 6.9788909900921546e-06, "loss": 0.3624, "step": 3273 }, { "epoch": 0.779199143214137, "grad_norm": 0.35941136835596654, "learning_rate": 6.977120644612981e-06, "loss": 0.3168, "step": 3274 }, { "epoch": 0.7794371392872017, "grad_norm": 0.47474946631582, "learning_rate": 6.975350005284769e-06, "loss": 0.3574, "step": 3275 }, { "epoch": 0.7796751353602666, "grad_norm": 0.3664417092614242, "learning_rate": 6.973579072370678e-06, "loss": 0.4299, "step": 3276 }, { "epoch": 0.7799131314333313, "grad_norm": 0.3583726867946201, "learning_rate": 6.971807846133912e-06, "loss": 0.3236, "step": 3277 }, { "epoch": 0.7801511275063961, "grad_norm": 0.4022285999884666, "learning_rate": 6.97003632683772e-06, "loss": 0.3138, "step": 3278 }, { "epoch": 0.7803891235794609, "grad_norm": 0.35817090327334505, "learning_rate": 6.9682645147453954e-06, "loss": 0.3843, "step": 3279 }, { "epoch": 0.7806271196525257, "grad_norm": 0.3846970980247595, "learning_rate": 6.966492410120269e-06, "loss": 0.3713, "step": 3280 }, { "epoch": 0.7808651157255905, "grad_norm": 0.366789717423642, "learning_rate": 6.964720013225723e-06, "loss": 0.2951, "step": 3281 }, { "epoch": 0.7811031117986553, "grad_norm": 0.38839627264622095, "learning_rate": 6.962947324325178e-06, "loss": 0.3634, "step": 3282 }, { "epoch": 0.7813411078717201, "grad_norm": 0.36282780184060254, "learning_rate": 6.9611743436821e-06, "loss": 0.3993, "step": 3283 }, { "epoch": 0.7815791039447849, "grad_norm": 0.35508176540562836, "learning_rate": 6.959401071559997e-06, "loss": 0.3159, "step": 3284 }, { "epoch": 0.7818171000178497, "grad_norm": 0.4152503707635735, "learning_rate": 6.957627508222421e-06, "loss": 0.2873, "step": 3285 }, { "epoch": 0.7820550960909145, "grad_norm": 0.39876662297919807, "learning_rate": 6.955853653932969e-06, "loss": 0.3802, "step": 3286 }, { "epoch": 0.7822930921639792, "grad_norm": 0.36696139093226415, "learning_rate": 6.9540795089552785e-06, "loss": 0.3693, "step": 3287 }, { "epoch": 0.7825310882370441, "grad_norm": 0.3600324867306773, "learning_rate": 6.952305073553031e-06, "loss": 0.3051, "step": 3288 }, { "epoch": 0.7827690843101088, "grad_norm": 0.3489749012167579, "learning_rate": 6.950530347989952e-06, "loss": 0.3532, "step": 3289 }, { "epoch": 0.7830070803831737, "grad_norm": 0.3620983180455999, "learning_rate": 6.9487553325298086e-06, "loss": 0.3963, "step": 3290 }, { "epoch": 0.7832450764562384, "grad_norm": 0.36785158602869183, "learning_rate": 6.946980027436413e-06, "loss": 0.343, "step": 3291 }, { "epoch": 0.7834830725293033, "grad_norm": 0.3599934529541224, "learning_rate": 6.94520443297362e-06, "loss": 0.2774, "step": 3292 }, { "epoch": 0.783721068602368, "grad_norm": 0.38715941357436445, "learning_rate": 6.943428549405327e-06, "loss": 0.3519, "step": 3293 }, { "epoch": 0.7839590646754329, "grad_norm": 0.35349331453822797, "learning_rate": 6.941652376995471e-06, "loss": 0.3988, "step": 3294 }, { "epoch": 0.7841970607484976, "grad_norm": 0.3955985233260755, "learning_rate": 6.93987591600804e-06, "loss": 0.3389, "step": 3295 }, { "epoch": 0.7844350568215624, "grad_norm": 0.34663056196232733, "learning_rate": 6.938099166707058e-06, "loss": 0.314, "step": 3296 }, { "epoch": 0.7846730528946272, "grad_norm": 0.38906902208324257, "learning_rate": 6.936322129356592e-06, "loss": 0.3764, "step": 3297 }, { "epoch": 0.784911048967692, "grad_norm": 0.41156216306072313, "learning_rate": 6.934544804220755e-06, "loss": 0.3957, "step": 3298 }, { "epoch": 0.7851490450407568, "grad_norm": 0.35518958539778267, "learning_rate": 6.932767191563703e-06, "loss": 0.282, "step": 3299 }, { "epoch": 0.7853870411138216, "grad_norm": 0.47033631889554306, "learning_rate": 6.9309892916496315e-06, "loss": 0.3442, "step": 3300 }, { "epoch": 0.7856250371868864, "grad_norm": 0.3666911616164395, "learning_rate": 6.929211104742781e-06, "loss": 0.4094, "step": 3301 }, { "epoch": 0.7858630332599512, "grad_norm": 0.3577310928807679, "learning_rate": 6.927432631107434e-06, "loss": 0.3025, "step": 3302 }, { "epoch": 0.786101029333016, "grad_norm": 0.37429247897055223, "learning_rate": 6.925653871007916e-06, "loss": 0.3226, "step": 3303 }, { "epoch": 0.7863390254060808, "grad_norm": 0.3880037905135578, "learning_rate": 6.923874824708594e-06, "loss": 0.354, "step": 3304 }, { "epoch": 0.7865770214791455, "grad_norm": 0.4433660243451845, "learning_rate": 6.922095492473877e-06, "loss": 0.3544, "step": 3305 }, { "epoch": 0.7868150175522104, "grad_norm": 0.3759346004041654, "learning_rate": 6.920315874568222e-06, "loss": 0.2971, "step": 3306 }, { "epoch": 0.7870530136252751, "grad_norm": 0.3785964570018848, "learning_rate": 6.918535971256121e-06, "loss": 0.3874, "step": 3307 }, { "epoch": 0.78729100969834, "grad_norm": 0.3558284632586464, "learning_rate": 6.91675578280211e-06, "loss": 0.422, "step": 3308 }, { "epoch": 0.7875290057714047, "grad_norm": 0.37377127129597065, "learning_rate": 6.914975309470775e-06, "loss": 0.3282, "step": 3309 }, { "epoch": 0.7877670018444696, "grad_norm": 0.40012751504223365, "learning_rate": 6.913194551526733e-06, "loss": 0.3119, "step": 3310 }, { "epoch": 0.7880049979175343, "grad_norm": 0.3754850496234726, "learning_rate": 6.911413509234651e-06, "loss": 0.3494, "step": 3311 }, { "epoch": 0.7882429939905992, "grad_norm": 0.40489835887499337, "learning_rate": 6.9096321828592336e-06, "loss": 0.4102, "step": 3312 }, { "epoch": 0.7884809900636639, "grad_norm": 0.43391003763332675, "learning_rate": 6.9078505726652345e-06, "loss": 0.3132, "step": 3313 }, { "epoch": 0.7887189861367288, "grad_norm": 0.3897042525840326, "learning_rate": 6.906068678917442e-06, "loss": 0.3263, "step": 3314 }, { "epoch": 0.7889569822097935, "grad_norm": 0.3983465394931579, "learning_rate": 6.904286501880688e-06, "loss": 0.4121, "step": 3315 }, { "epoch": 0.7891949782828583, "grad_norm": 0.3960830865161725, "learning_rate": 6.902504041819853e-06, "loss": 0.3513, "step": 3316 }, { "epoch": 0.7894329743559231, "grad_norm": 0.4092338028148718, "learning_rate": 6.900721298999849e-06, "loss": 0.3118, "step": 3317 }, { "epoch": 0.7896709704289879, "grad_norm": 0.37481354646128145, "learning_rate": 6.8989382736856405e-06, "loss": 0.3556, "step": 3318 }, { "epoch": 0.7899089665020527, "grad_norm": 0.4041714681573852, "learning_rate": 6.897154966142225e-06, "loss": 0.3973, "step": 3319 }, { "epoch": 0.7901469625751175, "grad_norm": 0.3961472202444824, "learning_rate": 6.89537137663465e-06, "loss": 0.2757, "step": 3320 }, { "epoch": 0.7903849586481823, "grad_norm": 0.3476476966030962, "learning_rate": 6.893587505427997e-06, "loss": 0.3343, "step": 3321 }, { "epoch": 0.7906229547212471, "grad_norm": 0.4234915791131859, "learning_rate": 6.891803352787396e-06, "loss": 0.3884, "step": 3322 }, { "epoch": 0.7908609507943118, "grad_norm": 0.3688518168774175, "learning_rate": 6.890018918978018e-06, "loss": 0.3443, "step": 3323 }, { "epoch": 0.7910989468673767, "grad_norm": 0.44308279354142094, "learning_rate": 6.888234204265071e-06, "loss": 0.2956, "step": 3324 }, { "epoch": 0.7913369429404414, "grad_norm": 0.3728873788461928, "learning_rate": 6.8864492089138076e-06, "loss": 0.3573, "step": 3325 }, { "epoch": 0.7915749390135063, "grad_norm": 0.3691109038727998, "learning_rate": 6.8846639331895235e-06, "loss": 0.415, "step": 3326 }, { "epoch": 0.791812935086571, "grad_norm": 0.40429091720127247, "learning_rate": 6.882878377357555e-06, "loss": 0.3342, "step": 3327 }, { "epoch": 0.7920509311596359, "grad_norm": 0.35173869530824603, "learning_rate": 6.881092541683279e-06, "loss": 0.3036, "step": 3328 }, { "epoch": 0.7922889272327006, "grad_norm": 0.3691282683210561, "learning_rate": 6.879306426432116e-06, "loss": 0.3957, "step": 3329 }, { "epoch": 0.7925269233057655, "grad_norm": 0.3547986919135188, "learning_rate": 6.877520031869527e-06, "loss": 0.3853, "step": 3330 }, { "epoch": 0.7927649193788302, "grad_norm": 0.41969998025380606, "learning_rate": 6.875733358261012e-06, "loss": 0.312, "step": 3331 }, { "epoch": 0.793002915451895, "grad_norm": 0.3666600692656378, "learning_rate": 6.873946405872116e-06, "loss": 0.3293, "step": 3332 }, { "epoch": 0.7932409115249598, "grad_norm": 0.3793195334251969, "learning_rate": 6.872159174968427e-06, "loss": 0.3999, "step": 3333 }, { "epoch": 0.7934789075980246, "grad_norm": 0.3653083899549032, "learning_rate": 6.870371665815567e-06, "loss": 0.347, "step": 3334 }, { "epoch": 0.7937169036710894, "grad_norm": 0.42000680796497764, "learning_rate": 6.868583878679209e-06, "loss": 0.3133, "step": 3335 }, { "epoch": 0.7939548997441542, "grad_norm": 0.3798826072515094, "learning_rate": 6.866795813825059e-06, "loss": 0.3855, "step": 3336 }, { "epoch": 0.794192895817219, "grad_norm": 0.4204127342603903, "learning_rate": 6.8650074715188695e-06, "loss": 0.4339, "step": 3337 }, { "epoch": 0.7944308918902838, "grad_norm": 0.3987640353825363, "learning_rate": 6.863218852026432e-06, "loss": 0.3097, "step": 3338 }, { "epoch": 0.7946688879633486, "grad_norm": 0.3758268701259083, "learning_rate": 6.861429955613579e-06, "loss": 0.3308, "step": 3339 }, { "epoch": 0.7949068840364134, "grad_norm": 0.3465046232838342, "learning_rate": 6.859640782546183e-06, "loss": 0.3863, "step": 3340 }, { "epoch": 0.7951448801094781, "grad_norm": 0.3592668570581204, "learning_rate": 6.8578513330901645e-06, "loss": 0.3441, "step": 3341 }, { "epoch": 0.795382876182543, "grad_norm": 0.3714878302081704, "learning_rate": 6.856061607511475e-06, "loss": 0.3282, "step": 3342 }, { "epoch": 0.7956208722556077, "grad_norm": 0.3717879012940753, "learning_rate": 6.854271606076114e-06, "loss": 0.3704, "step": 3343 }, { "epoch": 0.7958588683286726, "grad_norm": 0.36524627168544693, "learning_rate": 6.85248132905012e-06, "loss": 0.4022, "step": 3344 }, { "epoch": 0.7960968644017373, "grad_norm": 0.3880622674807223, "learning_rate": 6.850690776699574e-06, "loss": 0.3121, "step": 3345 }, { "epoch": 0.7963348604748022, "grad_norm": 0.38708118353283516, "learning_rate": 6.848899949290592e-06, "loss": 0.3444, "step": 3346 }, { "epoch": 0.7965728565478669, "grad_norm": 0.3721719168783397, "learning_rate": 6.847108847089339e-06, "loss": 0.3891, "step": 3347 }, { "epoch": 0.7968108526209318, "grad_norm": 0.4005020642472224, "learning_rate": 6.8453174703620155e-06, "loss": 0.3959, "step": 3348 }, { "epoch": 0.7970488486939965, "grad_norm": 0.3522265566325754, "learning_rate": 6.843525819374866e-06, "loss": 0.3006, "step": 3349 }, { "epoch": 0.7972868447670614, "grad_norm": 0.359814924986342, "learning_rate": 6.841733894394172e-06, "loss": 0.3521, "step": 3350 }, { "epoch": 0.7975248408401261, "grad_norm": 0.3908892005179815, "learning_rate": 6.839941695686261e-06, "loss": 0.4427, "step": 3351 }, { "epoch": 0.7977628369131909, "grad_norm": 0.39295135554595395, "learning_rate": 6.838149223517495e-06, "loss": 0.3039, "step": 3352 }, { "epoch": 0.7980008329862557, "grad_norm": 0.3999452447353942, "learning_rate": 6.836356478154279e-06, "loss": 0.3105, "step": 3353 }, { "epoch": 0.7982388290593205, "grad_norm": 0.36113133945632986, "learning_rate": 6.834563459863064e-06, "loss": 0.4003, "step": 3354 }, { "epoch": 0.7984768251323853, "grad_norm": 0.3993728396402891, "learning_rate": 6.832770168910332e-06, "loss": 0.3429, "step": 3355 }, { "epoch": 0.7987148212054501, "grad_norm": 0.36265914054354226, "learning_rate": 6.830976605562614e-06, "loss": 0.3019, "step": 3356 }, { "epoch": 0.7989528172785149, "grad_norm": 0.3760662642357464, "learning_rate": 6.829182770086474e-06, "loss": 0.4016, "step": 3357 }, { "epoch": 0.7991908133515797, "grad_norm": 0.3558139104694944, "learning_rate": 6.8273886627485245e-06, "loss": 0.4189, "step": 3358 }, { "epoch": 0.7994288094246444, "grad_norm": 0.42200701223038184, "learning_rate": 6.825594283815411e-06, "loss": 0.3216, "step": 3359 }, { "epoch": 0.7996668054977093, "grad_norm": 0.3628739848145437, "learning_rate": 6.8237996335538245e-06, "loss": 0.3099, "step": 3360 }, { "epoch": 0.799904801570774, "grad_norm": 0.3622850476056826, "learning_rate": 6.822004712230493e-06, "loss": 0.3894, "step": 3361 }, { "epoch": 0.8001427976438389, "grad_norm": 0.37608877349141273, "learning_rate": 6.820209520112188e-06, "loss": 0.3764, "step": 3362 }, { "epoch": 0.8003807937169036, "grad_norm": 0.3921518941166787, "learning_rate": 6.8184140574657185e-06, "loss": 0.2933, "step": 3363 }, { "epoch": 0.8006187897899685, "grad_norm": 0.3709070132502542, "learning_rate": 6.816618324557934e-06, "loss": 0.3279, "step": 3364 }, { "epoch": 0.8008567858630332, "grad_norm": 0.3457649897592504, "learning_rate": 6.8148223216557275e-06, "loss": 0.421, "step": 3365 }, { "epoch": 0.8010947819360981, "grad_norm": 0.363710583757097, "learning_rate": 6.813026049026026e-06, "loss": 0.3431, "step": 3366 }, { "epoch": 0.8013327780091628, "grad_norm": 0.44025195568280423, "learning_rate": 6.8112295069358005e-06, "loss": 0.2966, "step": 3367 }, { "epoch": 0.8015707740822277, "grad_norm": 0.44684338780747807, "learning_rate": 6.809432695652063e-06, "loss": 0.3689, "step": 3368 }, { "epoch": 0.8018087701552924, "grad_norm": 0.3866828600799754, "learning_rate": 6.807635615441866e-06, "loss": 0.4114, "step": 3369 }, { "epoch": 0.8020467662283572, "grad_norm": 0.3608818486936804, "learning_rate": 6.805838266572296e-06, "loss": 0.3066, "step": 3370 }, { "epoch": 0.802284762301422, "grad_norm": 0.3903835514254373, "learning_rate": 6.804040649310485e-06, "loss": 0.3154, "step": 3371 }, { "epoch": 0.8025227583744868, "grad_norm": 0.4174028450269264, "learning_rate": 6.802242763923603e-06, "loss": 0.3932, "step": 3372 }, { "epoch": 0.8027607544475516, "grad_norm": 0.3902998602460606, "learning_rate": 6.800444610678862e-06, "loss": 0.3686, "step": 3373 }, { "epoch": 0.8029987505206164, "grad_norm": 0.37542329189902135, "learning_rate": 6.798646189843512e-06, "loss": 0.2835, "step": 3374 }, { "epoch": 0.8032367465936812, "grad_norm": 0.5826616163085041, "learning_rate": 6.796847501684839e-06, "loss": 0.3604, "step": 3375 }, { "epoch": 0.803474742666746, "grad_norm": 0.36773753481276283, "learning_rate": 6.795048546470178e-06, "loss": 0.3924, "step": 3376 }, { "epoch": 0.8037127387398108, "grad_norm": 0.3653653462075791, "learning_rate": 6.793249324466895e-06, "loss": 0.3302, "step": 3377 }, { "epoch": 0.8039507348128756, "grad_norm": 0.3674665028573087, "learning_rate": 6.7914498359424e-06, "loss": 0.2912, "step": 3378 }, { "epoch": 0.8041887308859403, "grad_norm": 0.3535016420982183, "learning_rate": 6.78965008116414e-06, "loss": 0.3777, "step": 3379 }, { "epoch": 0.8044267269590052, "grad_norm": 0.3809175328016834, "learning_rate": 6.787850060399604e-06, "loss": 0.4065, "step": 3380 }, { "epoch": 0.8046647230320699, "grad_norm": 0.38371526792218086, "learning_rate": 6.78604977391632e-06, "loss": 0.301, "step": 3381 }, { "epoch": 0.8049027191051348, "grad_norm": 0.37885740937735535, "learning_rate": 6.784249221981856e-06, "loss": 0.3888, "step": 3382 }, { "epoch": 0.8051407151781995, "grad_norm": 0.37193003169198097, "learning_rate": 6.782448404863816e-06, "loss": 0.3764, "step": 3383 }, { "epoch": 0.8053787112512644, "grad_norm": 0.40081137867530753, "learning_rate": 6.780647322829849e-06, "loss": 0.3487, "step": 3384 }, { "epoch": 0.8056167073243291, "grad_norm": 0.38584530133036227, "learning_rate": 6.778845976147638e-06, "loss": 0.3073, "step": 3385 }, { "epoch": 0.805854703397394, "grad_norm": 0.3737571701313767, "learning_rate": 6.777044365084907e-06, "loss": 0.389, "step": 3386 }, { "epoch": 0.8060926994704587, "grad_norm": 0.38914805596699603, "learning_rate": 6.775242489909423e-06, "loss": 0.3707, "step": 3387 }, { "epoch": 0.8063306955435235, "grad_norm": 0.3908919574280197, "learning_rate": 6.773440350888986e-06, "loss": 0.3293, "step": 3388 }, { "epoch": 0.8065686916165883, "grad_norm": 0.3732810073093854, "learning_rate": 6.771637948291441e-06, "loss": 0.3214, "step": 3389 }, { "epoch": 0.8068066876896531, "grad_norm": 0.40490720807254776, "learning_rate": 6.769835282384669e-06, "loss": 0.3995, "step": 3390 }, { "epoch": 0.8070446837627179, "grad_norm": 0.39783899196176287, "learning_rate": 6.768032353436591e-06, "loss": 0.364, "step": 3391 }, { "epoch": 0.8072826798357827, "grad_norm": 0.36646595976846763, "learning_rate": 6.766229161715165e-06, "loss": 0.2909, "step": 3392 }, { "epoch": 0.8075206759088475, "grad_norm": 0.4596377496880723, "learning_rate": 6.764425707488393e-06, "loss": 0.3568, "step": 3393 }, { "epoch": 0.8077586719819123, "grad_norm": 0.38919838139408713, "learning_rate": 6.76262199102431e-06, "loss": 0.4245, "step": 3394 }, { "epoch": 0.807996668054977, "grad_norm": 0.3756696902040613, "learning_rate": 6.760818012590993e-06, "loss": 0.3169, "step": 3395 }, { "epoch": 0.8082346641280419, "grad_norm": 0.38466137056243654, "learning_rate": 6.75901377245656e-06, "loss": 0.3322, "step": 3396 }, { "epoch": 0.8084726602011066, "grad_norm": 0.372823608083243, "learning_rate": 6.757209270889164e-06, "loss": 0.3765, "step": 3397 }, { "epoch": 0.8087106562741715, "grad_norm": 0.40319286820868333, "learning_rate": 6.755404508156999e-06, "loss": 0.3428, "step": 3398 }, { "epoch": 0.8089486523472362, "grad_norm": 0.37211593039177365, "learning_rate": 6.753599484528297e-06, "loss": 0.3013, "step": 3399 }, { "epoch": 0.8091866484203011, "grad_norm": 0.36450494947046486, "learning_rate": 6.75179420027133e-06, "loss": 0.3378, "step": 3400 }, { "epoch": 0.8094246444933658, "grad_norm": 0.35084818549241026, "learning_rate": 6.749988655654408e-06, "loss": 0.4222, "step": 3401 }, { "epoch": 0.8096626405664307, "grad_norm": 0.38771334806435903, "learning_rate": 6.748182850945878e-06, "loss": 0.3184, "step": 3402 }, { "epoch": 0.8099006366394954, "grad_norm": 0.40751175987989763, "learning_rate": 6.746376786414129e-06, "loss": 0.3106, "step": 3403 }, { "epoch": 0.8101386327125603, "grad_norm": 0.3763443373885834, "learning_rate": 6.744570462327588e-06, "loss": 0.4034, "step": 3404 }, { "epoch": 0.810376628785625, "grad_norm": 0.3912057352411011, "learning_rate": 6.742763878954716e-06, "loss": 0.3267, "step": 3405 }, { "epoch": 0.8106146248586898, "grad_norm": 0.38899642397811124, "learning_rate": 6.740957036564018e-06, "loss": 0.2985, "step": 3406 }, { "epoch": 0.8108526209317546, "grad_norm": 0.38659168634889995, "learning_rate": 6.739149935424036e-06, "loss": 0.3583, "step": 3407 }, { "epoch": 0.8110906170048194, "grad_norm": 0.38015341694039984, "learning_rate": 6.737342575803347e-06, "loss": 0.4034, "step": 3408 }, { "epoch": 0.8113286130778842, "grad_norm": 0.3784853530949804, "learning_rate": 6.735534957970573e-06, "loss": 0.3077, "step": 3409 }, { "epoch": 0.811566609150949, "grad_norm": 0.3715243825768867, "learning_rate": 6.733727082194369e-06, "loss": 0.3189, "step": 3410 }, { "epoch": 0.8118046052240138, "grad_norm": 0.3920251893951503, "learning_rate": 6.73191894874343e-06, "loss": 0.3666, "step": 3411 }, { "epoch": 0.8120426012970786, "grad_norm": 0.4047784901748257, "learning_rate": 6.73011055788649e-06, "loss": 0.412, "step": 3412 }, { "epoch": 0.8122805973701434, "grad_norm": 0.34961388235995544, "learning_rate": 6.728301909892318e-06, "loss": 0.3028, "step": 3413 }, { "epoch": 0.8125185934432082, "grad_norm": 0.4964751847494611, "learning_rate": 6.72649300502973e-06, "loss": 0.3354, "step": 3414 }, { "epoch": 0.8127565895162729, "grad_norm": 0.3737712857277735, "learning_rate": 6.724683843567567e-06, "loss": 0.3834, "step": 3415 }, { "epoch": 0.8129945855893378, "grad_norm": 0.38533893171844813, "learning_rate": 6.7228744257747195e-06, "loss": 0.338, "step": 3416 }, { "epoch": 0.8132325816624025, "grad_norm": 0.39656870787598, "learning_rate": 6.72106475192011e-06, "loss": 0.2903, "step": 3417 }, { "epoch": 0.8134705777354674, "grad_norm": 0.35585546765967224, "learning_rate": 6.719254822272701e-06, "loss": 0.3375, "step": 3418 }, { "epoch": 0.8137085738085321, "grad_norm": 0.37372380792440973, "learning_rate": 6.717444637101494e-06, "loss": 0.4215, "step": 3419 }, { "epoch": 0.813946569881597, "grad_norm": 0.38397186576664505, "learning_rate": 6.715634196675527e-06, "loss": 0.3081, "step": 3420 }, { "epoch": 0.8141845659546617, "grad_norm": 0.42512101870564933, "learning_rate": 6.713823501263874e-06, "loss": 0.3501, "step": 3421 }, { "epoch": 0.8144225620277266, "grad_norm": 0.39007776244652564, "learning_rate": 6.712012551135651e-06, "loss": 0.3927, "step": 3422 }, { "epoch": 0.8146605581007913, "grad_norm": 0.37533224466831305, "learning_rate": 6.71020134656001e-06, "loss": 0.3562, "step": 3423 }, { "epoch": 0.8148985541738561, "grad_norm": 0.3691992345183985, "learning_rate": 6.708389887806142e-06, "loss": 0.3062, "step": 3424 }, { "epoch": 0.8151365502469209, "grad_norm": 0.3836542262713381, "learning_rate": 6.706578175143271e-06, "loss": 0.3577, "step": 3425 }, { "epoch": 0.8153745463199857, "grad_norm": 0.34274121993433526, "learning_rate": 6.704766208840666e-06, "loss": 0.3955, "step": 3426 }, { "epoch": 0.8156125423930505, "grad_norm": 0.3785837148211837, "learning_rate": 6.702953989167627e-06, "loss": 0.328, "step": 3427 }, { "epoch": 0.8158505384661153, "grad_norm": 0.367311727838402, "learning_rate": 6.701141516393497e-06, "loss": 0.3133, "step": 3428 }, { "epoch": 0.8160885345391801, "grad_norm": 0.38174012941670704, "learning_rate": 6.6993287907876526e-06, "loss": 0.3642, "step": 3429 }, { "epoch": 0.8163265306122449, "grad_norm": 0.36620752112375166, "learning_rate": 6.6975158126195114e-06, "loss": 0.4239, "step": 3430 }, { "epoch": 0.8165645266853097, "grad_norm": 0.388324045604444, "learning_rate": 6.695702582158527e-06, "loss": 0.3357, "step": 3431 }, { "epoch": 0.8168025227583745, "grad_norm": 0.3707836017064938, "learning_rate": 6.693889099674188e-06, "loss": 0.3521, "step": 3432 }, { "epoch": 0.8170405188314392, "grad_norm": 0.3610380265483462, "learning_rate": 6.692075365436024e-06, "loss": 0.3975, "step": 3433 }, { "epoch": 0.8172785149045041, "grad_norm": 0.3848858140680098, "learning_rate": 6.690261379713601e-06, "loss": 0.3366, "step": 3434 }, { "epoch": 0.8175165109775688, "grad_norm": 0.41597504909165217, "learning_rate": 6.688447142776522e-06, "loss": 0.3135, "step": 3435 }, { "epoch": 0.8177545070506337, "grad_norm": 0.3631226395036403, "learning_rate": 6.6866326548944276e-06, "loss": 0.3642, "step": 3436 }, { "epoch": 0.8179925031236984, "grad_norm": 0.3821455357196431, "learning_rate": 6.684817916336994e-06, "loss": 0.3862, "step": 3437 }, { "epoch": 0.8182304991967633, "grad_norm": 0.3585464421983625, "learning_rate": 6.683002927373938e-06, "loss": 0.3127, "step": 3438 }, { "epoch": 0.818468495269828, "grad_norm": 0.37218554816765576, "learning_rate": 6.681187688275013e-06, "loss": 0.3131, "step": 3439 }, { "epoch": 0.8187064913428929, "grad_norm": 0.3627840886800982, "learning_rate": 6.679372199310006e-06, "loss": 0.4235, "step": 3440 }, { "epoch": 0.8189444874159576, "grad_norm": 0.4463956845389517, "learning_rate": 6.677556460748744e-06, "loss": 0.3329, "step": 3441 }, { "epoch": 0.8191824834890225, "grad_norm": 0.41940300133110525, "learning_rate": 6.675740472861092e-06, "loss": 0.2945, "step": 3442 }, { "epoch": 0.8194204795620872, "grad_norm": 0.3861526446671477, "learning_rate": 6.673924235916948e-06, "loss": 0.3535, "step": 3443 }, { "epoch": 0.819658475635152, "grad_norm": 0.44522995704309204, "learning_rate": 6.672107750186255e-06, "loss": 0.3907, "step": 3444 }, { "epoch": 0.8198964717082168, "grad_norm": 0.43143411864941417, "learning_rate": 6.670291015938983e-06, "loss": 0.2998, "step": 3445 }, { "epoch": 0.8201344677812816, "grad_norm": 0.4500423391786268, "learning_rate": 6.6684740334451445e-06, "loss": 0.3279, "step": 3446 }, { "epoch": 0.8203724638543464, "grad_norm": 0.3777270892774342, "learning_rate": 6.666656802974789e-06, "loss": 0.361, "step": 3447 }, { "epoch": 0.8206104599274112, "grad_norm": 0.43511204091192573, "learning_rate": 6.664839324798002e-06, "loss": 0.394, "step": 3448 }, { "epoch": 0.820848456000476, "grad_norm": 0.37499179792352805, "learning_rate": 6.663021599184904e-06, "loss": 0.3216, "step": 3449 }, { "epoch": 0.8210864520735408, "grad_norm": 0.42216483258775034, "learning_rate": 6.661203626405656e-06, "loss": 0.3902, "step": 3450 }, { "epoch": 0.8213244481466055, "grad_norm": 0.39104996064612785, "learning_rate": 6.659385406730452e-06, "loss": 0.4236, "step": 3451 }, { "epoch": 0.8215624442196704, "grad_norm": 0.36054918099443395, "learning_rate": 6.6575669404295265e-06, "loss": 0.3189, "step": 3452 }, { "epoch": 0.8218004402927351, "grad_norm": 0.40811287467124596, "learning_rate": 6.6557482277731465e-06, "loss": 0.3119, "step": 3453 }, { "epoch": 0.8220384363658, "grad_norm": 0.36440658348004856, "learning_rate": 6.653929269031618e-06, "loss": 0.3753, "step": 3454 }, { "epoch": 0.8222764324388647, "grad_norm": 0.4096319668844619, "learning_rate": 6.652110064475286e-06, "loss": 0.3708, "step": 3455 }, { "epoch": 0.8225144285119296, "grad_norm": 0.4138762726742505, "learning_rate": 6.650290614374526e-06, "loss": 0.2696, "step": 3456 }, { "epoch": 0.8227524245849943, "grad_norm": 0.38274207266784216, "learning_rate": 6.648470918999754e-06, "loss": 0.3701, "step": 3457 }, { "epoch": 0.8229904206580592, "grad_norm": 0.38596079001363315, "learning_rate": 6.646650978621422e-06, "loss": 0.3932, "step": 3458 }, { "epoch": 0.8232284167311239, "grad_norm": 0.4033700687920439, "learning_rate": 6.644830793510019e-06, "loss": 0.3134, "step": 3459 }, { "epoch": 0.8234664128041888, "grad_norm": 0.3957395882295056, "learning_rate": 6.64301036393607e-06, "loss": 0.2913, "step": 3460 }, { "epoch": 0.8237044088772535, "grad_norm": 0.37332886096760914, "learning_rate": 6.641189690170135e-06, "loss": 0.3702, "step": 3461 }, { "epoch": 0.8239424049503183, "grad_norm": 0.382271014403154, "learning_rate": 6.639368772482809e-06, "loss": 0.3848, "step": 3462 }, { "epoch": 0.8241804010233831, "grad_norm": 0.42293254876321, "learning_rate": 6.637547611144729e-06, "loss": 0.3168, "step": 3463 }, { "epoch": 0.8244183970964479, "grad_norm": 0.5064018854163713, "learning_rate": 6.635726206426562e-06, "loss": 0.3296, "step": 3464 }, { "epoch": 0.8246563931695127, "grad_norm": 0.3699399246485159, "learning_rate": 6.633904558599015e-06, "loss": 0.379, "step": 3465 }, { "epoch": 0.8248943892425775, "grad_norm": 0.36822069349704023, "learning_rate": 6.63208266793283e-06, "loss": 0.3622, "step": 3466 }, { "epoch": 0.8251323853156423, "grad_norm": 0.37564917131168607, "learning_rate": 6.630260534698784e-06, "loss": 0.2927, "step": 3467 }, { "epoch": 0.8253703813887071, "grad_norm": 0.359534156281574, "learning_rate": 6.628438159167691e-06, "loss": 0.3703, "step": 3468 }, { "epoch": 0.8256083774617718, "grad_norm": 0.41724063858650523, "learning_rate": 6.626615541610404e-06, "loss": 0.4272, "step": 3469 }, { "epoch": 0.8258463735348367, "grad_norm": 0.35435885568070713, "learning_rate": 6.624792682297807e-06, "loss": 0.2995, "step": 3470 }, { "epoch": 0.8260843696079014, "grad_norm": 0.3924531439546138, "learning_rate": 6.62296958150082e-06, "loss": 0.3106, "step": 3471 }, { "epoch": 0.8263223656809663, "grad_norm": 0.3793607220180146, "learning_rate": 6.621146239490405e-06, "loss": 0.3642, "step": 3472 }, { "epoch": 0.826560361754031, "grad_norm": 0.3757765538144861, "learning_rate": 6.619322656537552e-06, "loss": 0.3751, "step": 3473 }, { "epoch": 0.8267983578270959, "grad_norm": 0.379936104170262, "learning_rate": 6.6174988329132935e-06, "loss": 0.3125, "step": 3474 }, { "epoch": 0.8270363539001606, "grad_norm": 0.36029253203578504, "learning_rate": 6.615674768888693e-06, "loss": 0.3439, "step": 3475 }, { "epoch": 0.8272743499732255, "grad_norm": 0.400916363212535, "learning_rate": 6.613850464734852e-06, "loss": 0.3963, "step": 3476 }, { "epoch": 0.8275123460462902, "grad_norm": 0.39903964241453344, "learning_rate": 6.6120259207229074e-06, "loss": 0.3212, "step": 3477 }, { "epoch": 0.827750342119355, "grad_norm": 0.4140249986684947, "learning_rate": 6.61020113712403e-06, "loss": 0.3088, "step": 3478 }, { "epoch": 0.8279883381924198, "grad_norm": 0.3755700342561937, "learning_rate": 6.60837611420943e-06, "loss": 0.3936, "step": 3479 }, { "epoch": 0.8282263342654846, "grad_norm": 0.3669468151714262, "learning_rate": 6.606550852250351e-06, "loss": 0.3653, "step": 3480 }, { "epoch": 0.8284643303385494, "grad_norm": 0.3630825299629299, "learning_rate": 6.60472535151807e-06, "loss": 0.2969, "step": 3481 }, { "epoch": 0.8287023264116142, "grad_norm": 0.48144687843420203, "learning_rate": 6.602899612283903e-06, "loss": 0.3609, "step": 3482 }, { "epoch": 0.828940322484679, "grad_norm": 0.3375193982835446, "learning_rate": 6.6010736348192e-06, "loss": 0.4184, "step": 3483 }, { "epoch": 0.8291783185577438, "grad_norm": 0.40206568719269975, "learning_rate": 6.599247419395346e-06, "loss": 0.3326, "step": 3484 }, { "epoch": 0.8294163146308086, "grad_norm": 0.35032011037790545, "learning_rate": 6.597420966283762e-06, "loss": 0.3043, "step": 3485 }, { "epoch": 0.8296543107038734, "grad_norm": 0.36114199055488116, "learning_rate": 6.595594275755905e-06, "loss": 0.3566, "step": 3486 }, { "epoch": 0.8298923067769381, "grad_norm": 0.40221894140390374, "learning_rate": 6.593767348083264e-06, "loss": 0.4178, "step": 3487 }, { "epoch": 0.830130302850003, "grad_norm": 0.4219667697893653, "learning_rate": 6.591940183537369e-06, "loss": 0.301, "step": 3488 }, { "epoch": 0.8303682989230677, "grad_norm": 0.4089831045897673, "learning_rate": 6.590112782389779e-06, "loss": 0.3545, "step": 3489 }, { "epoch": 0.8306062949961326, "grad_norm": 0.3568585115767541, "learning_rate": 6.588285144912092e-06, "loss": 0.4073, "step": 3490 }, { "epoch": 0.8308442910691973, "grad_norm": 0.39444918515542654, "learning_rate": 6.58645727137594e-06, "loss": 0.3751, "step": 3491 }, { "epoch": 0.8310822871422622, "grad_norm": 0.40709353343588783, "learning_rate": 6.584629162052991e-06, "loss": 0.3463, "step": 3492 }, { "epoch": 0.8313202832153269, "grad_norm": 0.3926505024184683, "learning_rate": 6.582800817214947e-06, "loss": 0.372, "step": 3493 }, { "epoch": 0.8315582792883918, "grad_norm": 0.39799166748115355, "learning_rate": 6.5809722371335425e-06, "loss": 0.4018, "step": 3494 }, { "epoch": 0.8317962753614565, "grad_norm": 0.38694896220045694, "learning_rate": 6.579143422080555e-06, "loss": 0.3137, "step": 3495 }, { "epoch": 0.8320342714345214, "grad_norm": 0.3643527025654742, "learning_rate": 6.577314372327788e-06, "loss": 0.3131, "step": 3496 }, { "epoch": 0.8322722675075861, "grad_norm": 0.3818835372248485, "learning_rate": 6.575485088147085e-06, "loss": 0.3707, "step": 3497 }, { "epoch": 0.8325102635806509, "grad_norm": 0.40425205370807665, "learning_rate": 6.57365556981032e-06, "loss": 0.3499, "step": 3498 }, { "epoch": 0.8327482596537157, "grad_norm": 0.4772831093586169, "learning_rate": 6.571825817589409e-06, "loss": 0.3433, "step": 3499 }, { "epoch": 0.8329862557267805, "grad_norm": 0.37500312469382296, "learning_rate": 6.569995831756296e-06, "loss": 0.3384, "step": 3500 }, { "epoch": 0.8332242517998453, "grad_norm": 0.3878483799536773, "learning_rate": 6.568165612582963e-06, "loss": 0.4041, "step": 3501 }, { "epoch": 0.8334622478729101, "grad_norm": 0.39314546980554216, "learning_rate": 6.566335160341425e-06, "loss": 0.3057, "step": 3502 }, { "epoch": 0.8337002439459749, "grad_norm": 0.39459765976708316, "learning_rate": 6.564504475303732e-06, "loss": 0.3598, "step": 3503 }, { "epoch": 0.8339382400190397, "grad_norm": 0.37557187665586705, "learning_rate": 6.562673557741972e-06, "loss": 0.3976, "step": 3504 }, { "epoch": 0.8341762360921045, "grad_norm": 0.3666761239770583, "learning_rate": 6.560842407928261e-06, "loss": 0.3936, "step": 3505 }, { "epoch": 0.8344142321651693, "grad_norm": 0.43334042433611564, "learning_rate": 6.559011026134755e-06, "loss": 0.309, "step": 3506 }, { "epoch": 0.834652228238234, "grad_norm": 0.3639925280228689, "learning_rate": 6.557179412633643e-06, "loss": 0.3225, "step": 3507 }, { "epoch": 0.8348902243112989, "grad_norm": 0.368213342084047, "learning_rate": 6.555347567697147e-06, "loss": 0.4027, "step": 3508 }, { "epoch": 0.8351282203843636, "grad_norm": 0.40280786786827166, "learning_rate": 6.553515491597525e-06, "loss": 0.2956, "step": 3509 }, { "epoch": 0.8353662164574285, "grad_norm": 0.3901118662358053, "learning_rate": 6.55168318460707e-06, "loss": 0.3167, "step": 3510 }, { "epoch": 0.8356042125304932, "grad_norm": 0.36744551369501394, "learning_rate": 6.549850646998106e-06, "loss": 0.3774, "step": 3511 }, { "epoch": 0.8358422086035581, "grad_norm": 0.42446857731031934, "learning_rate": 6.548017879042993e-06, "loss": 0.3903, "step": 3512 }, { "epoch": 0.8360802046766228, "grad_norm": 0.4358337216970375, "learning_rate": 6.546184881014128e-06, "loss": 0.316, "step": 3513 }, { "epoch": 0.8363182007496877, "grad_norm": 0.37095075714460507, "learning_rate": 6.54435165318394e-06, "loss": 0.3294, "step": 3514 }, { "epoch": 0.8365561968227524, "grad_norm": 0.3735370617178222, "learning_rate": 6.54251819582489e-06, "loss": 0.4038, "step": 3515 }, { "epoch": 0.8367941928958172, "grad_norm": 0.3754617184639044, "learning_rate": 6.5406845092094775e-06, "loss": 0.3109, "step": 3516 }, { "epoch": 0.837032188968882, "grad_norm": 0.4256438483351794, "learning_rate": 6.5388505936102305e-06, "loss": 0.3236, "step": 3517 }, { "epoch": 0.8372701850419468, "grad_norm": 0.36228819168311865, "learning_rate": 6.537016449299718e-06, "loss": 0.3672, "step": 3518 }, { "epoch": 0.8375081811150116, "grad_norm": 0.3567201288288663, "learning_rate": 6.5351820765505345e-06, "loss": 0.3876, "step": 3519 }, { "epoch": 0.8377461771880764, "grad_norm": 0.36599561427777705, "learning_rate": 6.533347475635316e-06, "loss": 0.3043, "step": 3520 }, { "epoch": 0.8379841732611412, "grad_norm": 0.4053564651011694, "learning_rate": 6.531512646826731e-06, "loss": 0.3253, "step": 3521 }, { "epoch": 0.838222169334206, "grad_norm": 0.37600903870469027, "learning_rate": 6.529677590397478e-06, "loss": 0.4036, "step": 3522 }, { "epoch": 0.8384601654072708, "grad_norm": 0.361124659195214, "learning_rate": 6.527842306620294e-06, "loss": 0.3696, "step": 3523 }, { "epoch": 0.8386981614803356, "grad_norm": 0.3908512268752686, "learning_rate": 6.5260067957679455e-06, "loss": 0.2881, "step": 3524 }, { "epoch": 0.8389361575534003, "grad_norm": 0.391451223833398, "learning_rate": 6.524171058113236e-06, "loss": 0.3734, "step": 3525 }, { "epoch": 0.8391741536264652, "grad_norm": 0.34623716604454546, "learning_rate": 6.522335093928999e-06, "loss": 0.4447, "step": 3526 }, { "epoch": 0.8394121496995299, "grad_norm": 0.35943956265557137, "learning_rate": 6.520498903488108e-06, "loss": 0.3042, "step": 3527 }, { "epoch": 0.8396501457725948, "grad_norm": 0.39161434146649104, "learning_rate": 6.518662487063464e-06, "loss": 0.291, "step": 3528 }, { "epoch": 0.8398881418456595, "grad_norm": 0.37421840622968505, "learning_rate": 6.516825844928005e-06, "loss": 0.3847, "step": 3529 }, { "epoch": 0.8401261379187244, "grad_norm": 0.37611154296694504, "learning_rate": 6.514988977354701e-06, "loss": 0.3662, "step": 3530 }, { "epoch": 0.8403641339917891, "grad_norm": 0.3647185189803361, "learning_rate": 6.513151884616556e-06, "loss": 0.3128, "step": 3531 }, { "epoch": 0.840602130064854, "grad_norm": 0.3821277967922997, "learning_rate": 6.511314566986608e-06, "loss": 0.3503, "step": 3532 }, { "epoch": 0.8408401261379187, "grad_norm": 0.35310735826505824, "learning_rate": 6.5094770247379256e-06, "loss": 0.3922, "step": 3533 }, { "epoch": 0.8410781222109835, "grad_norm": 0.39211042496153803, "learning_rate": 6.507639258143615e-06, "loss": 0.3153, "step": 3534 }, { "epoch": 0.8413161182840483, "grad_norm": 0.37374980390462503, "learning_rate": 6.5058012674768136e-06, "loss": 0.2926, "step": 3535 }, { "epoch": 0.8415541143571131, "grad_norm": 0.3480206219661194, "learning_rate": 6.5039630530106925e-06, "loss": 0.3595, "step": 3536 }, { "epoch": 0.8417921104301779, "grad_norm": 0.37436697934855795, "learning_rate": 6.502124615018456e-06, "loss": 0.3896, "step": 3537 }, { "epoch": 0.8420301065032427, "grad_norm": 0.3783010027033652, "learning_rate": 6.50028595377334e-06, "loss": 0.3203, "step": 3538 }, { "epoch": 0.8422681025763075, "grad_norm": 0.4307862323394335, "learning_rate": 6.498447069548617e-06, "loss": 0.3373, "step": 3539 }, { "epoch": 0.8425060986493723, "grad_norm": 0.4059325152116869, "learning_rate": 6.496607962617588e-06, "loss": 0.3827, "step": 3540 }, { "epoch": 0.842744094722437, "grad_norm": 0.4279651224236223, "learning_rate": 6.494768633253593e-06, "loss": 0.3371, "step": 3541 }, { "epoch": 0.8429820907955019, "grad_norm": 0.3780915639053451, "learning_rate": 6.492929081729999e-06, "loss": 0.3081, "step": 3542 }, { "epoch": 0.8432200868685666, "grad_norm": 0.42345332107805106, "learning_rate": 6.491089308320212e-06, "loss": 0.3686, "step": 3543 }, { "epoch": 0.8434580829416315, "grad_norm": 0.3479273768259802, "learning_rate": 6.489249313297665e-06, "loss": 0.4429, "step": 3544 }, { "epoch": 0.8436960790146962, "grad_norm": 0.41111841314637215, "learning_rate": 6.487409096935828e-06, "loss": 0.3142, "step": 3545 }, { "epoch": 0.8439340750877611, "grad_norm": 0.41453492108831, "learning_rate": 6.485568659508201e-06, "loss": 0.3128, "step": 3546 }, { "epoch": 0.8441720711608258, "grad_norm": 0.3412459944917207, "learning_rate": 6.483728001288322e-06, "loss": 0.3878, "step": 3547 }, { "epoch": 0.8444100672338907, "grad_norm": 0.3580627139101501, "learning_rate": 6.481887122549755e-06, "loss": 0.3663, "step": 3548 }, { "epoch": 0.8446480633069554, "grad_norm": 0.3881281457560659, "learning_rate": 6.480046023566101e-06, "loss": 0.3162, "step": 3549 }, { "epoch": 0.8448860593800203, "grad_norm": 0.38842732635621374, "learning_rate": 6.4782047046109956e-06, "loss": 0.3745, "step": 3550 }, { "epoch": 0.845124055453085, "grad_norm": 0.368464384136309, "learning_rate": 6.476363165958101e-06, "loss": 0.4105, "step": 3551 }, { "epoch": 0.8453620515261498, "grad_norm": 0.42844621258294174, "learning_rate": 6.474521407881116e-06, "loss": 0.3035, "step": 3552 }, { "epoch": 0.8456000475992146, "grad_norm": 0.37487158122933406, "learning_rate": 6.472679430653771e-06, "loss": 0.3028, "step": 3553 }, { "epoch": 0.8458380436722794, "grad_norm": 0.3697192895630358, "learning_rate": 6.470837234549831e-06, "loss": 0.3821, "step": 3554 }, { "epoch": 0.8460760397453442, "grad_norm": 0.39897676938087634, "learning_rate": 6.468994819843093e-06, "loss": 0.3787, "step": 3555 }, { "epoch": 0.846314035818409, "grad_norm": 0.370440881250878, "learning_rate": 6.467152186807381e-06, "loss": 0.3008, "step": 3556 }, { "epoch": 0.8465520318914738, "grad_norm": 0.36810682890311036, "learning_rate": 6.4653093357165605e-06, "loss": 0.3324, "step": 3557 }, { "epoch": 0.8467900279645386, "grad_norm": 0.44038968365113795, "learning_rate": 6.463466266844523e-06, "loss": 0.4133, "step": 3558 }, { "epoch": 0.8470280240376034, "grad_norm": 0.42146397355664883, "learning_rate": 6.461622980465192e-06, "loss": 0.3262, "step": 3559 }, { "epoch": 0.8472660201106682, "grad_norm": 0.3892143606840479, "learning_rate": 6.459779476852528e-06, "loss": 0.2951, "step": 3560 }, { "epoch": 0.8475040161837329, "grad_norm": 0.3606861577032436, "learning_rate": 6.45793575628052e-06, "loss": 0.3468, "step": 3561 }, { "epoch": 0.8477420122567978, "grad_norm": 0.39838162217271655, "learning_rate": 6.456091819023192e-06, "loss": 0.3662, "step": 3562 }, { "epoch": 0.8479800083298625, "grad_norm": 0.40545284210053256, "learning_rate": 6.454247665354596e-06, "loss": 0.3092, "step": 3563 }, { "epoch": 0.8482180044029274, "grad_norm": 0.3745103557445221, "learning_rate": 6.452403295548822e-06, "loss": 0.3259, "step": 3564 }, { "epoch": 0.8484560004759921, "grad_norm": 0.36452519630195096, "learning_rate": 6.450558709879988e-06, "loss": 0.3935, "step": 3565 }, { "epoch": 0.848693996549057, "grad_norm": 0.3753670451371452, "learning_rate": 6.448713908622244e-06, "loss": 0.364, "step": 3566 }, { "epoch": 0.8489319926221217, "grad_norm": 0.4415121020870584, "learning_rate": 6.446868892049774e-06, "loss": 0.2946, "step": 3567 }, { "epoch": 0.8491699886951866, "grad_norm": 0.3586879855069214, "learning_rate": 6.445023660436792e-06, "loss": 0.3556, "step": 3568 }, { "epoch": 0.8494079847682513, "grad_norm": 0.388777137976686, "learning_rate": 6.443178214057546e-06, "loss": 0.4276, "step": 3569 }, { "epoch": 0.8496459808413162, "grad_norm": 0.37232395659576667, "learning_rate": 6.441332553186317e-06, "loss": 0.2855, "step": 3570 }, { "epoch": 0.8498839769143809, "grad_norm": 0.36549499530200263, "learning_rate": 6.439486678097412e-06, "loss": 0.2883, "step": 3571 }, { "epoch": 0.8501219729874457, "grad_norm": 0.4015119927722684, "learning_rate": 6.437640589065175e-06, "loss": 0.4052, "step": 3572 }, { "epoch": 0.8503599690605105, "grad_norm": 0.39600433293408427, "learning_rate": 6.435794286363981e-06, "loss": 0.3315, "step": 3573 }, { "epoch": 0.8505979651335753, "grad_norm": 0.34745554675127294, "learning_rate": 6.4339477702682365e-06, "loss": 0.3379, "step": 3574 }, { "epoch": 0.8508359612066401, "grad_norm": 0.376990594604472, "learning_rate": 6.4321010410523785e-06, "loss": 0.3708, "step": 3575 }, { "epoch": 0.8510739572797049, "grad_norm": 0.37580912173380343, "learning_rate": 6.430254098990879e-06, "loss": 0.3867, "step": 3576 }, { "epoch": 0.8513119533527697, "grad_norm": 0.36492592391634565, "learning_rate": 6.428406944358236e-06, "loss": 0.3329, "step": 3577 }, { "epoch": 0.8515499494258345, "grad_norm": 0.4561243150373411, "learning_rate": 6.426559577428986e-06, "loss": 0.3443, "step": 3578 }, { "epoch": 0.8517879454988992, "grad_norm": 0.37361088859679636, "learning_rate": 6.42471199847769e-06, "loss": 0.3821, "step": 3579 }, { "epoch": 0.8520259415719641, "grad_norm": 0.3858002858596615, "learning_rate": 6.422864207778946e-06, "loss": 0.3575, "step": 3580 }, { "epoch": 0.8522639376450288, "grad_norm": 0.591361749000545, "learning_rate": 6.42101620560738e-06, "loss": 0.2932, "step": 3581 }, { "epoch": 0.8525019337180937, "grad_norm": 0.3862018455711387, "learning_rate": 6.4191679922376514e-06, "loss": 0.332, "step": 3582 }, { "epoch": 0.8527399297911584, "grad_norm": 0.3477194194120392, "learning_rate": 6.417319567944451e-06, "loss": 0.4039, "step": 3583 }, { "epoch": 0.8529779258642233, "grad_norm": 0.3867671638518174, "learning_rate": 6.4154709330025014e-06, "loss": 0.3283, "step": 3584 }, { "epoch": 0.853215921937288, "grad_norm": 0.3789296076013951, "learning_rate": 6.413622087686553e-06, "loss": 0.3197, "step": 3585 }, { "epoch": 0.8534539180103529, "grad_norm": 0.35204003022469893, "learning_rate": 6.411773032271391e-06, "loss": 0.3575, "step": 3586 }, { "epoch": 0.8536919140834176, "grad_norm": 0.4135524819567001, "learning_rate": 6.4099237670318295e-06, "loss": 0.3739, "step": 3587 }, { "epoch": 0.8539299101564825, "grad_norm": 0.36478421612897816, "learning_rate": 6.408074292242719e-06, "loss": 0.314, "step": 3588 }, { "epoch": 0.8541679062295472, "grad_norm": 0.3991900857385473, "learning_rate": 6.4062246081789316e-06, "loss": 0.3417, "step": 3589 }, { "epoch": 0.854405902302612, "grad_norm": 0.35893254692737425, "learning_rate": 6.40437471511538e-06, "loss": 0.3911, "step": 3590 }, { "epoch": 0.8546438983756768, "grad_norm": 0.35453540240239667, "learning_rate": 6.402524613327005e-06, "loss": 0.3469, "step": 3591 }, { "epoch": 0.8548818944487416, "grad_norm": 0.43997392067335855, "learning_rate": 6.400674303088774e-06, "loss": 0.2904, "step": 3592 }, { "epoch": 0.8551198905218064, "grad_norm": 0.3932703368817032, "learning_rate": 6.398823784675692e-06, "loss": 0.3453, "step": 3593 }, { "epoch": 0.8553578865948712, "grad_norm": 0.38331111856702316, "learning_rate": 6.3969730583627895e-06, "loss": 0.4321, "step": 3594 }, { "epoch": 0.855595882667936, "grad_norm": 0.36082426472750595, "learning_rate": 6.39512212442513e-06, "loss": 0.303, "step": 3595 }, { "epoch": 0.8558338787410008, "grad_norm": 0.36640398807283237, "learning_rate": 6.39327098313781e-06, "loss": 0.3424, "step": 3596 }, { "epoch": 0.8560718748140655, "grad_norm": 0.39345559455136353, "learning_rate": 6.391419634775955e-06, "loss": 0.3943, "step": 3597 }, { "epoch": 0.8563098708871304, "grad_norm": 0.38176260350590113, "learning_rate": 6.3895680796147195e-06, "loss": 0.3916, "step": 3598 }, { "epoch": 0.8565478669601951, "grad_norm": 0.3701812101754324, "learning_rate": 6.387716317929291e-06, "loss": 0.3091, "step": 3599 }, { "epoch": 0.85678586303326, "grad_norm": 0.37515785937868595, "learning_rate": 6.385864349994887e-06, "loss": 0.3352, "step": 3600 }, { "epoch": 0.8570238591063247, "grad_norm": 0.3811040883533929, "learning_rate": 6.384012176086756e-06, "loss": 0.4348, "step": 3601 }, { "epoch": 0.8572618551793896, "grad_norm": 0.38277086910509656, "learning_rate": 6.382159796480176e-06, "loss": 0.3105, "step": 3602 }, { "epoch": 0.8574998512524543, "grad_norm": 0.4049398160651801, "learning_rate": 6.380307211450459e-06, "loss": 0.2908, "step": 3603 }, { "epoch": 0.8577378473255192, "grad_norm": 0.37259835586706896, "learning_rate": 6.3784544212729425e-06, "loss": 0.3591, "step": 3604 }, { "epoch": 0.8579758433985839, "grad_norm": 0.39620623445763115, "learning_rate": 6.376601426222998e-06, "loss": 0.417, "step": 3605 }, { "epoch": 0.8582138394716488, "grad_norm": 0.38565150714963886, "learning_rate": 6.374748226576026e-06, "loss": 0.3028, "step": 3606 }, { "epoch": 0.8584518355447135, "grad_norm": 0.38396283701575223, "learning_rate": 6.372894822607459e-06, "loss": 0.3637, "step": 3607 }, { "epoch": 0.8586898316177783, "grad_norm": 0.36500484893052054, "learning_rate": 6.371041214592756e-06, "loss": 0.3775, "step": 3608 }, { "epoch": 0.8589278276908431, "grad_norm": 0.3661904393716665, "learning_rate": 6.369187402807409e-06, "loss": 0.3382, "step": 3609 }, { "epoch": 0.8591658237639079, "grad_norm": 0.3635402605710434, "learning_rate": 6.3673333875269435e-06, "loss": 0.3263, "step": 3610 }, { "epoch": 0.8594038198369727, "grad_norm": 0.34166628776145125, "learning_rate": 6.3654791690269115e-06, "loss": 0.3689, "step": 3611 }, { "epoch": 0.8596418159100375, "grad_norm": 0.40702525148527946, "learning_rate": 6.363624747582895e-06, "loss": 0.3635, "step": 3612 }, { "epoch": 0.8598798119831023, "grad_norm": 0.3788739157966428, "learning_rate": 6.361770123470506e-06, "loss": 0.3199, "step": 3613 }, { "epoch": 0.8601178080561671, "grad_norm": 0.4010995049473161, "learning_rate": 6.359915296965386e-06, "loss": 0.3378, "step": 3614 }, { "epoch": 0.8603558041292318, "grad_norm": 0.3976190901244627, "learning_rate": 6.3580602683432114e-06, "loss": 0.3947, "step": 3615 }, { "epoch": 0.8605938002022967, "grad_norm": 0.41040643943188415, "learning_rate": 6.356205037879683e-06, "loss": 0.353, "step": 3616 }, { "epoch": 0.8608317962753614, "grad_norm": 0.40129402029771805, "learning_rate": 6.354349605850537e-06, "loss": 0.3115, "step": 3617 }, { "epoch": 0.8610697923484263, "grad_norm": 0.37800238180174833, "learning_rate": 6.352493972531535e-06, "loss": 0.3461, "step": 3618 }, { "epoch": 0.861307788421491, "grad_norm": 0.35601066827900923, "learning_rate": 6.350638138198468e-06, "loss": 0.3927, "step": 3619 }, { "epoch": 0.8615457844945559, "grad_norm": 0.4294833642760016, "learning_rate": 6.348782103127161e-06, "loss": 0.3186, "step": 3620 }, { "epoch": 0.8617837805676206, "grad_norm": 0.39885340739339903, "learning_rate": 6.346925867593468e-06, "loss": 0.3481, "step": 3621 }, { "epoch": 0.8620217766406855, "grad_norm": 0.381073935397439, "learning_rate": 6.345069431873267e-06, "loss": 0.3632, "step": 3622 }, { "epoch": 0.8622597727137502, "grad_norm": 0.3716616071286771, "learning_rate": 6.3432127962424724e-06, "loss": 0.3735, "step": 3623 }, { "epoch": 0.8624977687868151, "grad_norm": 0.3936052335005196, "learning_rate": 6.341355960977029e-06, "loss": 0.3002, "step": 3624 }, { "epoch": 0.8627357648598798, "grad_norm": 0.35707110608872256, "learning_rate": 6.3394989263529075e-06, "loss": 0.3286, "step": 3625 }, { "epoch": 0.8629737609329446, "grad_norm": 0.37387621795612574, "learning_rate": 6.337641692646106e-06, "loss": 0.4234, "step": 3626 }, { "epoch": 0.8632117570060094, "grad_norm": 0.36608540038166837, "learning_rate": 6.335784260132656e-06, "loss": 0.3326, "step": 3627 }, { "epoch": 0.8634497530790742, "grad_norm": 0.40136651207935303, "learning_rate": 6.33392662908862e-06, "loss": 0.3229, "step": 3628 }, { "epoch": 0.863687749152139, "grad_norm": 0.38445657475293393, "learning_rate": 6.332068799790088e-06, "loss": 0.3608, "step": 3629 }, { "epoch": 0.8639257452252038, "grad_norm": 0.4134034698432213, "learning_rate": 6.330210772513179e-06, "loss": 0.3888, "step": 3630 }, { "epoch": 0.8641637412982686, "grad_norm": 0.3403794858258773, "learning_rate": 6.32835254753404e-06, "loss": 0.3299, "step": 3631 }, { "epoch": 0.8644017373713334, "grad_norm": 0.400740147025239, "learning_rate": 6.3264941251288524e-06, "loss": 0.3519, "step": 3632 }, { "epoch": 0.8646397334443982, "grad_norm": 0.3696283238042445, "learning_rate": 6.324635505573821e-06, "loss": 0.4068, "step": 3633 }, { "epoch": 0.864877729517463, "grad_norm": 0.3648815050871534, "learning_rate": 6.3227766891451834e-06, "loss": 0.3229, "step": 3634 }, { "epoch": 0.8651157255905277, "grad_norm": 0.3684339728207634, "learning_rate": 6.3209176761192056e-06, "loss": 0.2937, "step": 3635 }, { "epoch": 0.8653537216635926, "grad_norm": 0.3412830815800425, "learning_rate": 6.319058466772183e-06, "loss": 0.3547, "step": 3636 }, { "epoch": 0.8655917177366573, "grad_norm": 0.4151380109178161, "learning_rate": 6.317199061380442e-06, "loss": 0.4081, "step": 3637 }, { "epoch": 0.8658297138097222, "grad_norm": 0.3807487486762807, "learning_rate": 6.3153394602203335e-06, "loss": 0.2962, "step": 3638 }, { "epoch": 0.8660677098827869, "grad_norm": 0.3818685065331829, "learning_rate": 6.313479663568241e-06, "loss": 0.3313, "step": 3639 }, { "epoch": 0.8663057059558518, "grad_norm": 0.39854588070720026, "learning_rate": 6.311619671700577e-06, "loss": 0.3949, "step": 3640 }, { "epoch": 0.8665437020289165, "grad_norm": 0.39040880832128244, "learning_rate": 6.309759484893781e-06, "loss": 0.3463, "step": 3641 }, { "epoch": 0.8667816981019814, "grad_norm": 0.3808134094910771, "learning_rate": 6.3078991034243246e-06, "loss": 0.3069, "step": 3642 }, { "epoch": 0.8670196941750461, "grad_norm": 0.42611867594264174, "learning_rate": 6.306038527568703e-06, "loss": 0.3499, "step": 3643 }, { "epoch": 0.867257690248111, "grad_norm": 0.38268539446297695, "learning_rate": 6.304177757603449e-06, "loss": 0.381, "step": 3644 }, { "epoch": 0.8674956863211757, "grad_norm": 0.39139862991744423, "learning_rate": 6.302316793805117e-06, "loss": 0.311, "step": 3645 }, { "epoch": 0.8677336823942405, "grad_norm": 0.34134245528800944, "learning_rate": 6.300455636450291e-06, "loss": 0.3326, "step": 3646 }, { "epoch": 0.8679716784673053, "grad_norm": 0.3603534975781144, "learning_rate": 6.298594285815585e-06, "loss": 0.3793, "step": 3647 }, { "epoch": 0.8682096745403701, "grad_norm": 0.3614007549196889, "learning_rate": 6.296732742177644e-06, "loss": 0.3392, "step": 3648 }, { "epoch": 0.8684476706134349, "grad_norm": 0.4195748954343944, "learning_rate": 6.294871005813137e-06, "loss": 0.3134, "step": 3649 }, { "epoch": 0.8686856666864997, "grad_norm": 0.39119705289423146, "learning_rate": 6.293009076998763e-06, "loss": 0.3361, "step": 3650 }, { "epoch": 0.8689236627595645, "grad_norm": 0.4029024677261677, "learning_rate": 6.291146956011255e-06, "loss": 0.3868, "step": 3651 }, { "epoch": 0.8691616588326293, "grad_norm": 0.3811799244974477, "learning_rate": 6.289284643127367e-06, "loss": 0.3218, "step": 3652 }, { "epoch": 0.869399654905694, "grad_norm": 0.4261716940666211, "learning_rate": 6.287422138623886e-06, "loss": 0.3203, "step": 3653 }, { "epoch": 0.8696376509787589, "grad_norm": 0.39445236227113656, "learning_rate": 6.285559442777624e-06, "loss": 0.4, "step": 3654 }, { "epoch": 0.8698756470518236, "grad_norm": 0.4319574272153776, "learning_rate": 6.283696555865429e-06, "loss": 0.3435, "step": 3655 }, { "epoch": 0.8701136431248885, "grad_norm": 0.4546433169970997, "learning_rate": 6.281833478164168e-06, "loss": 0.3051, "step": 3656 }, { "epoch": 0.8703516391979532, "grad_norm": 0.38154852645595494, "learning_rate": 6.279970209950738e-06, "loss": 0.3498, "step": 3657 }, { "epoch": 0.8705896352710181, "grad_norm": 0.3548764531740995, "learning_rate": 6.278106751502073e-06, "loss": 0.4182, "step": 3658 }, { "epoch": 0.8708276313440828, "grad_norm": 0.3759251982640629, "learning_rate": 6.2762431030951255e-06, "loss": 0.3106, "step": 3659 }, { "epoch": 0.8710656274171477, "grad_norm": 0.3613232709245374, "learning_rate": 6.274379265006879e-06, "loss": 0.2905, "step": 3660 }, { "epoch": 0.8713036234902124, "grad_norm": 0.3751051295735669, "learning_rate": 6.272515237514349e-06, "loss": 0.358, "step": 3661 }, { "epoch": 0.8715416195632772, "grad_norm": 0.36121575959400304, "learning_rate": 6.270651020894572e-06, "loss": 0.4264, "step": 3662 }, { "epoch": 0.871779615636342, "grad_norm": 0.3780337870363989, "learning_rate": 6.2687866154246204e-06, "loss": 0.3072, "step": 3663 }, { "epoch": 0.8720176117094068, "grad_norm": 0.4035208753610946, "learning_rate": 6.266922021381588e-06, "loss": 0.3603, "step": 3664 }, { "epoch": 0.8722556077824716, "grad_norm": 0.3977305240744663, "learning_rate": 6.265057239042602e-06, "loss": 0.4096, "step": 3665 }, { "epoch": 0.8724936038555364, "grad_norm": 0.3826214538741579, "learning_rate": 6.263192268684814e-06, "loss": 0.3427, "step": 3666 }, { "epoch": 0.8727315999286012, "grad_norm": 0.40584625486221326, "learning_rate": 6.2613271105854065e-06, "loss": 0.3293, "step": 3667 }, { "epoch": 0.872969596001666, "grad_norm": 0.6329608207460291, "learning_rate": 6.259461765021584e-06, "loss": 0.3824, "step": 3668 }, { "epoch": 0.8732075920747308, "grad_norm": 0.3898204136913921, "learning_rate": 6.257596232270587e-06, "loss": 0.411, "step": 3669 }, { "epoch": 0.8734455881477956, "grad_norm": 0.36641905581111767, "learning_rate": 6.255730512609679e-06, "loss": 0.3169, "step": 3670 }, { "epoch": 0.8736835842208603, "grad_norm": 0.4829470116833935, "learning_rate": 6.25386460631615e-06, "loss": 0.3299, "step": 3671 }, { "epoch": 0.8739215802939252, "grad_norm": 0.3562617699548361, "learning_rate": 6.2519985136673235e-06, "loss": 0.3818, "step": 3672 }, { "epoch": 0.8741595763669899, "grad_norm": 0.3887628157005426, "learning_rate": 6.250132234940543e-06, "loss": 0.3696, "step": 3673 }, { "epoch": 0.8743975724400548, "grad_norm": 0.39340605103932813, "learning_rate": 6.248265770413187e-06, "loss": 0.3048, "step": 3674 }, { "epoch": 0.8746355685131195, "grad_norm": 0.3573200120544181, "learning_rate": 6.2463991203626565e-06, "loss": 0.3333, "step": 3675 }, { "epoch": 0.8748735645861844, "grad_norm": 0.3773239240601227, "learning_rate": 6.244532285066382e-06, "loss": 0.4064, "step": 3676 }, { "epoch": 0.8751115606592491, "grad_norm": 0.41082700756267004, "learning_rate": 6.2426652648018215e-06, "loss": 0.3286, "step": 3677 }, { "epoch": 0.875349556732314, "grad_norm": 0.38054049215364216, "learning_rate": 6.2407980598464615e-06, "loss": 0.2894, "step": 3678 }, { "epoch": 0.8755875528053787, "grad_norm": 0.3678209031947387, "learning_rate": 6.238930670477813e-06, "loss": 0.3526, "step": 3679 }, { "epoch": 0.8758255488784435, "grad_norm": 0.40127333894221406, "learning_rate": 6.237063096973418e-06, "loss": 0.3743, "step": 3680 }, { "epoch": 0.8760635449515083, "grad_norm": 0.3582362616157782, "learning_rate": 6.235195339610842e-06, "loss": 0.3023, "step": 3681 }, { "epoch": 0.8763015410245731, "grad_norm": 0.44829796994435595, "learning_rate": 6.233327398667682e-06, "loss": 0.3699, "step": 3682 }, { "epoch": 0.8765395370976379, "grad_norm": 0.3966518210291202, "learning_rate": 6.2314592744215605e-06, "loss": 0.4058, "step": 3683 }, { "epoch": 0.8767775331707027, "grad_norm": 0.390658057800168, "learning_rate": 6.229590967150124e-06, "loss": 0.3136, "step": 3684 }, { "epoch": 0.8770155292437675, "grad_norm": 0.4308954670330036, "learning_rate": 6.227722477131053e-06, "loss": 0.2851, "step": 3685 }, { "epoch": 0.8772535253168323, "grad_norm": 0.442296326588899, "learning_rate": 6.225853804642048e-06, "loss": 0.3569, "step": 3686 }, { "epoch": 0.8774915213898971, "grad_norm": 0.36301211552233426, "learning_rate": 6.223984949960843e-06, "loss": 0.3704, "step": 3687 }, { "epoch": 0.8777295174629619, "grad_norm": 0.39720988569190824, "learning_rate": 6.2221159133651946e-06, "loss": 0.301, "step": 3688 }, { "epoch": 0.8779675135360266, "grad_norm": 0.3772685192299192, "learning_rate": 6.220246695132887e-06, "loss": 0.3498, "step": 3689 }, { "epoch": 0.8782055096090915, "grad_norm": 0.35844628342397195, "learning_rate": 6.218377295541733e-06, "loss": 0.3919, "step": 3690 }, { "epoch": 0.8784435056821562, "grad_norm": 0.39072910342836975, "learning_rate": 6.21650771486957e-06, "loss": 0.3412, "step": 3691 }, { "epoch": 0.8786815017552211, "grad_norm": 0.3934761558248729, "learning_rate": 6.214637953394268e-06, "loss": 0.3195, "step": 3692 }, { "epoch": 0.8789194978282858, "grad_norm": 0.41802350381203424, "learning_rate": 6.212768011393717e-06, "loss": 0.3613, "step": 3693 }, { "epoch": 0.8791574939013507, "grad_norm": 0.3673222836006008, "learning_rate": 6.2108978891458374e-06, "loss": 0.4147, "step": 3694 }, { "epoch": 0.8793954899744154, "grad_norm": 0.3792040026583229, "learning_rate": 6.2090275869285735e-06, "loss": 0.3214, "step": 3695 }, { "epoch": 0.8796334860474803, "grad_norm": 0.39223693432096834, "learning_rate": 6.207157105019902e-06, "loss": 0.3345, "step": 3696 }, { "epoch": 0.879871482120545, "grad_norm": 0.36686866689029535, "learning_rate": 6.205286443697821e-06, "loss": 0.4232, "step": 3697 }, { "epoch": 0.8801094781936099, "grad_norm": 0.3987162836648213, "learning_rate": 6.2034156032403555e-06, "loss": 0.3754, "step": 3698 }, { "epoch": 0.8803474742666746, "grad_norm": 0.4458906881177551, "learning_rate": 6.201544583925562e-06, "loss": 0.3313, "step": 3699 }, { "epoch": 0.8805854703397394, "grad_norm": 0.3774384264045408, "learning_rate": 6.199673386031518e-06, "loss": 0.3442, "step": 3700 }, { "epoch": 0.8808234664128042, "grad_norm": 0.34013768584119114, "learning_rate": 6.197802009836331e-06, "loss": 0.4061, "step": 3701 }, { "epoch": 0.881061462485869, "grad_norm": 0.37237424732062263, "learning_rate": 6.195930455618132e-06, "loss": 0.333, "step": 3702 }, { "epoch": 0.8812994585589338, "grad_norm": 0.3960116351456216, "learning_rate": 6.194058723655083e-06, "loss": 0.3069, "step": 3703 }, { "epoch": 0.8815374546319986, "grad_norm": 0.39926411331947254, "learning_rate": 6.192186814225367e-06, "loss": 0.3738, "step": 3704 }, { "epoch": 0.8817754507050634, "grad_norm": 0.44748846445351975, "learning_rate": 6.190314727607196e-06, "loss": 0.3749, "step": 3705 }, { "epoch": 0.8820134467781282, "grad_norm": 0.41088375205239147, "learning_rate": 6.188442464078811e-06, "loss": 0.29, "step": 3706 }, { "epoch": 0.882251442851193, "grad_norm": 0.36272393711398615, "learning_rate": 6.1865700239184755e-06, "loss": 0.337, "step": 3707 }, { "epoch": 0.8824894389242578, "grad_norm": 0.38722534388780844, "learning_rate": 6.184697407404478e-06, "loss": 0.4223, "step": 3708 }, { "epoch": 0.8827274349973225, "grad_norm": 0.4042014513980286, "learning_rate": 6.18282461481514e-06, "loss": 0.3241, "step": 3709 }, { "epoch": 0.8829654310703874, "grad_norm": 0.4002874055607258, "learning_rate": 6.180951646428801e-06, "loss": 0.3142, "step": 3710 }, { "epoch": 0.8832034271434521, "grad_norm": 0.39939392997960865, "learning_rate": 6.179078502523834e-06, "loss": 0.3774, "step": 3711 }, { "epoch": 0.883441423216517, "grad_norm": 0.36300470644879695, "learning_rate": 6.177205183378629e-06, "loss": 0.4179, "step": 3712 }, { "epoch": 0.8836794192895817, "grad_norm": 0.36795015375713797, "learning_rate": 6.1753316892716156e-06, "loss": 0.3032, "step": 3713 }, { "epoch": 0.8839174153626466, "grad_norm": 0.4120733964613225, "learning_rate": 6.173458020481234e-06, "loss": 0.3473, "step": 3714 }, { "epoch": 0.8841554114357113, "grad_norm": 0.38507546779235274, "learning_rate": 6.171584177285962e-06, "loss": 0.4031, "step": 3715 }, { "epoch": 0.8843934075087762, "grad_norm": 0.37436623493188265, "learning_rate": 6.1697101599642976e-06, "loss": 0.3287, "step": 3716 }, { "epoch": 0.8846314035818409, "grad_norm": 0.37601841360225746, "learning_rate": 6.167835968794766e-06, "loss": 0.2813, "step": 3717 }, { "epoch": 0.8848693996549057, "grad_norm": 0.36926230908935936, "learning_rate": 6.165961604055917e-06, "loss": 0.365, "step": 3718 }, { "epoch": 0.8851073957279705, "grad_norm": 0.36487906680665755, "learning_rate": 6.1640870660263295e-06, "loss": 0.3984, "step": 3719 }, { "epoch": 0.8853453918010353, "grad_norm": 0.37472610973420145, "learning_rate": 6.162212354984607e-06, "loss": 0.3098, "step": 3720 }, { "epoch": 0.8855833878741001, "grad_norm": 0.37234322290198935, "learning_rate": 6.160337471209377e-06, "loss": 0.3439, "step": 3721 }, { "epoch": 0.8858213839471649, "grad_norm": 0.3877763209468878, "learning_rate": 6.158462414979292e-06, "loss": 0.3869, "step": 3722 }, { "epoch": 0.8860593800202297, "grad_norm": 0.4034026778107165, "learning_rate": 6.156587186573033e-06, "loss": 0.3543, "step": 3723 }, { "epoch": 0.8862973760932945, "grad_norm": 0.3700540316755882, "learning_rate": 6.154711786269307e-06, "loss": 0.3153, "step": 3724 }, { "epoch": 0.8865353721663592, "grad_norm": 0.39243565604036756, "learning_rate": 6.152836214346843e-06, "loss": 0.3664, "step": 3725 }, { "epoch": 0.8867733682394241, "grad_norm": 0.36077090161516945, "learning_rate": 6.150960471084397e-06, "loss": 0.4108, "step": 3726 }, { "epoch": 0.8870113643124888, "grad_norm": 0.37912503567699524, "learning_rate": 6.149084556760753e-06, "loss": 0.3269, "step": 3727 }, { "epoch": 0.8872493603855537, "grad_norm": 0.4517235850955721, "learning_rate": 6.147208471654715e-06, "loss": 0.3127, "step": 3728 }, { "epoch": 0.8874873564586184, "grad_norm": 0.387038812957885, "learning_rate": 6.145332216045119e-06, "loss": 0.3647, "step": 3729 }, { "epoch": 0.8877253525316833, "grad_norm": 0.3779239208107375, "learning_rate": 6.143455790210822e-06, "loss": 0.4041, "step": 3730 }, { "epoch": 0.887963348604748, "grad_norm": 0.3732475038469887, "learning_rate": 6.1415791944307056e-06, "loss": 0.3049, "step": 3731 }, { "epoch": 0.8882013446778129, "grad_norm": 0.4145109701498706, "learning_rate": 6.13970242898368e-06, "loss": 0.3649, "step": 3732 }, { "epoch": 0.8884393407508776, "grad_norm": 0.3649394561399619, "learning_rate": 6.137825494148678e-06, "loss": 0.4286, "step": 3733 }, { "epoch": 0.8886773368239425, "grad_norm": 0.35279202905563867, "learning_rate": 6.1359483902046605e-06, "loss": 0.3256, "step": 3734 }, { "epoch": 0.8889153328970072, "grad_norm": 0.3814399015745296, "learning_rate": 6.134071117430609e-06, "loss": 0.3111, "step": 3735 }, { "epoch": 0.889153328970072, "grad_norm": 0.387544011842372, "learning_rate": 6.132193676105533e-06, "loss": 0.3563, "step": 3736 }, { "epoch": 0.8893913250431368, "grad_norm": 0.357184072343786, "learning_rate": 6.1303160665084705e-06, "loss": 0.3986, "step": 3737 }, { "epoch": 0.8896293211162016, "grad_norm": 0.378840936122068, "learning_rate": 6.1284382889184756e-06, "loss": 0.3211, "step": 3738 }, { "epoch": 0.8898673171892664, "grad_norm": 0.36510963419962517, "learning_rate": 6.126560343614636e-06, "loss": 0.327, "step": 3739 }, { "epoch": 0.8901053132623312, "grad_norm": 0.359174171499247, "learning_rate": 6.1246822308760575e-06, "loss": 0.4083, "step": 3740 }, { "epoch": 0.890343309335396, "grad_norm": 0.45021032420547086, "learning_rate": 6.122803950981878e-06, "loss": 0.3493, "step": 3741 }, { "epoch": 0.8905813054084608, "grad_norm": 0.35860397415514117, "learning_rate": 6.1209255042112546e-06, "loss": 0.2942, "step": 3742 }, { "epoch": 0.8908193014815255, "grad_norm": 0.36924714501202766, "learning_rate": 6.119046890843371e-06, "loss": 0.3465, "step": 3743 }, { "epoch": 0.8910572975545904, "grad_norm": 0.3562121440352722, "learning_rate": 6.117168111157435e-06, "loss": 0.4262, "step": 3744 }, { "epoch": 0.8912952936276551, "grad_norm": 0.35320499238862774, "learning_rate": 6.115289165432681e-06, "loss": 0.3027, "step": 3745 }, { "epoch": 0.89153328970072, "grad_norm": 0.3759235321559563, "learning_rate": 6.113410053948364e-06, "loss": 0.3309, "step": 3746 }, { "epoch": 0.8917712857737847, "grad_norm": 0.36596971834289177, "learning_rate": 6.111530776983771e-06, "loss": 0.3858, "step": 3747 }, { "epoch": 0.8920092818468496, "grad_norm": 0.3707386283118514, "learning_rate": 6.109651334818204e-06, "loss": 0.3554, "step": 3748 }, { "epoch": 0.8922472779199143, "grad_norm": 0.34061647689147134, "learning_rate": 6.1077717277309986e-06, "loss": 0.2951, "step": 3749 }, { "epoch": 0.8924852739929792, "grad_norm": 0.36573408045045774, "learning_rate": 6.1058919560015106e-06, "loss": 0.3549, "step": 3750 }, { "epoch": 0.8927232700660439, "grad_norm": 0.3364732547422088, "learning_rate": 6.104012019909119e-06, "loss": 0.4163, "step": 3751 }, { "epoch": 0.8929612661391088, "grad_norm": 0.37430490741861316, "learning_rate": 6.102131919733229e-06, "loss": 0.3171, "step": 3752 }, { "epoch": 0.8931992622121735, "grad_norm": 0.3694758086583808, "learning_rate": 6.1002516557532684e-06, "loss": 0.3298, "step": 3753 }, { "epoch": 0.8934372582852383, "grad_norm": 0.34593061138576553, "learning_rate": 6.098371228248695e-06, "loss": 0.373, "step": 3754 }, { "epoch": 0.8936752543583031, "grad_norm": 0.37157546314792966, "learning_rate": 6.096490637498985e-06, "loss": 0.3566, "step": 3755 }, { "epoch": 0.8939132504313679, "grad_norm": 0.4080002638011446, "learning_rate": 6.09460988378364e-06, "loss": 0.2772, "step": 3756 }, { "epoch": 0.8941512465044327, "grad_norm": 0.39409190673689626, "learning_rate": 6.092728967382186e-06, "loss": 0.3462, "step": 3757 }, { "epoch": 0.8943892425774975, "grad_norm": 0.3707276172179733, "learning_rate": 6.090847888574176e-06, "loss": 0.3948, "step": 3758 }, { "epoch": 0.8946272386505623, "grad_norm": 0.3609993458216574, "learning_rate": 6.088966647639183e-06, "loss": 0.3002, "step": 3759 }, { "epoch": 0.8948652347236271, "grad_norm": 0.3955638153510142, "learning_rate": 6.087085244856805e-06, "loss": 0.3046, "step": 3760 }, { "epoch": 0.8951032307966919, "grad_norm": 0.3904254667786321, "learning_rate": 6.0852036805066684e-06, "loss": 0.3435, "step": 3761 }, { "epoch": 0.8953412268697567, "grad_norm": 0.3708619740535345, "learning_rate": 6.083321954868416e-06, "loss": 0.3902, "step": 3762 }, { "epoch": 0.8955792229428214, "grad_norm": 0.3966041731350864, "learning_rate": 6.0814400682217236e-06, "loss": 0.2959, "step": 3763 }, { "epoch": 0.8958172190158863, "grad_norm": 0.4221574195250755, "learning_rate": 6.0795580208462824e-06, "loss": 0.3404, "step": 3764 }, { "epoch": 0.896055215088951, "grad_norm": 0.3593405180380063, "learning_rate": 6.077675813021812e-06, "loss": 0.3972, "step": 3765 }, { "epoch": 0.8962932111620159, "grad_norm": 0.39581975922524365, "learning_rate": 6.075793445028056e-06, "loss": 0.344, "step": 3766 }, { "epoch": 0.8965312072350806, "grad_norm": 0.3955579182890299, "learning_rate": 6.073910917144778e-06, "loss": 0.3158, "step": 3767 }, { "epoch": 0.8967692033081455, "grad_norm": 0.3797132012723383, "learning_rate": 6.072028229651773e-06, "loss": 0.3482, "step": 3768 }, { "epoch": 0.8970071993812102, "grad_norm": 0.410201975103764, "learning_rate": 6.0701453828288524e-06, "loss": 0.4076, "step": 3769 }, { "epoch": 0.8972451954542751, "grad_norm": 0.4137362624937579, "learning_rate": 6.068262376955854e-06, "loss": 0.2942, "step": 3770 }, { "epoch": 0.8974831915273398, "grad_norm": 0.3409390585560632, "learning_rate": 6.066379212312638e-06, "loss": 0.3169, "step": 3771 }, { "epoch": 0.8977211876004046, "grad_norm": 0.3649824690116523, "learning_rate": 6.06449588917909e-06, "loss": 0.418, "step": 3772 }, { "epoch": 0.8979591836734694, "grad_norm": 0.4238883491721038, "learning_rate": 6.062612407835117e-06, "loss": 0.3662, "step": 3773 }, { "epoch": 0.8981971797465342, "grad_norm": 0.3737577013120221, "learning_rate": 6.060728768560654e-06, "loss": 0.3084, "step": 3774 }, { "epoch": 0.898435175819599, "grad_norm": 0.37762209485191345, "learning_rate": 6.058844971635654e-06, "loss": 0.3556, "step": 3775 }, { "epoch": 0.8986731718926638, "grad_norm": 0.34826957290855015, "learning_rate": 6.056961017340097e-06, "loss": 0.3975, "step": 3776 }, { "epoch": 0.8989111679657286, "grad_norm": 0.4161304452441926, "learning_rate": 6.055076905953986e-06, "loss": 0.2959, "step": 3777 }, { "epoch": 0.8991491640387934, "grad_norm": 0.35944427744861646, "learning_rate": 6.053192637757346e-06, "loss": 0.2953, "step": 3778 }, { "epoch": 0.8993871601118582, "grad_norm": 0.3534735542841618, "learning_rate": 6.051308213030224e-06, "loss": 0.3666, "step": 3779 }, { "epoch": 0.899625156184923, "grad_norm": 0.4729648450816866, "learning_rate": 6.049423632052693e-06, "loss": 0.3592, "step": 3780 }, { "epoch": 0.8998631522579877, "grad_norm": 0.40762763698787485, "learning_rate": 6.0475388951048486e-06, "loss": 0.298, "step": 3781 }, { "epoch": 0.9001011483310526, "grad_norm": 0.3810584850880683, "learning_rate": 6.045654002466809e-06, "loss": 0.3393, "step": 3782 }, { "epoch": 0.9003391444041173, "grad_norm": 0.4559084065779489, "learning_rate": 6.043768954418719e-06, "loss": 0.4002, "step": 3783 }, { "epoch": 0.9005771404771822, "grad_norm": 0.43908602771497135, "learning_rate": 6.041883751240739e-06, "loss": 0.3226, "step": 3784 }, { "epoch": 0.9008151365502469, "grad_norm": 0.3930802632673325, "learning_rate": 6.03999839321306e-06, "loss": 0.3064, "step": 3785 }, { "epoch": 0.9010531326233118, "grad_norm": 0.33579273743850313, "learning_rate": 6.03811288061589e-06, "loss": 0.368, "step": 3786 }, { "epoch": 0.9012911286963765, "grad_norm": 0.39169821582510483, "learning_rate": 6.036227213729464e-06, "loss": 0.3959, "step": 3787 }, { "epoch": 0.9015291247694414, "grad_norm": 0.43853941776440175, "learning_rate": 6.03434139283404e-06, "loss": 0.3188, "step": 3788 }, { "epoch": 0.9017671208425061, "grad_norm": 0.3689294208498079, "learning_rate": 6.032455418209895e-06, "loss": 0.351, "step": 3789 }, { "epoch": 0.902005116915571, "grad_norm": 0.340196421020995, "learning_rate": 6.030569290137335e-06, "loss": 0.3988, "step": 3790 }, { "epoch": 0.9022431129886357, "grad_norm": 0.39137456638518625, "learning_rate": 6.028683008896683e-06, "loss": 0.3547, "step": 3791 }, { "epoch": 0.9024811090617005, "grad_norm": 0.43319110374923225, "learning_rate": 6.026796574768288e-06, "loss": 0.3019, "step": 3792 }, { "epoch": 0.9027191051347653, "grad_norm": 0.3486367504488833, "learning_rate": 6.024909988032519e-06, "loss": 0.3584, "step": 3793 }, { "epoch": 0.9029571012078301, "grad_norm": 0.36094039333364036, "learning_rate": 6.0230232489697725e-06, "loss": 0.412, "step": 3794 }, { "epoch": 0.9031950972808949, "grad_norm": 0.3615716508779515, "learning_rate": 6.021136357860461e-06, "loss": 0.3103, "step": 3795 }, { "epoch": 0.9034330933539596, "grad_norm": 0.3767985308127309, "learning_rate": 6.0192493149850255e-06, "loss": 0.3292, "step": 3796 }, { "epoch": 0.9036710894270245, "grad_norm": 0.36959666250758044, "learning_rate": 6.017362120623928e-06, "loss": 0.3781, "step": 3797 }, { "epoch": 0.9039090855000892, "grad_norm": 0.3571566018380154, "learning_rate": 6.015474775057649e-06, "loss": 0.3583, "step": 3798 }, { "epoch": 0.904147081573154, "grad_norm": 0.38935644719203016, "learning_rate": 6.013587278566698e-06, "loss": 0.3145, "step": 3799 }, { "epoch": 0.9043850776462188, "grad_norm": 0.3701781411050491, "learning_rate": 6.011699631431603e-06, "loss": 0.3197, "step": 3800 }, { "epoch": 0.9046230737192836, "grad_norm": 0.3906408890697199, "learning_rate": 6.0098118339329124e-06, "loss": 0.4124, "step": 3801 }, { "epoch": 0.9048610697923484, "grad_norm": 0.369451923410181, "learning_rate": 6.007923886351202e-06, "loss": 0.3269, "step": 3802 }, { "epoch": 0.9050990658654132, "grad_norm": 0.37174979302655076, "learning_rate": 6.00603578896707e-06, "loss": 0.3283, "step": 3803 }, { "epoch": 0.905337061938478, "grad_norm": 0.37058723879913413, "learning_rate": 6.004147542061129e-06, "loss": 0.3723, "step": 3804 }, { "epoch": 0.9055750580115428, "grad_norm": 0.37650352346249066, "learning_rate": 6.0022591459140235e-06, "loss": 0.3471, "step": 3805 }, { "epoch": 0.9058130540846075, "grad_norm": 0.37453899744942026, "learning_rate": 6.000370600806415e-06, "loss": 0.3212, "step": 3806 }, { "epoch": 0.9060510501576724, "grad_norm": 0.3869447902100158, "learning_rate": 5.9984819070189845e-06, "loss": 0.3526, "step": 3807 }, { "epoch": 0.9062890462307371, "grad_norm": 0.3527049994912703, "learning_rate": 5.9965930648324425e-06, "loss": 0.3878, "step": 3808 }, { "epoch": 0.906527042303802, "grad_norm": 0.40048440151462644, "learning_rate": 5.9947040745275174e-06, "loss": 0.3268, "step": 3809 }, { "epoch": 0.9067650383768667, "grad_norm": 0.36101747024521835, "learning_rate": 5.992814936384958e-06, "loss": 0.3299, "step": 3810 }, { "epoch": 0.9070030344499316, "grad_norm": 0.3884060451475327, "learning_rate": 5.990925650685539e-06, "loss": 0.3604, "step": 3811 }, { "epoch": 0.9072410305229963, "grad_norm": 0.38945955785689845, "learning_rate": 5.9890362177100535e-06, "loss": 0.3968, "step": 3812 }, { "epoch": 0.9074790265960612, "grad_norm": 0.38256970785987937, "learning_rate": 5.987146637739319e-06, "loss": 0.285, "step": 3813 }, { "epoch": 0.9077170226691259, "grad_norm": 0.3725122594244136, "learning_rate": 5.985256911054171e-06, "loss": 0.3099, "step": 3814 }, { "epoch": 0.9079550187421908, "grad_norm": 0.3832707895396545, "learning_rate": 5.983367037935473e-06, "loss": 0.3996, "step": 3815 }, { "epoch": 0.9081930148152555, "grad_norm": 0.3833096459028676, "learning_rate": 5.9814770186641065e-06, "loss": 0.3466, "step": 3816 }, { "epoch": 0.9084310108883203, "grad_norm": 0.36397044311226656, "learning_rate": 5.979586853520974e-06, "loss": 0.2958, "step": 3817 }, { "epoch": 0.9086690069613851, "grad_norm": 0.36891177007878145, "learning_rate": 5.977696542787003e-06, "loss": 0.3562, "step": 3818 }, { "epoch": 0.9089070030344499, "grad_norm": 0.3840511476005476, "learning_rate": 5.975806086743137e-06, "loss": 0.3974, "step": 3819 }, { "epoch": 0.9091449991075147, "grad_norm": 0.36836219306960205, "learning_rate": 5.973915485670348e-06, "loss": 0.2928, "step": 3820 }, { "epoch": 0.9093829951805795, "grad_norm": 0.3684371158231424, "learning_rate": 5.972024739849622e-06, "loss": 0.3035, "step": 3821 }, { "epoch": 0.9096209912536443, "grad_norm": 0.3623595279659431, "learning_rate": 5.970133849561973e-06, "loss": 0.3729, "step": 3822 }, { "epoch": 0.9098589873267091, "grad_norm": 0.3813529115463126, "learning_rate": 5.968242815088435e-06, "loss": 0.3771, "step": 3823 }, { "epoch": 0.9100969833997739, "grad_norm": 0.4428663575301828, "learning_rate": 5.9663516367100614e-06, "loss": 0.3069, "step": 3824 }, { "epoch": 0.9103349794728387, "grad_norm": 0.38649551932626275, "learning_rate": 5.964460314707928e-06, "loss": 0.3451, "step": 3825 }, { "epoch": 0.9105729755459034, "grad_norm": 0.35224183137486065, "learning_rate": 5.962568849363133e-06, "loss": 0.4268, "step": 3826 }, { "epoch": 0.9108109716189683, "grad_norm": 0.3468956951171978, "learning_rate": 5.960677240956792e-06, "loss": 0.3078, "step": 3827 }, { "epoch": 0.911048967692033, "grad_norm": 0.3542112048407511, "learning_rate": 5.958785489770049e-06, "loss": 0.2749, "step": 3828 }, { "epoch": 0.9112869637650979, "grad_norm": 0.3674351382002242, "learning_rate": 5.956893596084061e-06, "loss": 0.3578, "step": 3829 }, { "epoch": 0.9115249598381626, "grad_norm": 0.35983162039990363, "learning_rate": 5.955001560180015e-06, "loss": 0.392, "step": 3830 }, { "epoch": 0.9117629559112275, "grad_norm": 0.36869673312884654, "learning_rate": 5.953109382339111e-06, "loss": 0.296, "step": 3831 }, { "epoch": 0.9120009519842922, "grad_norm": 0.3937419732099826, "learning_rate": 5.951217062842573e-06, "loss": 0.3287, "step": 3832 }, { "epoch": 0.9122389480573571, "grad_norm": 0.39019517526976494, "learning_rate": 5.949324601971648e-06, "loss": 0.4424, "step": 3833 }, { "epoch": 0.9124769441304218, "grad_norm": 0.3944112522424989, "learning_rate": 5.947432000007601e-06, "loss": 0.3025, "step": 3834 }, { "epoch": 0.9127149402034866, "grad_norm": 0.3906256947087767, "learning_rate": 5.9455392572317225e-06, "loss": 0.3067, "step": 3835 }, { "epoch": 0.9129529362765514, "grad_norm": 0.36209911312794113, "learning_rate": 5.9436463739253154e-06, "loss": 0.3718, "step": 3836 }, { "epoch": 0.9131909323496162, "grad_norm": 0.37733100352844173, "learning_rate": 5.9417533503697155e-06, "loss": 0.398, "step": 3837 }, { "epoch": 0.913428928422681, "grad_norm": 0.3835158676308841, "learning_rate": 5.939860186846269e-06, "loss": 0.3215, "step": 3838 }, { "epoch": 0.9136669244957458, "grad_norm": 0.367565730076443, "learning_rate": 5.937966883636348e-06, "loss": 0.3263, "step": 3839 }, { "epoch": 0.9139049205688106, "grad_norm": 0.35666300012315477, "learning_rate": 5.936073441021344e-06, "loss": 0.3827, "step": 3840 }, { "epoch": 0.9141429166418754, "grad_norm": 0.3638886300120447, "learning_rate": 5.934179859282668e-06, "loss": 0.3401, "step": 3841 }, { "epoch": 0.9143809127149402, "grad_norm": 0.3412227559234039, "learning_rate": 5.932286138701756e-06, "loss": 0.3269, "step": 3842 }, { "epoch": 0.914618908788005, "grad_norm": 0.36540816931263137, "learning_rate": 5.930392279560059e-06, "loss": 0.3647, "step": 3843 }, { "epoch": 0.9148569048610697, "grad_norm": 0.346958869901011, "learning_rate": 5.928498282139053e-06, "loss": 0.3772, "step": 3844 }, { "epoch": 0.9150949009341346, "grad_norm": 0.3794111378758652, "learning_rate": 5.926604146720232e-06, "loss": 0.3163, "step": 3845 }, { "epoch": 0.9153328970071993, "grad_norm": 0.39132485441773784, "learning_rate": 5.9247098735851125e-06, "loss": 0.3245, "step": 3846 }, { "epoch": 0.9155708930802642, "grad_norm": 0.36279235336046384, "learning_rate": 5.922815463015229e-06, "loss": 0.3925, "step": 3847 }, { "epoch": 0.9158088891533289, "grad_norm": 0.380778919905592, "learning_rate": 5.920920915292138e-06, "loss": 0.3313, "step": 3848 }, { "epoch": 0.9160468852263938, "grad_norm": 0.3651654131810896, "learning_rate": 5.919026230697418e-06, "loss": 0.2968, "step": 3849 }, { "epoch": 0.9162848812994585, "grad_norm": 0.3670633414101187, "learning_rate": 5.917131409512663e-06, "loss": 0.3495, "step": 3850 }, { "epoch": 0.9165228773725234, "grad_norm": 0.34896717104695807, "learning_rate": 5.915236452019491e-06, "loss": 0.3772, "step": 3851 }, { "epoch": 0.9167608734455881, "grad_norm": 0.3780403642407919, "learning_rate": 5.913341358499543e-06, "loss": 0.3302, "step": 3852 }, { "epoch": 0.916998869518653, "grad_norm": 0.36534779843005166, "learning_rate": 5.911446129234473e-06, "loss": 0.3154, "step": 3853 }, { "epoch": 0.9172368655917177, "grad_norm": 0.36166515125650645, "learning_rate": 5.909550764505959e-06, "loss": 0.3428, "step": 3854 }, { "epoch": 0.9174748616647825, "grad_norm": 0.36662895628646563, "learning_rate": 5.907655264595701e-06, "loss": 0.3726, "step": 3855 }, { "epoch": 0.9177128577378473, "grad_norm": 0.41478425489418713, "learning_rate": 5.905759629785417e-06, "loss": 0.306, "step": 3856 }, { "epoch": 0.9179508538109121, "grad_norm": 0.442141948179443, "learning_rate": 5.903863860356843e-06, "loss": 0.3664, "step": 3857 }, { "epoch": 0.9181888498839769, "grad_norm": 0.3835994090422323, "learning_rate": 5.9019679565917396e-06, "loss": 0.4351, "step": 3858 }, { "epoch": 0.9184268459570417, "grad_norm": 0.3916877069146898, "learning_rate": 5.900071918771885e-06, "loss": 0.3322, "step": 3859 }, { "epoch": 0.9186648420301065, "grad_norm": 0.4452770801712549, "learning_rate": 5.898175747179077e-06, "loss": 0.3111, "step": 3860 }, { "epoch": 0.9189028381031713, "grad_norm": 0.3810069457057913, "learning_rate": 5.896279442095132e-06, "loss": 0.3757, "step": 3861 }, { "epoch": 0.919140834176236, "grad_norm": 0.42405450068532397, "learning_rate": 5.894383003801889e-06, "loss": 0.3957, "step": 3862 }, { "epoch": 0.9193788302493009, "grad_norm": 0.3860106901737225, "learning_rate": 5.892486432581206e-06, "loss": 0.3128, "step": 3863 }, { "epoch": 0.9196168263223656, "grad_norm": 0.35769077891896367, "learning_rate": 5.890589728714959e-06, "loss": 0.3283, "step": 3864 }, { "epoch": 0.9198548223954305, "grad_norm": 0.3580135745616656, "learning_rate": 5.8886928924850484e-06, "loss": 0.4148, "step": 3865 }, { "epoch": 0.9200928184684952, "grad_norm": 0.390966292937549, "learning_rate": 5.886795924173388e-06, "loss": 0.3515, "step": 3866 }, { "epoch": 0.9203308145415601, "grad_norm": 0.3582229975022328, "learning_rate": 5.884898824061914e-06, "loss": 0.2942, "step": 3867 }, { "epoch": 0.9205688106146248, "grad_norm": 0.383565757640599, "learning_rate": 5.883001592432583e-06, "loss": 0.3574, "step": 3868 }, { "epoch": 0.9208068066876897, "grad_norm": 0.41449449325824644, "learning_rate": 5.881104229567373e-06, "loss": 0.3743, "step": 3869 }, { "epoch": 0.9210448027607544, "grad_norm": 0.4022815267702927, "learning_rate": 5.879206735748275e-06, "loss": 0.3004, "step": 3870 }, { "epoch": 0.9212827988338192, "grad_norm": 0.37800390773235576, "learning_rate": 5.877309111257306e-06, "loss": 0.3007, "step": 3871 }, { "epoch": 0.921520794906884, "grad_norm": 0.3826886690510641, "learning_rate": 5.8754113563765e-06, "loss": 0.3896, "step": 3872 }, { "epoch": 0.9217587909799488, "grad_norm": 0.39813220759183665, "learning_rate": 5.873513471387911e-06, "loss": 0.3316, "step": 3873 }, { "epoch": 0.9219967870530136, "grad_norm": 0.3865193341498503, "learning_rate": 5.871615456573608e-06, "loss": 0.302, "step": 3874 }, { "epoch": 0.9222347831260784, "grad_norm": 0.37725905957142175, "learning_rate": 5.8697173122156885e-06, "loss": 0.3364, "step": 3875 }, { "epoch": 0.9224727791991432, "grad_norm": 0.3896841573344773, "learning_rate": 5.8678190385962585e-06, "loss": 0.4141, "step": 3876 }, { "epoch": 0.922710775272208, "grad_norm": 0.3717555754024187, "learning_rate": 5.8659206359974495e-06, "loss": 0.3271, "step": 3877 }, { "epoch": 0.9229487713452728, "grad_norm": 0.38884985169390957, "learning_rate": 5.864022104701413e-06, "loss": 0.3206, "step": 3878 }, { "epoch": 0.9231867674183376, "grad_norm": 0.40651050344993656, "learning_rate": 5.862123444990319e-06, "loss": 0.3614, "step": 3879 }, { "epoch": 0.9234247634914023, "grad_norm": 0.3699122959015574, "learning_rate": 5.8602246571463506e-06, "loss": 0.3953, "step": 3880 }, { "epoch": 0.9236627595644672, "grad_norm": 0.35373619100502907, "learning_rate": 5.858325741451718e-06, "loss": 0.3106, "step": 3881 }, { "epoch": 0.9239007556375319, "grad_norm": 0.3822556148266497, "learning_rate": 5.8564266981886465e-06, "loss": 0.3551, "step": 3882 }, { "epoch": 0.9241387517105968, "grad_norm": 0.3367352347831046, "learning_rate": 5.854527527639381e-06, "loss": 0.4214, "step": 3883 }, { "epoch": 0.9243767477836615, "grad_norm": 0.41091234068434845, "learning_rate": 5.852628230086184e-06, "loss": 0.3105, "step": 3884 }, { "epoch": 0.9246147438567264, "grad_norm": 0.39373294006122483, "learning_rate": 5.850728805811339e-06, "loss": 0.3234, "step": 3885 }, { "epoch": 0.9248527399297911, "grad_norm": 0.3427571664260684, "learning_rate": 5.8488292550971485e-06, "loss": 0.3525, "step": 3886 }, { "epoch": 0.925090736002856, "grad_norm": 0.35439854536041354, "learning_rate": 5.84692957822593e-06, "loss": 0.3836, "step": 3887 }, { "epoch": 0.9253287320759207, "grad_norm": 0.396795073329766, "learning_rate": 5.845029775480026e-06, "loss": 0.295, "step": 3888 }, { "epoch": 0.9255667281489856, "grad_norm": 0.3777068010438854, "learning_rate": 5.843129847141792e-06, "loss": 0.3076, "step": 3889 }, { "epoch": 0.9258047242220503, "grad_norm": 0.35763041328215195, "learning_rate": 5.841229793493604e-06, "loss": 0.4072, "step": 3890 }, { "epoch": 0.9260427202951151, "grad_norm": 0.38017084454143346, "learning_rate": 5.839329614817858e-06, "loss": 0.3474, "step": 3891 }, { "epoch": 0.9262807163681799, "grad_norm": 0.35675610940815455, "learning_rate": 5.837429311396967e-06, "loss": 0.2979, "step": 3892 }, { "epoch": 0.9265187124412447, "grad_norm": 0.3793380743409526, "learning_rate": 5.835528883513364e-06, "loss": 0.3703, "step": 3893 }, { "epoch": 0.9267567085143095, "grad_norm": 0.36281165596625997, "learning_rate": 5.833628331449498e-06, "loss": 0.4068, "step": 3894 }, { "epoch": 0.9269947045873743, "grad_norm": 0.3691042976161106, "learning_rate": 5.831727655487839e-06, "loss": 0.3091, "step": 3895 }, { "epoch": 0.9272327006604391, "grad_norm": 0.344360877845291, "learning_rate": 5.829826855910875e-06, "loss": 0.3287, "step": 3896 }, { "epoch": 0.9274706967335039, "grad_norm": 0.3740607315809258, "learning_rate": 5.827925933001111e-06, "loss": 0.3885, "step": 3897 }, { "epoch": 0.9277086928065686, "grad_norm": 0.3685370821262386, "learning_rate": 5.826024887041071e-06, "loss": 0.3755, "step": 3898 }, { "epoch": 0.9279466888796335, "grad_norm": 0.3760368000609693, "learning_rate": 5.8241237183132986e-06, "loss": 0.289, "step": 3899 }, { "epoch": 0.9281846849526982, "grad_norm": 0.3705899806280766, "learning_rate": 5.822222427100354e-06, "loss": 0.3575, "step": 3900 }, { "epoch": 0.9284226810257631, "grad_norm": 0.39603394670641257, "learning_rate": 5.820321013684815e-06, "loss": 0.4147, "step": 3901 }, { "epoch": 0.9286606770988278, "grad_norm": 0.36521058651075333, "learning_rate": 5.818419478349281e-06, "loss": 0.3325, "step": 3902 }, { "epoch": 0.9288986731718927, "grad_norm": 0.3439794302882938, "learning_rate": 5.816517821376365e-06, "loss": 0.3189, "step": 3903 }, { "epoch": 0.9291366692449574, "grad_norm": 0.37141750525341993, "learning_rate": 5.814616043048702e-06, "loss": 0.3497, "step": 3904 }, { "epoch": 0.9293746653180223, "grad_norm": 0.38016142601364294, "learning_rate": 5.8127141436489395e-06, "loss": 0.3838, "step": 3905 }, { "epoch": 0.929612661391087, "grad_norm": 0.3606163418991101, "learning_rate": 5.810812123459753e-06, "loss": 0.2855, "step": 3906 }, { "epoch": 0.9298506574641519, "grad_norm": 0.36356185335551205, "learning_rate": 5.808909982763825e-06, "loss": 0.3472, "step": 3907 }, { "epoch": 0.9300886535372166, "grad_norm": 0.36536907609964997, "learning_rate": 5.807007721843862e-06, "loss": 0.421, "step": 3908 }, { "epoch": 0.9303266496102814, "grad_norm": 0.4097160409903546, "learning_rate": 5.805105340982586e-06, "loss": 0.3065, "step": 3909 }, { "epoch": 0.9305646456833462, "grad_norm": 0.32639160343180673, "learning_rate": 5.803202840462741e-06, "loss": 0.2853, "step": 3910 }, { "epoch": 0.930802641756411, "grad_norm": 0.34431820740085145, "learning_rate": 5.801300220567083e-06, "loss": 0.3654, "step": 3911 }, { "epoch": 0.9310406378294758, "grad_norm": 0.3393942154458769, "learning_rate": 5.799397481578388e-06, "loss": 0.3814, "step": 3912 }, { "epoch": 0.9312786339025406, "grad_norm": 0.3650148623295529, "learning_rate": 5.797494623779453e-06, "loss": 0.2883, "step": 3913 }, { "epoch": 0.9315166299756054, "grad_norm": 0.40074023711292456, "learning_rate": 5.795591647453086e-06, "loss": 0.3447, "step": 3914 }, { "epoch": 0.9317546260486702, "grad_norm": 0.38051255748001506, "learning_rate": 5.79368855288212e-06, "loss": 0.3804, "step": 3915 }, { "epoch": 0.931992622121735, "grad_norm": 0.37149987585322053, "learning_rate": 5.7917853403493985e-06, "loss": 0.3397, "step": 3916 }, { "epoch": 0.9322306181947998, "grad_norm": 0.37572329221656625, "learning_rate": 5.7898820101377885e-06, "loss": 0.2941, "step": 3917 }, { "epoch": 0.9324686142678645, "grad_norm": 0.3971060021730622, "learning_rate": 5.787978562530172e-06, "loss": 0.3382, "step": 3918 }, { "epoch": 0.9327066103409294, "grad_norm": 0.39918849730220163, "learning_rate": 5.786074997809445e-06, "loss": 0.4049, "step": 3919 }, { "epoch": 0.9329446064139941, "grad_norm": 0.386940462855031, "learning_rate": 5.784171316258528e-06, "loss": 0.3133, "step": 3920 }, { "epoch": 0.933182602487059, "grad_norm": 0.39373148829016735, "learning_rate": 5.782267518160354e-06, "loss": 0.3161, "step": 3921 }, { "epoch": 0.9334205985601237, "grad_norm": 0.4027047525256417, "learning_rate": 5.780363603797874e-06, "loss": 0.3806, "step": 3922 }, { "epoch": 0.9336585946331886, "grad_norm": 0.436189477052603, "learning_rate": 5.778459573454058e-06, "loss": 0.3396, "step": 3923 }, { "epoch": 0.9338965907062533, "grad_norm": 0.37159344865326444, "learning_rate": 5.776555427411891e-06, "loss": 0.2909, "step": 3924 }, { "epoch": 0.9341345867793182, "grad_norm": 0.35999558789407055, "learning_rate": 5.774651165954377e-06, "loss": 0.3357, "step": 3925 }, { "epoch": 0.9343725828523829, "grad_norm": 0.3450624176837847, "learning_rate": 5.772746789364534e-06, "loss": 0.4188, "step": 3926 }, { "epoch": 0.9346105789254477, "grad_norm": 0.375979183281526, "learning_rate": 5.770842297925402e-06, "loss": 0.3443, "step": 3927 }, { "epoch": 0.9348485749985125, "grad_norm": 0.3646206735183569, "learning_rate": 5.768937691920036e-06, "loss": 0.2904, "step": 3928 }, { "epoch": 0.9350865710715773, "grad_norm": 0.37338768923849774, "learning_rate": 5.767032971631506e-06, "loss": 0.3965, "step": 3929 }, { "epoch": 0.9353245671446421, "grad_norm": 0.36862713467496905, "learning_rate": 5.7651281373429e-06, "loss": 0.3441, "step": 3930 }, { "epoch": 0.9355625632177069, "grad_norm": 0.36754831493989937, "learning_rate": 5.763223189337324e-06, "loss": 0.325, "step": 3931 }, { "epoch": 0.9358005592907717, "grad_norm": 0.37946091566468837, "learning_rate": 5.761318127897903e-06, "loss": 0.3237, "step": 3932 }, { "epoch": 0.9360385553638365, "grad_norm": 0.381077983146763, "learning_rate": 5.759412953307771e-06, "loss": 0.3836, "step": 3933 }, { "epoch": 0.9362765514369012, "grad_norm": 0.3797233859010859, "learning_rate": 5.75750766585009e-06, "loss": 0.3368, "step": 3934 }, { "epoch": 0.9365145475099661, "grad_norm": 0.4055187658650143, "learning_rate": 5.7556022658080276e-06, "loss": 0.3064, "step": 3935 }, { "epoch": 0.9367525435830308, "grad_norm": 0.3428864215245841, "learning_rate": 5.753696753464778e-06, "loss": 0.3785, "step": 3936 }, { "epoch": 0.9369905396560957, "grad_norm": 0.38066009287080843, "learning_rate": 5.751791129103545e-06, "loss": 0.4031, "step": 3937 }, { "epoch": 0.9372285357291604, "grad_norm": 0.38910361084548145, "learning_rate": 5.749885393007552e-06, "loss": 0.3196, "step": 3938 }, { "epoch": 0.9374665318022253, "grad_norm": 0.4447865941468255, "learning_rate": 5.747979545460036e-06, "loss": 0.3112, "step": 3939 }, { "epoch": 0.93770452787529, "grad_norm": 0.36280386489269756, "learning_rate": 5.746073586744258e-06, "loss": 0.3824, "step": 3940 }, { "epoch": 0.9379425239483549, "grad_norm": 0.3715191063601824, "learning_rate": 5.744167517143486e-06, "loss": 0.351, "step": 3941 }, { "epoch": 0.9381805200214196, "grad_norm": 0.3916498754924406, "learning_rate": 5.742261336941013e-06, "loss": 0.3221, "step": 3942 }, { "epoch": 0.9384185160944845, "grad_norm": 0.38502974009058716, "learning_rate": 5.740355046420142e-06, "loss": 0.4041, "step": 3943 }, { "epoch": 0.9386565121675492, "grad_norm": 0.5425497961680991, "learning_rate": 5.738448645864195e-06, "loss": 0.415, "step": 3944 }, { "epoch": 0.938894508240614, "grad_norm": 0.3914605628286909, "learning_rate": 5.736542135556512e-06, "loss": 0.3102, "step": 3945 }, { "epoch": 0.9391325043136788, "grad_norm": 0.3930678191979905, "learning_rate": 5.7346355157804455e-06, "loss": 0.3227, "step": 3946 }, { "epoch": 0.9393705003867436, "grad_norm": 0.3660355806904165, "learning_rate": 5.732728786819368e-06, "loss": 0.3873, "step": 3947 }, { "epoch": 0.9396084964598084, "grad_norm": 0.3579948522758407, "learning_rate": 5.730821948956665e-06, "loss": 0.366, "step": 3948 }, { "epoch": 0.9398464925328732, "grad_norm": 0.35994213666298, "learning_rate": 5.7289150024757415e-06, "loss": 0.2896, "step": 3949 }, { "epoch": 0.940084488605938, "grad_norm": 0.3660284695948094, "learning_rate": 5.727007947660016e-06, "loss": 0.3578, "step": 3950 }, { "epoch": 0.9403224846790028, "grad_norm": 0.39417668974001635, "learning_rate": 5.725100784792924e-06, "loss": 0.3948, "step": 3951 }, { "epoch": 0.9405604807520676, "grad_norm": 0.382207382206984, "learning_rate": 5.723193514157918e-06, "loss": 0.3124, "step": 3952 }, { "epoch": 0.9407984768251324, "grad_norm": 0.4024628893156382, "learning_rate": 5.721286136038463e-06, "loss": 0.2865, "step": 3953 }, { "epoch": 0.9410364728981971, "grad_norm": 0.386924814297485, "learning_rate": 5.719378650718046e-06, "loss": 0.364, "step": 3954 }, { "epoch": 0.941274468971262, "grad_norm": 0.376900110000449, "learning_rate": 5.717471058480165e-06, "loss": 0.3686, "step": 3955 }, { "epoch": 0.9415124650443267, "grad_norm": 0.3832996377113013, "learning_rate": 5.7155633596083345e-06, "loss": 0.2966, "step": 3956 }, { "epoch": 0.9417504611173916, "grad_norm": 0.3689062510414365, "learning_rate": 5.713655554386088e-06, "loss": 0.3313, "step": 3957 }, { "epoch": 0.9419884571904563, "grad_norm": 0.36517279149138915, "learning_rate": 5.71174764309697e-06, "loss": 0.3935, "step": 3958 }, { "epoch": 0.9422264532635212, "grad_norm": 0.37317314065760315, "learning_rate": 5.709839626024545e-06, "loss": 0.2822, "step": 3959 }, { "epoch": 0.9424644493365859, "grad_norm": 0.39141777366177366, "learning_rate": 5.70793150345239e-06, "loss": 0.3011, "step": 3960 }, { "epoch": 0.9427024454096508, "grad_norm": 0.3705484724661079, "learning_rate": 5.706023275664101e-06, "loss": 0.3703, "step": 3961 }, { "epoch": 0.9429404414827155, "grad_norm": 0.37822124229893755, "learning_rate": 5.704114942943286e-06, "loss": 0.3911, "step": 3962 }, { "epoch": 0.9431784375557803, "grad_norm": 0.4070014262482564, "learning_rate": 5.702206505573572e-06, "loss": 0.3071, "step": 3963 }, { "epoch": 0.9434164336288451, "grad_norm": 0.36329996164851147, "learning_rate": 5.7002979638386005e-06, "loss": 0.3356, "step": 3964 }, { "epoch": 0.9436544297019099, "grad_norm": 0.36649962919002427, "learning_rate": 5.698389318022026e-06, "loss": 0.4008, "step": 3965 }, { "epoch": 0.9438924257749747, "grad_norm": 0.3638109792735608, "learning_rate": 5.696480568407523e-06, "loss": 0.3156, "step": 3966 }, { "epoch": 0.9441304218480395, "grad_norm": 0.3605646832595526, "learning_rate": 5.694571715278775e-06, "loss": 0.2936, "step": 3967 }, { "epoch": 0.9443684179211043, "grad_norm": 0.3453303049010467, "learning_rate": 5.692662758919489e-06, "loss": 0.3573, "step": 3968 }, { "epoch": 0.9446064139941691, "grad_norm": 0.3772126922641312, "learning_rate": 5.690753699613382e-06, "loss": 0.4241, "step": 3969 }, { "epoch": 0.9448444100672339, "grad_norm": 0.445093770628227, "learning_rate": 5.688844537644186e-06, "loss": 0.3324, "step": 3970 }, { "epoch": 0.9450824061402987, "grad_norm": 0.3699388236702028, "learning_rate": 5.686935273295649e-06, "loss": 0.3115, "step": 3971 }, { "epoch": 0.9453204022133634, "grad_norm": 0.3897628984455295, "learning_rate": 5.685025906851539e-06, "loss": 0.3806, "step": 3972 }, { "epoch": 0.9455583982864283, "grad_norm": 0.4016257052922472, "learning_rate": 5.6831164385956314e-06, "loss": 0.3558, "step": 3973 }, { "epoch": 0.945796394359493, "grad_norm": 0.3631492629536495, "learning_rate": 5.681206868811721e-06, "loss": 0.299, "step": 3974 }, { "epoch": 0.9460343904325579, "grad_norm": 0.3617842355634617, "learning_rate": 5.679297197783617e-06, "loss": 0.3351, "step": 3975 }, { "epoch": 0.9462723865056226, "grad_norm": 0.37153756822155454, "learning_rate": 5.677387425795146e-06, "loss": 0.389, "step": 3976 }, { "epoch": 0.9465103825786875, "grad_norm": 0.37351547703057986, "learning_rate": 5.675477553130145e-06, "loss": 0.3245, "step": 3977 }, { "epoch": 0.9467483786517522, "grad_norm": 0.38350414771635083, "learning_rate": 5.6735675800724695e-06, "loss": 0.3055, "step": 3978 }, { "epoch": 0.9469863747248171, "grad_norm": 0.42323362399946074, "learning_rate": 5.671657506905989e-06, "loss": 0.3664, "step": 3979 }, { "epoch": 0.9472243707978818, "grad_norm": 0.3736129312742057, "learning_rate": 5.669747333914586e-06, "loss": 0.3772, "step": 3980 }, { "epoch": 0.9474623668709466, "grad_norm": 0.3940418326123638, "learning_rate": 5.66783706138216e-06, "loss": 0.3301, "step": 3981 }, { "epoch": 0.9477003629440114, "grad_norm": 0.3647257614559865, "learning_rate": 5.665926689592626e-06, "loss": 0.3509, "step": 3982 }, { "epoch": 0.9479383590170762, "grad_norm": 0.35718234674396576, "learning_rate": 5.664016218829911e-06, "loss": 0.4159, "step": 3983 }, { "epoch": 0.948176355090141, "grad_norm": 0.35559817666437965, "learning_rate": 5.6621056493779605e-06, "loss": 0.2914, "step": 3984 }, { "epoch": 0.9484143511632058, "grad_norm": 0.4155897468492624, "learning_rate": 5.660194981520729e-06, "loss": 0.3059, "step": 3985 }, { "epoch": 0.9486523472362706, "grad_norm": 0.39058764215986536, "learning_rate": 5.658284215542191e-06, "loss": 0.3828, "step": 3986 }, { "epoch": 0.9488903433093354, "grad_norm": 0.36296631175689215, "learning_rate": 5.656373351726334e-06, "loss": 0.3917, "step": 3987 }, { "epoch": 0.9491283393824002, "grad_norm": 0.3654376052680258, "learning_rate": 5.654462390357159e-06, "loss": 0.3176, "step": 3988 }, { "epoch": 0.949366335455465, "grad_norm": 0.37351319837559777, "learning_rate": 5.652551331718681e-06, "loss": 0.3476, "step": 3989 }, { "epoch": 0.9496043315285297, "grad_norm": 0.3654444319811079, "learning_rate": 5.6506401760949335e-06, "loss": 0.4022, "step": 3990 }, { "epoch": 0.9498423276015946, "grad_norm": 0.37676497486164945, "learning_rate": 5.6487289237699595e-06, "loss": 0.3443, "step": 3991 }, { "epoch": 0.9500803236746593, "grad_norm": 0.3816522699094411, "learning_rate": 5.646817575027819e-06, "loss": 0.2994, "step": 3992 }, { "epoch": 0.9503183197477242, "grad_norm": 0.36418061297866533, "learning_rate": 5.6449061301525845e-06, "loss": 0.3747, "step": 3993 }, { "epoch": 0.9505563158207889, "grad_norm": 0.3457838501298794, "learning_rate": 5.642994589428344e-06, "loss": 0.3956, "step": 3994 }, { "epoch": 0.9507943118938538, "grad_norm": 0.3814926599582243, "learning_rate": 5.641082953139201e-06, "loss": 0.3113, "step": 3995 }, { "epoch": 0.9510323079669185, "grad_norm": 0.38468757516147417, "learning_rate": 5.639171221569273e-06, "loss": 0.3233, "step": 3996 }, { "epoch": 0.9512703040399834, "grad_norm": 0.3676820335135461, "learning_rate": 5.637259395002688e-06, "loss": 0.409, "step": 3997 }, { "epoch": 0.9515083001130481, "grad_norm": 0.366933898548463, "learning_rate": 5.635347473723592e-06, "loss": 0.3456, "step": 3998 }, { "epoch": 0.951746296186113, "grad_norm": 0.3470289154843928, "learning_rate": 5.633435458016144e-06, "loss": 0.3005, "step": 3999 }, { "epoch": 0.9519842922591777, "grad_norm": 0.3685253402972838, "learning_rate": 5.631523348164517e-06, "loss": 0.3329, "step": 4000 }, { "epoch": 0.9522222883322425, "grad_norm": 0.37326221272675375, "learning_rate": 5.629611144452896e-06, "loss": 0.3898, "step": 4001 }, { "epoch": 0.9524602844053073, "grad_norm": 0.369692070324989, "learning_rate": 5.627698847165484e-06, "loss": 0.3069, "step": 4002 }, { "epoch": 0.9526982804783721, "grad_norm": 0.4315390716578853, "learning_rate": 5.625786456586493e-06, "loss": 0.33, "step": 4003 }, { "epoch": 0.9529362765514369, "grad_norm": 0.4310155102266023, "learning_rate": 5.623873973000156e-06, "loss": 0.4028, "step": 4004 }, { "epoch": 0.9531742726245017, "grad_norm": 0.3751833494613908, "learning_rate": 5.621961396690712e-06, "loss": 0.3683, "step": 4005 }, { "epoch": 0.9534122686975665, "grad_norm": 0.42057861372299066, "learning_rate": 5.620048727942416e-06, "loss": 0.2942, "step": 4006 }, { "epoch": 0.9536502647706313, "grad_norm": 0.3625924700652696, "learning_rate": 5.618135967039542e-06, "loss": 0.3401, "step": 4007 }, { "epoch": 0.953888260843696, "grad_norm": 0.3542760242464296, "learning_rate": 5.616223114266369e-06, "loss": 0.417, "step": 4008 }, { "epoch": 0.9541262569167609, "grad_norm": 0.40855459494512736, "learning_rate": 5.614310169907199e-06, "loss": 0.3232, "step": 4009 }, { "epoch": 0.9543642529898256, "grad_norm": 0.37105385201006647, "learning_rate": 5.61239713424634e-06, "loss": 0.2834, "step": 4010 }, { "epoch": 0.9546022490628905, "grad_norm": 0.37855213750129024, "learning_rate": 5.610484007568117e-06, "loss": 0.3644, "step": 4011 }, { "epoch": 0.9548402451359552, "grad_norm": 0.36744333500958176, "learning_rate": 5.608570790156867e-06, "loss": 0.3968, "step": 4012 }, { "epoch": 0.9550782412090201, "grad_norm": 0.40177284275871733, "learning_rate": 5.606657482296943e-06, "loss": 0.3354, "step": 4013 }, { "epoch": 0.9553162372820848, "grad_norm": 0.40131947230064974, "learning_rate": 5.6047440842727075e-06, "loss": 0.3262, "step": 4014 }, { "epoch": 0.9555542333551497, "grad_norm": 0.35010111030696944, "learning_rate": 5.602830596368543e-06, "loss": 0.3737, "step": 4015 }, { "epoch": 0.9557922294282144, "grad_norm": 0.3939903727222176, "learning_rate": 5.600917018868835e-06, "loss": 0.3344, "step": 4016 }, { "epoch": 0.9560302255012793, "grad_norm": 0.4081024631555434, "learning_rate": 5.599003352057994e-06, "loss": 0.3028, "step": 4017 }, { "epoch": 0.956268221574344, "grad_norm": 0.38240903753305877, "learning_rate": 5.597089596220437e-06, "loss": 0.3847, "step": 4018 }, { "epoch": 0.9565062176474088, "grad_norm": 0.39578792437353716, "learning_rate": 5.595175751640595e-06, "loss": 0.3914, "step": 4019 }, { "epoch": 0.9567442137204736, "grad_norm": 0.43574336288913884, "learning_rate": 5.593261818602912e-06, "loss": 0.33, "step": 4020 }, { "epoch": 0.9569822097935384, "grad_norm": 0.3944916284608389, "learning_rate": 5.5913477973918465e-06, "loss": 0.3162, "step": 4021 }, { "epoch": 0.9572202058666032, "grad_norm": 0.3667260519177921, "learning_rate": 5.589433688291867e-06, "loss": 0.3784, "step": 4022 }, { "epoch": 0.957458201939668, "grad_norm": 0.38390699290292674, "learning_rate": 5.5875194915874605e-06, "loss": 0.3512, "step": 4023 }, { "epoch": 0.9576961980127328, "grad_norm": 0.37193449103190873, "learning_rate": 5.585605207563124e-06, "loss": 0.2912, "step": 4024 }, { "epoch": 0.9579341940857976, "grad_norm": 0.3699623027804539, "learning_rate": 5.583690836503366e-06, "loss": 0.3425, "step": 4025 }, { "epoch": 0.9581721901588623, "grad_norm": 0.37064282931321874, "learning_rate": 5.58177637869271e-06, "loss": 0.3992, "step": 4026 }, { "epoch": 0.9584101862319272, "grad_norm": 0.37635147922447526, "learning_rate": 5.5798618344156916e-06, "loss": 0.3117, "step": 4027 }, { "epoch": 0.9586481823049919, "grad_norm": 0.3987637365857944, "learning_rate": 5.577947203956858e-06, "loss": 0.3143, "step": 4028 }, { "epoch": 0.9588861783780568, "grad_norm": 0.4035395300731761, "learning_rate": 5.576032487600773e-06, "loss": 0.378, "step": 4029 }, { "epoch": 0.9591241744511215, "grad_norm": 0.4101548946799405, "learning_rate": 5.5741176856320105e-06, "loss": 0.3796, "step": 4030 }, { "epoch": 0.9593621705241864, "grad_norm": 0.3845855259199741, "learning_rate": 5.5722027983351565e-06, "loss": 0.3068, "step": 4031 }, { "epoch": 0.9596001665972511, "grad_norm": 0.3609449815477264, "learning_rate": 5.570287825994812e-06, "loss": 0.3256, "step": 4032 }, { "epoch": 0.959838162670316, "grad_norm": 0.3452672734583461, "learning_rate": 5.568372768895588e-06, "loss": 0.4226, "step": 4033 }, { "epoch": 0.9600761587433807, "grad_norm": 0.3712187021978535, "learning_rate": 5.566457627322109e-06, "loss": 0.3293, "step": 4034 }, { "epoch": 0.9603141548164456, "grad_norm": 0.35303931097547897, "learning_rate": 5.5645424015590144e-06, "loss": 0.28, "step": 4035 }, { "epoch": 0.9605521508895103, "grad_norm": 0.3633441697287166, "learning_rate": 5.562627091890951e-06, "loss": 0.3538, "step": 4036 }, { "epoch": 0.9607901469625751, "grad_norm": 0.41159227713226276, "learning_rate": 5.560711698602584e-06, "loss": 0.383, "step": 4037 }, { "epoch": 0.9610281430356399, "grad_norm": 0.35928803089226324, "learning_rate": 5.558796221978587e-06, "loss": 0.2985, "step": 4038 }, { "epoch": 0.9612661391087047, "grad_norm": 0.44144609493808085, "learning_rate": 5.556880662303648e-06, "loss": 0.3703, "step": 4039 }, { "epoch": 0.9615041351817695, "grad_norm": 0.3803784240581862, "learning_rate": 5.554965019862466e-06, "loss": 0.3911, "step": 4040 }, { "epoch": 0.9617421312548343, "grad_norm": 0.36481000247944867, "learning_rate": 5.553049294939752e-06, "loss": 0.3311, "step": 4041 }, { "epoch": 0.9619801273278991, "grad_norm": 0.3593387405042659, "learning_rate": 5.551133487820231e-06, "loss": 0.2952, "step": 4042 }, { "epoch": 0.9622181234009639, "grad_norm": 0.3625692759696533, "learning_rate": 5.54921759878864e-06, "loss": 0.3734, "step": 4043 }, { "epoch": 0.9624561194740286, "grad_norm": 0.4202028000178488, "learning_rate": 5.547301628129726e-06, "loss": 0.3948, "step": 4044 }, { "epoch": 0.9626941155470935, "grad_norm": 0.3783279956511722, "learning_rate": 5.545385576128252e-06, "loss": 0.3263, "step": 4045 }, { "epoch": 0.9629321116201582, "grad_norm": 0.3926107017682296, "learning_rate": 5.54346944306899e-06, "loss": 0.3155, "step": 4046 }, { "epoch": 0.9631701076932231, "grad_norm": 0.3926267406486093, "learning_rate": 5.541553229236721e-06, "loss": 0.367, "step": 4047 }, { "epoch": 0.9634081037662878, "grad_norm": 0.38364235306865657, "learning_rate": 5.539636934916247e-06, "loss": 0.3193, "step": 4048 }, { "epoch": 0.9636460998393527, "grad_norm": 0.38507849318568027, "learning_rate": 5.537720560392373e-06, "loss": 0.3056, "step": 4049 }, { "epoch": 0.9638840959124174, "grad_norm": 0.37301941582141107, "learning_rate": 5.535804105949922e-06, "loss": 0.3405, "step": 4050 }, { "epoch": 0.9641220919854823, "grad_norm": 0.35296914488364023, "learning_rate": 5.533887571873725e-06, "loss": 0.4018, "step": 4051 }, { "epoch": 0.964360088058547, "grad_norm": 0.3588491551483761, "learning_rate": 5.531970958448628e-06, "loss": 0.3004, "step": 4052 }, { "epoch": 0.9645980841316119, "grad_norm": 0.39575643827764756, "learning_rate": 5.530054265959486e-06, "loss": 0.2866, "step": 4053 }, { "epoch": 0.9648360802046766, "grad_norm": 0.3466594386599281, "learning_rate": 5.528137494691167e-06, "loss": 0.3955, "step": 4054 }, { "epoch": 0.9650740762777414, "grad_norm": 0.373285637369822, "learning_rate": 5.52622064492855e-06, "loss": 0.3484, "step": 4055 }, { "epoch": 0.9653120723508062, "grad_norm": 0.42113681485363846, "learning_rate": 5.524303716956528e-06, "loss": 0.3046, "step": 4056 }, { "epoch": 0.965550068423871, "grad_norm": 0.39214970328364873, "learning_rate": 5.522386711060002e-06, "loss": 0.3518, "step": 4057 }, { "epoch": 0.9657880644969358, "grad_norm": 0.3636119541231596, "learning_rate": 5.520469627523889e-06, "loss": 0.3726, "step": 4058 }, { "epoch": 0.9660260605700006, "grad_norm": 0.38531522777192306, "learning_rate": 5.518552466633112e-06, "loss": 0.3341, "step": 4059 }, { "epoch": 0.9662640566430654, "grad_norm": 0.3689829393306755, "learning_rate": 5.516635228672612e-06, "loss": 0.3131, "step": 4060 }, { "epoch": 0.9665020527161302, "grad_norm": 0.37131422956593985, "learning_rate": 5.514717913927336e-06, "loss": 0.3755, "step": 4061 }, { "epoch": 0.966740048789195, "grad_norm": 0.36698419750198635, "learning_rate": 5.5128005226822435e-06, "loss": 0.4112, "step": 4062 }, { "epoch": 0.9669780448622598, "grad_norm": 0.37767384948653765, "learning_rate": 5.510883055222307e-06, "loss": 0.3098, "step": 4063 }, { "epoch": 0.9672160409353245, "grad_norm": 0.3831387096070846, "learning_rate": 5.508965511832509e-06, "loss": 0.3377, "step": 4064 }, { "epoch": 0.9674540370083894, "grad_norm": 0.3413991536604678, "learning_rate": 5.507047892797846e-06, "loss": 0.3926, "step": 4065 }, { "epoch": 0.9676920330814541, "grad_norm": 0.4405648042369594, "learning_rate": 5.505130198403324e-06, "loss": 0.3568, "step": 4066 }, { "epoch": 0.967930029154519, "grad_norm": 0.36599437275948865, "learning_rate": 5.503212428933956e-06, "loss": 0.3054, "step": 4067 }, { "epoch": 0.9681680252275837, "grad_norm": 0.4337574365319751, "learning_rate": 5.501294584674771e-06, "loss": 0.3363, "step": 4068 }, { "epoch": 0.9684060213006486, "grad_norm": 0.3602978619539388, "learning_rate": 5.499376665910812e-06, "loss": 0.4053, "step": 4069 }, { "epoch": 0.9686440173737133, "grad_norm": 0.3642524483127621, "learning_rate": 5.497458672927124e-06, "loss": 0.3262, "step": 4070 }, { "epoch": 0.9688820134467782, "grad_norm": 0.3990749674732015, "learning_rate": 5.495540606008772e-06, "loss": 0.3322, "step": 4071 }, { "epoch": 0.9691200095198429, "grad_norm": 0.3452572444028366, "learning_rate": 5.493622465440828e-06, "loss": 0.401, "step": 4072 }, { "epoch": 0.9693580055929077, "grad_norm": 0.3947407561093372, "learning_rate": 5.491704251508373e-06, "loss": 0.3727, "step": 4073 }, { "epoch": 0.9695960016659725, "grad_norm": 0.35872991795969333, "learning_rate": 5.489785964496503e-06, "loss": 0.2752, "step": 4074 }, { "epoch": 0.9698339977390373, "grad_norm": 0.3810135107843619, "learning_rate": 5.48786760469032e-06, "loss": 0.3631, "step": 4075 }, { "epoch": 0.9700719938121021, "grad_norm": 0.36736070326138315, "learning_rate": 5.485949172374944e-06, "loss": 0.4297, "step": 4076 }, { "epoch": 0.9703099898851669, "grad_norm": 0.38917535599652914, "learning_rate": 5.484030667835496e-06, "loss": 0.3338, "step": 4077 }, { "epoch": 0.9705479859582317, "grad_norm": 0.4162578135435146, "learning_rate": 5.482112091357119e-06, "loss": 0.3201, "step": 4078 }, { "epoch": 0.9707859820312965, "grad_norm": 0.4446346807732351, "learning_rate": 5.480193443224957e-06, "loss": 0.3781, "step": 4079 }, { "epoch": 0.9710239781043613, "grad_norm": 0.363773543353832, "learning_rate": 5.478274723724172e-06, "loss": 0.363, "step": 4080 }, { "epoch": 0.9712619741774261, "grad_norm": 0.44939319983262227, "learning_rate": 5.47635593313993e-06, "loss": 0.3207, "step": 4081 }, { "epoch": 0.9714999702504908, "grad_norm": 0.38983047666671894, "learning_rate": 5.47443707175741e-06, "loss": 0.3369, "step": 4082 }, { "epoch": 0.9717379663235557, "grad_norm": 0.3609085790607123, "learning_rate": 5.472518139861806e-06, "loss": 0.4184, "step": 4083 }, { "epoch": 0.9719759623966204, "grad_norm": 0.4326297185681936, "learning_rate": 5.470599137738315e-06, "loss": 0.3157, "step": 4084 }, { "epoch": 0.9722139584696853, "grad_norm": 0.3863516117664841, "learning_rate": 5.468680065672152e-06, "loss": 0.3215, "step": 4085 }, { "epoch": 0.97245195454275, "grad_norm": 0.37735302540928745, "learning_rate": 5.466760923948536e-06, "loss": 0.4003, "step": 4086 }, { "epoch": 0.9726899506158149, "grad_norm": 0.3777977585862572, "learning_rate": 5.464841712852701e-06, "loss": 0.4035, "step": 4087 }, { "epoch": 0.9729279466888796, "grad_norm": 0.3884798640154453, "learning_rate": 5.462922432669886e-06, "loss": 0.2875, "step": 4088 }, { "epoch": 0.9731659427619445, "grad_norm": 0.3968066470866644, "learning_rate": 5.461003083685346e-06, "loss": 0.3509, "step": 4089 }, { "epoch": 0.9734039388350092, "grad_norm": 0.37546658100029856, "learning_rate": 5.459083666184344e-06, "loss": 0.4117, "step": 4090 }, { "epoch": 0.973641934908074, "grad_norm": 0.4033600486378232, "learning_rate": 5.4571641804521505e-06, "loss": 0.3023, "step": 4091 }, { "epoch": 0.9738799309811388, "grad_norm": 0.366298156488569, "learning_rate": 5.4552446267740515e-06, "loss": 0.2914, "step": 4092 }, { "epoch": 0.9741179270542036, "grad_norm": 0.3820600291897777, "learning_rate": 5.453325005435337e-06, "loss": 0.3516, "step": 4093 }, { "epoch": 0.9743559231272684, "grad_norm": 0.3654555161312593, "learning_rate": 5.451405316721313e-06, "loss": 0.3877, "step": 4094 }, { "epoch": 0.9745939192003332, "grad_norm": 0.40260654542095964, "learning_rate": 5.449485560917291e-06, "loss": 0.2995, "step": 4095 }, { "epoch": 0.974831915273398, "grad_norm": 0.3968641036674078, "learning_rate": 5.4475657383085955e-06, "loss": 0.306, "step": 4096 }, { "epoch": 0.9750699113464628, "grad_norm": 0.3903922997930381, "learning_rate": 5.44564584918056e-06, "loss": 0.3827, "step": 4097 }, { "epoch": 0.9753079074195276, "grad_norm": 0.439867110372997, "learning_rate": 5.443725893818524e-06, "loss": 0.3308, "step": 4098 }, { "epoch": 0.9755459034925924, "grad_norm": 0.4024543253971147, "learning_rate": 5.441805872507846e-06, "loss": 0.2899, "step": 4099 }, { "epoch": 0.9757838995656571, "grad_norm": 0.3732267890164506, "learning_rate": 5.439885785533884e-06, "loss": 0.3557, "step": 4100 }, { "epoch": 0.976021895638722, "grad_norm": 0.38784018489625477, "learning_rate": 5.437965633182012e-06, "loss": 0.3825, "step": 4101 }, { "epoch": 0.9762598917117867, "grad_norm": 0.35874607034227307, "learning_rate": 5.436045415737613e-06, "loss": 0.2903, "step": 4102 }, { "epoch": 0.9764978877848516, "grad_norm": 0.35448408385767743, "learning_rate": 5.434125133486078e-06, "loss": 0.3117, "step": 4103 }, { "epoch": 0.9767358838579163, "grad_norm": 0.36167261584165866, "learning_rate": 5.432204786712807e-06, "loss": 0.3822, "step": 4104 }, { "epoch": 0.9769738799309812, "grad_norm": 0.36236014204816985, "learning_rate": 5.430284375703213e-06, "loss": 0.3937, "step": 4105 }, { "epoch": 0.9772118760040459, "grad_norm": 0.3615011221235271, "learning_rate": 5.428363900742717e-06, "loss": 0.2882, "step": 4106 }, { "epoch": 0.9774498720771108, "grad_norm": 0.36358536744553127, "learning_rate": 5.426443362116746e-06, "loss": 0.3445, "step": 4107 }, { "epoch": 0.9776878681501755, "grad_norm": 0.3499471450758627, "learning_rate": 5.424522760110744e-06, "loss": 0.3819, "step": 4108 }, { "epoch": 0.9779258642232403, "grad_norm": 0.3783758065626236, "learning_rate": 5.422602095010157e-06, "loss": 0.3003, "step": 4109 }, { "epoch": 0.9781638602963051, "grad_norm": 0.3485194689281019, "learning_rate": 5.420681367100443e-06, "loss": 0.3274, "step": 4110 }, { "epoch": 0.9784018563693699, "grad_norm": 0.3560157044900479, "learning_rate": 5.418760576667071e-06, "loss": 0.3514, "step": 4111 }, { "epoch": 0.9786398524424347, "grad_norm": 0.36557640443599765, "learning_rate": 5.416839723995518e-06, "loss": 0.3701, "step": 4112 }, { "epoch": 0.9788778485154995, "grad_norm": 0.4127125165049302, "learning_rate": 5.41491880937127e-06, "loss": 0.3, "step": 4113 }, { "epoch": 0.9791158445885643, "grad_norm": 0.3946777818047238, "learning_rate": 5.4129978330798224e-06, "loss": 0.3319, "step": 4114 }, { "epoch": 0.9793538406616291, "grad_norm": 0.35875313580725443, "learning_rate": 5.41107679540668e-06, "loss": 0.3716, "step": 4115 }, { "epoch": 0.9795918367346939, "grad_norm": 0.3583640218186905, "learning_rate": 5.409155696637357e-06, "loss": 0.3203, "step": 4116 }, { "epoch": 0.9798298328077587, "grad_norm": 0.43393683642612224, "learning_rate": 5.4072345370573745e-06, "loss": 0.3299, "step": 4117 }, { "epoch": 0.9800678288808234, "grad_norm": 0.3689652477197836, "learning_rate": 5.405313316952265e-06, "loss": 0.3645, "step": 4118 }, { "epoch": 0.9803058249538883, "grad_norm": 0.38248656505142614, "learning_rate": 5.403392036607568e-06, "loss": 0.4105, "step": 4119 }, { "epoch": 0.980543821026953, "grad_norm": 0.34515772392085886, "learning_rate": 5.401470696308838e-06, "loss": 0.3192, "step": 4120 }, { "epoch": 0.9807818171000179, "grad_norm": 0.349261482848996, "learning_rate": 5.399549296341629e-06, "loss": 0.3257, "step": 4121 }, { "epoch": 0.9810198131730826, "grad_norm": 0.3802296455540952, "learning_rate": 5.39762783699151e-06, "loss": 0.3968, "step": 4122 }, { "epoch": 0.9812578092461475, "grad_norm": 0.3615234129840896, "learning_rate": 5.395706318544059e-06, "loss": 0.3298, "step": 4123 }, { "epoch": 0.9814958053192122, "grad_norm": 0.4070437397173655, "learning_rate": 5.393784741284858e-06, "loss": 0.3196, "step": 4124 }, { "epoch": 0.9817338013922771, "grad_norm": 0.3920234501678305, "learning_rate": 5.391863105499505e-06, "loss": 0.3159, "step": 4125 }, { "epoch": 0.9819717974653418, "grad_norm": 0.3990009179740692, "learning_rate": 5.389941411473598e-06, "loss": 0.4174, "step": 4126 }, { "epoch": 0.9822097935384067, "grad_norm": 0.41580376912089045, "learning_rate": 5.3880196594927514e-06, "loss": 0.2869, "step": 4127 }, { "epoch": 0.9824477896114714, "grad_norm": 0.41224321601260766, "learning_rate": 5.3860978498425845e-06, "loss": 0.2956, "step": 4128 }, { "epoch": 0.9826857856845362, "grad_norm": 0.37553862338123545, "learning_rate": 5.384175982808726e-06, "loss": 0.4063, "step": 4129 }, { "epoch": 0.982923781757601, "grad_norm": 0.3883006441711229, "learning_rate": 5.382254058676812e-06, "loss": 0.3706, "step": 4130 }, { "epoch": 0.9831617778306658, "grad_norm": 0.35991795831667905, "learning_rate": 5.380332077732489e-06, "loss": 0.2823, "step": 4131 }, { "epoch": 0.9833997739037306, "grad_norm": 0.34116602859971285, "learning_rate": 5.378410040261408e-06, "loss": 0.3513, "step": 4132 }, { "epoch": 0.9836377699767954, "grad_norm": 0.4029763010820559, "learning_rate": 5.376487946549235e-06, "loss": 0.3987, "step": 4133 }, { "epoch": 0.9838757660498602, "grad_norm": 0.3868100898163792, "learning_rate": 5.374565796881639e-06, "loss": 0.2773, "step": 4134 }, { "epoch": 0.984113762122925, "grad_norm": 0.3428976994977603, "learning_rate": 5.3726435915442986e-06, "loss": 0.2696, "step": 4135 }, { "epoch": 0.9843517581959897, "grad_norm": 0.37266226267641545, "learning_rate": 5.370721330822904e-06, "loss": 0.3714, "step": 4136 }, { "epoch": 0.9845897542690546, "grad_norm": 0.34598524004373593, "learning_rate": 5.368799015003146e-06, "loss": 0.384, "step": 4137 }, { "epoch": 0.9848277503421193, "grad_norm": 0.4192229780499278, "learning_rate": 5.366876644370733e-06, "loss": 0.273, "step": 4138 }, { "epoch": 0.9850657464151842, "grad_norm": 0.3486972025471823, "learning_rate": 5.364954219211372e-06, "loss": 0.337, "step": 4139 }, { "epoch": 0.9853037424882489, "grad_norm": 0.36821440890479656, "learning_rate": 5.363031739810787e-06, "loss": 0.4031, "step": 4140 }, { "epoch": 0.9855417385613138, "grad_norm": 0.39566913279734434, "learning_rate": 5.361109206454704e-06, "loss": 0.3342, "step": 4141 }, { "epoch": 0.9857797346343785, "grad_norm": 0.4004101088241766, "learning_rate": 5.359186619428861e-06, "loss": 0.2989, "step": 4142 }, { "epoch": 0.9860177307074434, "grad_norm": 0.41706306261687004, "learning_rate": 5.357263979018999e-06, "loss": 0.3715, "step": 4143 }, { "epoch": 0.9862557267805081, "grad_norm": 0.38390634367368964, "learning_rate": 5.355341285510872e-06, "loss": 0.4016, "step": 4144 }, { "epoch": 0.986493722853573, "grad_norm": 0.4119688642491026, "learning_rate": 5.35341853919024e-06, "loss": 0.3256, "step": 4145 }, { "epoch": 0.9867317189266377, "grad_norm": 0.3966809599743582, "learning_rate": 5.351495740342868e-06, "loss": 0.3151, "step": 4146 }, { "epoch": 0.9869697149997025, "grad_norm": 0.37458039305346175, "learning_rate": 5.349572889254535e-06, "loss": 0.3874, "step": 4147 }, { "epoch": 0.9872077110727673, "grad_norm": 0.35624311527568975, "learning_rate": 5.347649986211022e-06, "loss": 0.3685, "step": 4148 }, { "epoch": 0.9874457071458321, "grad_norm": 0.41927466293047233, "learning_rate": 5.34572703149812e-06, "loss": 0.3162, "step": 4149 }, { "epoch": 0.9876837032188969, "grad_norm": 0.3880202059036109, "learning_rate": 5.343804025401628e-06, "loss": 0.3319, "step": 4150 }, { "epoch": 0.9879216992919617, "grad_norm": 0.36850162612727255, "learning_rate": 5.3418809682073546e-06, "loss": 0.4004, "step": 4151 }, { "epoch": 0.9881596953650265, "grad_norm": 0.3884277693954743, "learning_rate": 5.339957860201111e-06, "loss": 0.2997, "step": 4152 }, { "epoch": 0.9883976914380913, "grad_norm": 0.3670588779035891, "learning_rate": 5.338034701668717e-06, "loss": 0.3034, "step": 4153 }, { "epoch": 0.988635687511156, "grad_norm": 0.35848269208319117, "learning_rate": 5.336111492896005e-06, "loss": 0.3707, "step": 4154 }, { "epoch": 0.9888736835842209, "grad_norm": 0.39501627370624287, "learning_rate": 5.334188234168811e-06, "loss": 0.3591, "step": 4155 }, { "epoch": 0.9891116796572856, "grad_norm": 0.3924138317274099, "learning_rate": 5.332264925772979e-06, "loss": 0.3118, "step": 4156 }, { "epoch": 0.9893496757303505, "grad_norm": 0.40183975512658543, "learning_rate": 5.330341567994357e-06, "loss": 0.3298, "step": 4157 }, { "epoch": 0.9895876718034152, "grad_norm": 0.3911218716929348, "learning_rate": 5.328418161118807e-06, "loss": 0.3957, "step": 4158 }, { "epoch": 0.9898256678764801, "grad_norm": 0.39644737968164145, "learning_rate": 5.326494705432194e-06, "loss": 0.3453, "step": 4159 }, { "epoch": 0.9900636639495448, "grad_norm": 0.37820532138547525, "learning_rate": 5.324571201220388e-06, "loss": 0.303, "step": 4160 }, { "epoch": 0.9903016600226097, "grad_norm": 0.3680837268102721, "learning_rate": 5.322647648769275e-06, "loss": 0.3685, "step": 4161 }, { "epoch": 0.9905396560956744, "grad_norm": 0.398138838445303, "learning_rate": 5.320724048364736e-06, "loss": 0.3738, "step": 4162 }, { "epoch": 0.9907776521687393, "grad_norm": 0.3627540114538959, "learning_rate": 5.3188004002926715e-06, "loss": 0.3166, "step": 4163 }, { "epoch": 0.991015648241804, "grad_norm": 0.3694127802681357, "learning_rate": 5.316876704838981e-06, "loss": 0.3229, "step": 4164 }, { "epoch": 0.9912536443148688, "grad_norm": 0.37299601489965684, "learning_rate": 5.314952962289574e-06, "loss": 0.3883, "step": 4165 }, { "epoch": 0.9914916403879336, "grad_norm": 0.3572974513846591, "learning_rate": 5.3130291729303625e-06, "loss": 0.3368, "step": 4166 }, { "epoch": 0.9917296364609984, "grad_norm": 0.3730121718719649, "learning_rate": 5.311105337047273e-06, "loss": 0.2987, "step": 4167 }, { "epoch": 0.9919676325340632, "grad_norm": 0.34901771416498145, "learning_rate": 5.3091814549262345e-06, "loss": 0.3522, "step": 4168 }, { "epoch": 0.992205628607128, "grad_norm": 0.3810404289861147, "learning_rate": 5.3072575268531835e-06, "loss": 0.404, "step": 4169 }, { "epoch": 0.9924436246801928, "grad_norm": 0.35455196776033093, "learning_rate": 5.305333553114061e-06, "loss": 0.2767, "step": 4170 }, { "epoch": 0.9926816207532576, "grad_norm": 0.3861290164430468, "learning_rate": 5.303409533994821e-06, "loss": 0.319, "step": 4171 }, { "epoch": 0.9929196168263223, "grad_norm": 0.3494911589336109, "learning_rate": 5.301485469781418e-06, "loss": 0.3845, "step": 4172 }, { "epoch": 0.9931576128993872, "grad_norm": 0.3522517568562557, "learning_rate": 5.299561360759815e-06, "loss": 0.3433, "step": 4173 }, { "epoch": 0.9933956089724519, "grad_norm": 0.39675837098549627, "learning_rate": 5.297637207215982e-06, "loss": 0.3027, "step": 4174 }, { "epoch": 0.9936336050455168, "grad_norm": 0.37785284486500015, "learning_rate": 5.295713009435898e-06, "loss": 0.3505, "step": 4175 }, { "epoch": 0.9938716011185815, "grad_norm": 0.3903018829163007, "learning_rate": 5.293788767705544e-06, "loss": 0.4349, "step": 4176 }, { "epoch": 0.9941095971916464, "grad_norm": 0.36388379671761967, "learning_rate": 5.291864482310913e-06, "loss": 0.3286, "step": 4177 }, { "epoch": 0.9943475932647111, "grad_norm": 0.359114110954639, "learning_rate": 5.289940153537999e-06, "loss": 0.3227, "step": 4178 }, { "epoch": 0.994585589337776, "grad_norm": 0.3408663433796706, "learning_rate": 5.2880157816728055e-06, "loss": 0.4037, "step": 4179 }, { "epoch": 0.9948235854108407, "grad_norm": 0.3827699859436007, "learning_rate": 5.2860913670013415e-06, "loss": 0.363, "step": 4180 }, { "epoch": 0.9950615814839056, "grad_norm": 0.34748568520852696, "learning_rate": 5.2841669098096215e-06, "loss": 0.2973, "step": 4181 }, { "epoch": 0.9952995775569703, "grad_norm": 0.3573147481932218, "learning_rate": 5.2822424103836715e-06, "loss": 0.3185, "step": 4182 }, { "epoch": 0.9955375736300351, "grad_norm": 0.35989079023901194, "learning_rate": 5.280317869009514e-06, "loss": 0.3781, "step": 4183 }, { "epoch": 0.9957755697030999, "grad_norm": 0.43015819010371176, "learning_rate": 5.278393285973189e-06, "loss": 0.3283, "step": 4184 }, { "epoch": 0.9960135657761647, "grad_norm": 0.36962744882859006, "learning_rate": 5.276468661560733e-06, "loss": 0.2961, "step": 4185 }, { "epoch": 0.9962515618492295, "grad_norm": 0.351978630130564, "learning_rate": 5.274543996058195e-06, "loss": 0.3664, "step": 4186 }, { "epoch": 0.9964895579222943, "grad_norm": 0.3936832601718924, "learning_rate": 5.272619289751627e-06, "loss": 0.4083, "step": 4187 }, { "epoch": 0.9967275539953591, "grad_norm": 0.4103960278310374, "learning_rate": 5.270694542927089e-06, "loss": 0.3127, "step": 4188 }, { "epoch": 0.9969655500684239, "grad_norm": 0.3889947229714806, "learning_rate": 5.268769755870643e-06, "loss": 0.3141, "step": 4189 }, { "epoch": 0.9972035461414887, "grad_norm": 0.39726469757369814, "learning_rate": 5.266844928868364e-06, "loss": 0.3777, "step": 4190 }, { "epoch": 0.9974415422145535, "grad_norm": 0.3797443444152874, "learning_rate": 5.264920062206328e-06, "loss": 0.3664, "step": 4191 }, { "epoch": 0.9976795382876182, "grad_norm": 0.3731570350476907, "learning_rate": 5.262995156170616e-06, "loss": 0.3014, "step": 4192 }, { "epoch": 0.9979175343606831, "grad_norm": 0.3635536818396404, "learning_rate": 5.261070211047318e-06, "loss": 0.335, "step": 4193 }, { "epoch": 0.9981555304337478, "grad_norm": 0.37957844831809195, "learning_rate": 5.2591452271225276e-06, "loss": 0.385, "step": 4194 }, { "epoch": 0.9983935265068127, "grad_norm": 0.4412946371353015, "learning_rate": 5.2572202046823455e-06, "loss": 0.333, "step": 4195 }, { "epoch": 0.9986315225798774, "grad_norm": 0.3949114004805517, "learning_rate": 5.255295144012877e-06, "loss": 0.327, "step": 4196 }, { "epoch": 0.9988695186529423, "grad_norm": 0.37599046677243475, "learning_rate": 5.253370045400236e-06, "loss": 0.4231, "step": 4197 }, { "epoch": 0.999107514726007, "grad_norm": 0.36211875864414783, "learning_rate": 5.2514449091305375e-06, "loss": 0.3453, "step": 4198 }, { "epoch": 0.9993455107990719, "grad_norm": 0.4080030072391759, "learning_rate": 5.249519735489904e-06, "loss": 0.2976, "step": 4199 }, { "epoch": 0.9995835068721366, "grad_norm": 0.4151890843838898, "learning_rate": 5.247594524764466e-06, "loss": 0.3492, "step": 4200 }, { "epoch": 0.9998215029452014, "grad_norm": 0.3667853249254919, "learning_rate": 5.2456692772403565e-06, "loss": 0.4018, "step": 4201 }, { "epoch": 1.0002379960730647, "grad_norm": 0.34868915612115237, "learning_rate": 5.243743993203715e-06, "loss": 0.461, "step": 4202 }, { "epoch": 1.0004759921461295, "grad_norm": 0.3543503974639351, "learning_rate": 5.241818672940684e-06, "loss": 0.3138, "step": 4203 }, { "epoch": 1.0007139882191944, "grad_norm": 0.35172615371783905, "learning_rate": 5.239893316737419e-06, "loss": 0.3275, "step": 4204 }, { "epoch": 1.0009519842922592, "grad_norm": 0.37691116884826764, "learning_rate": 5.23796792488007e-06, "loss": 0.3566, "step": 4205 }, { "epoch": 1.001189980365324, "grad_norm": 0.3409709908376464, "learning_rate": 5.236042497654802e-06, "loss": 0.3944, "step": 4206 }, { "epoch": 1.0014279764383887, "grad_norm": 0.36845210293609737, "learning_rate": 5.2341170353477795e-06, "loss": 0.2646, "step": 4207 }, { "epoch": 1.0016659725114536, "grad_norm": 0.359375740170696, "learning_rate": 5.232191538245173e-06, "loss": 0.319, "step": 4208 }, { "epoch": 1.0019039685845184, "grad_norm": 0.3920628761903601, "learning_rate": 5.23026600663316e-06, "loss": 0.3928, "step": 4209 }, { "epoch": 1.002141964657583, "grad_norm": 0.41390620119344756, "learning_rate": 5.2283404407979225e-06, "loss": 0.3013, "step": 4210 }, { "epoch": 1.0023799607306478, "grad_norm": 0.4220811528276169, "learning_rate": 5.226414841025645e-06, "loss": 0.2726, "step": 4211 }, { "epoch": 1.0026179568037128, "grad_norm": 0.41557061395608813, "learning_rate": 5.2244892076025225e-06, "loss": 0.365, "step": 4212 }, { "epoch": 1.0028559528767775, "grad_norm": 0.40671365818027116, "learning_rate": 5.22256354081475e-06, "loss": 0.3638, "step": 4213 }, { "epoch": 1.0030939489498423, "grad_norm": 0.38827259080751864, "learning_rate": 5.220637840948528e-06, "loss": 0.3195, "step": 4214 }, { "epoch": 1.003331945022907, "grad_norm": 0.46007325776629265, "learning_rate": 5.218712108290065e-06, "loss": 0.3423, "step": 4215 }, { "epoch": 1.003569941095972, "grad_norm": 0.3927299977513199, "learning_rate": 5.216786343125572e-06, "loss": 0.3815, "step": 4216 }, { "epoch": 1.0038079371690367, "grad_norm": 0.3777948807064563, "learning_rate": 5.214860545741266e-06, "loss": 0.3139, "step": 4217 }, { "epoch": 1.0040459332421015, "grad_norm": 0.4096489630411397, "learning_rate": 5.212934716423368e-06, "loss": 0.2923, "step": 4218 }, { "epoch": 1.0042839293151662, "grad_norm": 0.4428443016282393, "learning_rate": 5.211008855458103e-06, "loss": 0.3226, "step": 4219 }, { "epoch": 1.0045219253882312, "grad_norm": 0.3548101722105468, "learning_rate": 5.209082963131702e-06, "loss": 0.3717, "step": 4220 }, { "epoch": 1.004759921461296, "grad_norm": 0.3661098943341066, "learning_rate": 5.2071570397303995e-06, "loss": 0.3291, "step": 4221 }, { "epoch": 1.0049979175343606, "grad_norm": 0.39846837704199717, "learning_rate": 5.2052310855404356e-06, "loss": 0.2979, "step": 4222 }, { "epoch": 1.0052359136074254, "grad_norm": 0.44041097154948994, "learning_rate": 5.203305100848056e-06, "loss": 0.3587, "step": 4223 }, { "epoch": 1.0054739096804903, "grad_norm": 0.3696154903241395, "learning_rate": 5.20137908593951e-06, "loss": 0.357, "step": 4224 }, { "epoch": 1.005711905753555, "grad_norm": 0.3589543414638921, "learning_rate": 5.1994530411010495e-06, "loss": 0.3017, "step": 4225 }, { "epoch": 1.0059499018266198, "grad_norm": 0.4534518628656481, "learning_rate": 5.1975269666189325e-06, "loss": 0.3259, "step": 4226 }, { "epoch": 1.0061878978996845, "grad_norm": 0.37753721010737906, "learning_rate": 5.195600862779421e-06, "loss": 0.3984, "step": 4227 }, { "epoch": 1.0064258939727495, "grad_norm": 0.35907800320760047, "learning_rate": 5.193674729868781e-06, "loss": 0.3162, "step": 4228 }, { "epoch": 1.0066638900458142, "grad_norm": 0.38681209195212035, "learning_rate": 5.191748568173288e-06, "loss": 0.3052, "step": 4229 }, { "epoch": 1.006901886118879, "grad_norm": 0.38975003350282217, "learning_rate": 5.1898223779792125e-06, "loss": 0.369, "step": 4230 }, { "epoch": 1.0071398821919437, "grad_norm": 0.37451302101771794, "learning_rate": 5.187896159572836e-06, "loss": 0.3867, "step": 4231 }, { "epoch": 1.0073778782650087, "grad_norm": 0.3746202575214494, "learning_rate": 5.185969913240442e-06, "loss": 0.3123, "step": 4232 }, { "epoch": 1.0076158743380734, "grad_norm": 0.3399200166882226, "learning_rate": 5.184043639268318e-06, "loss": 0.3423, "step": 4233 }, { "epoch": 1.0078538704111382, "grad_norm": 0.37484361667395105, "learning_rate": 5.1821173379427566e-06, "loss": 0.3874, "step": 4234 }, { "epoch": 1.008091866484203, "grad_norm": 0.38264504872383276, "learning_rate": 5.180191009550053e-06, "loss": 0.3665, "step": 4235 }, { "epoch": 1.0083298625572679, "grad_norm": 0.3905871905210143, "learning_rate": 5.178264654376507e-06, "loss": 0.3112, "step": 4236 }, { "epoch": 1.0085678586303326, "grad_norm": 0.39456565680861116, "learning_rate": 5.176338272708424e-06, "loss": 0.3265, "step": 4237 }, { "epoch": 1.0088058547033973, "grad_norm": 0.36221965002648904, "learning_rate": 5.174411864832111e-06, "loss": 0.3884, "step": 4238 }, { "epoch": 1.009043850776462, "grad_norm": 0.40646971651183245, "learning_rate": 5.172485431033882e-06, "loss": 0.2814, "step": 4239 }, { "epoch": 1.009281846849527, "grad_norm": 0.40606667706481386, "learning_rate": 5.17055897160005e-06, "loss": 0.295, "step": 4240 }, { "epoch": 1.0095198429225918, "grad_norm": 0.4187704484376356, "learning_rate": 5.168632486816934e-06, "loss": 0.3786, "step": 4241 }, { "epoch": 1.0097578389956565, "grad_norm": 0.38137336025761603, "learning_rate": 5.1667059769708595e-06, "loss": 0.3093, "step": 4242 }, { "epoch": 1.0099958350687213, "grad_norm": 0.4262476491330625, "learning_rate": 5.1647794423481516e-06, "loss": 0.2841, "step": 4243 }, { "epoch": 1.0102338311417862, "grad_norm": 0.42194011631004263, "learning_rate": 5.1628528832351436e-06, "loss": 0.3438, "step": 4244 }, { "epoch": 1.010471827214851, "grad_norm": 0.37173229226099785, "learning_rate": 5.160926299918167e-06, "loss": 0.3863, "step": 4245 }, { "epoch": 1.0107098232879157, "grad_norm": 0.3611360955387892, "learning_rate": 5.158999692683563e-06, "loss": 0.2911, "step": 4246 }, { "epoch": 1.0109478193609804, "grad_norm": 0.37976056901327965, "learning_rate": 5.15707306181767e-06, "loss": 0.3135, "step": 4247 }, { "epoch": 1.0111858154340454, "grad_norm": 0.40048201279174483, "learning_rate": 5.155146407606835e-06, "loss": 0.3757, "step": 4248 }, { "epoch": 1.0114238115071101, "grad_norm": 0.36075054638382786, "learning_rate": 5.153219730337406e-06, "loss": 0.3147, "step": 4249 }, { "epoch": 1.0116618075801749, "grad_norm": 0.3679232730274379, "learning_rate": 5.151293030295732e-06, "loss": 0.2865, "step": 4250 }, { "epoch": 1.0118998036532396, "grad_norm": 0.3581758351944555, "learning_rate": 5.149366307768173e-06, "loss": 0.3218, "step": 4251 }, { "epoch": 1.0121377997263046, "grad_norm": 0.3823224717214005, "learning_rate": 5.147439563041086e-06, "loss": 0.3987, "step": 4252 }, { "epoch": 1.0123757957993693, "grad_norm": 0.33371251114397465, "learning_rate": 5.145512796400831e-06, "loss": 0.2999, "step": 4253 }, { "epoch": 1.012613791872434, "grad_norm": 0.3823340185719183, "learning_rate": 5.143586008133776e-06, "loss": 0.2941, "step": 4254 }, { "epoch": 1.0128517879454988, "grad_norm": 0.382504369490985, "learning_rate": 5.141659198526287e-06, "loss": 0.3752, "step": 4255 }, { "epoch": 1.0130897840185638, "grad_norm": 0.3814481433528493, "learning_rate": 5.139732367864736e-06, "loss": 0.3442, "step": 4256 }, { "epoch": 1.0133277800916285, "grad_norm": 0.3964801873649591, "learning_rate": 5.137805516435499e-06, "loss": 0.2744, "step": 4257 }, { "epoch": 1.0135657761646932, "grad_norm": 0.35135187470588386, "learning_rate": 5.135878644524953e-06, "loss": 0.3428, "step": 4258 }, { "epoch": 1.013803772237758, "grad_norm": 0.43647825401352464, "learning_rate": 5.13395175241948e-06, "loss": 0.4011, "step": 4259 }, { "epoch": 1.014041768310823, "grad_norm": 0.48740933719008334, "learning_rate": 5.132024840405462e-06, "loss": 0.3116, "step": 4260 }, { "epoch": 1.0142797643838877, "grad_norm": 0.3754855598282161, "learning_rate": 5.130097908769287e-06, "loss": 0.2721, "step": 4261 }, { "epoch": 1.0145177604569524, "grad_norm": 0.37968335966178834, "learning_rate": 5.128170957797345e-06, "loss": 0.3164, "step": 4262 }, { "epoch": 1.0147557565300172, "grad_norm": 0.38142166044175096, "learning_rate": 5.126243987776026e-06, "loss": 0.3873, "step": 4263 }, { "epoch": 1.0149937526030821, "grad_norm": 0.374445975748934, "learning_rate": 5.124316998991728e-06, "loss": 0.2786, "step": 4264 }, { "epoch": 1.0152317486761469, "grad_norm": 0.42403267311698933, "learning_rate": 5.122389991730848e-06, "loss": 0.3162, "step": 4265 }, { "epoch": 1.0154697447492116, "grad_norm": 0.3964607797578093, "learning_rate": 5.120462966279789e-06, "loss": 0.3996, "step": 4266 }, { "epoch": 1.0157077408222763, "grad_norm": 0.363101926710214, "learning_rate": 5.118535922924952e-06, "loss": 0.3341, "step": 4267 }, { "epoch": 1.0159457368953413, "grad_norm": 0.3751042986845883, "learning_rate": 5.1166088619527445e-06, "loss": 0.2738, "step": 4268 }, { "epoch": 1.016183732968406, "grad_norm": 0.36892262345896815, "learning_rate": 5.114681783649575e-06, "loss": 0.3194, "step": 4269 }, { "epoch": 1.0164217290414708, "grad_norm": 0.34748547029304633, "learning_rate": 5.112754688301855e-06, "loss": 0.3562, "step": 4270 }, { "epoch": 1.0166597251145355, "grad_norm": 0.38763196396366484, "learning_rate": 5.110827576196e-06, "loss": 0.307, "step": 4271 }, { "epoch": 1.0168977211876005, "grad_norm": 0.37714618148409906, "learning_rate": 5.1089004476184255e-06, "loss": 0.3069, "step": 4272 }, { "epoch": 1.0171357172606652, "grad_norm": 0.3771535074762055, "learning_rate": 5.10697330285555e-06, "loss": 0.3587, "step": 4273 }, { "epoch": 1.01737371333373, "grad_norm": 0.3691570859798567, "learning_rate": 5.105046142193796e-06, "loss": 0.3495, "step": 4274 }, { "epoch": 1.0176117094067947, "grad_norm": 0.4171393197587053, "learning_rate": 5.103118965919586e-06, "loss": 0.2785, "step": 4275 }, { "epoch": 1.0178497054798596, "grad_norm": 0.3718279478108766, "learning_rate": 5.101191774319346e-06, "loss": 0.3278, "step": 4276 }, { "epoch": 1.0180877015529244, "grad_norm": 0.3762497178178657, "learning_rate": 5.099264567679505e-06, "loss": 0.3814, "step": 4277 }, { "epoch": 1.0183256976259891, "grad_norm": 0.3925916540388656, "learning_rate": 5.097337346286494e-06, "loss": 0.3096, "step": 4278 }, { "epoch": 1.0185636936990539, "grad_norm": 0.4026406915827254, "learning_rate": 5.095410110426746e-06, "loss": 0.2751, "step": 4279 }, { "epoch": 1.0188016897721188, "grad_norm": 0.3737121034084029, "learning_rate": 5.093482860386695e-06, "loss": 0.3442, "step": 4280 }, { "epoch": 1.0190396858451836, "grad_norm": 0.3662525796650136, "learning_rate": 5.091555596452777e-06, "loss": 0.3694, "step": 4281 }, { "epoch": 1.0192776819182483, "grad_norm": 0.37406892875546127, "learning_rate": 5.089628318911434e-06, "loss": 0.2785, "step": 4282 }, { "epoch": 1.019515677991313, "grad_norm": 0.362735438961647, "learning_rate": 5.0877010280491045e-06, "loss": 0.3289, "step": 4283 }, { "epoch": 1.019753674064378, "grad_norm": 0.3737293250336674, "learning_rate": 5.085773724152232e-06, "loss": 0.3725, "step": 4284 }, { "epoch": 1.0199916701374427, "grad_norm": 0.37431985674627993, "learning_rate": 5.083846407507263e-06, "loss": 0.3119, "step": 4285 }, { "epoch": 1.0202296662105075, "grad_norm": 0.39281524554349173, "learning_rate": 5.0819190784006444e-06, "loss": 0.3318, "step": 4286 }, { "epoch": 1.0204676622835722, "grad_norm": 0.4178241632215565, "learning_rate": 5.079991737118823e-06, "loss": 0.3592, "step": 4287 }, { "epoch": 1.0207056583566372, "grad_norm": 0.37007927511322464, "learning_rate": 5.0780643839482515e-06, "loss": 0.3696, "step": 4288 }, { "epoch": 1.020943654429702, "grad_norm": 0.393635301072028, "learning_rate": 5.076137019175381e-06, "loss": 0.2891, "step": 4289 }, { "epoch": 1.0211816505027667, "grad_norm": 0.37703876121952784, "learning_rate": 5.074209643086666e-06, "loss": 0.33, "step": 4290 }, { "epoch": 1.0214196465758314, "grad_norm": 0.42049817515097926, "learning_rate": 5.072282255968561e-06, "loss": 0.3764, "step": 4291 }, { "epoch": 1.0216576426488964, "grad_norm": 0.38019207344494804, "learning_rate": 5.070354858107526e-06, "loss": 0.31, "step": 4292 }, { "epoch": 1.021895638721961, "grad_norm": 0.4040481113943476, "learning_rate": 5.068427449790019e-06, "loss": 0.2868, "step": 4293 }, { "epoch": 1.0221336347950258, "grad_norm": 0.370686703144065, "learning_rate": 5.0665000313024995e-06, "loss": 0.3369, "step": 4294 }, { "epoch": 1.0223716308680906, "grad_norm": 0.39346612528862, "learning_rate": 5.064572602931428e-06, "loss": 0.3823, "step": 4295 }, { "epoch": 1.0226096269411555, "grad_norm": 0.3717514479143731, "learning_rate": 5.0626451649632725e-06, "loss": 0.3087, "step": 4296 }, { "epoch": 1.0228476230142203, "grad_norm": 0.4092985098023293, "learning_rate": 5.060717717684496e-06, "loss": 0.3115, "step": 4297 }, { "epoch": 1.023085619087285, "grad_norm": 0.3934618504996616, "learning_rate": 5.058790261381563e-06, "loss": 0.3801, "step": 4298 }, { "epoch": 1.0233236151603498, "grad_norm": 0.3492191091572103, "learning_rate": 5.056862796340944e-06, "loss": 0.3489, "step": 4299 }, { "epoch": 1.0235616112334147, "grad_norm": 0.4051063697173622, "learning_rate": 5.054935322849107e-06, "loss": 0.2997, "step": 4300 }, { "epoch": 1.0237996073064795, "grad_norm": 0.3469654129695318, "learning_rate": 5.053007841192522e-06, "loss": 0.3036, "step": 4301 }, { "epoch": 1.0240376033795442, "grad_norm": 0.38270686628936906, "learning_rate": 5.05108035165766e-06, "loss": 0.3964, "step": 4302 }, { "epoch": 1.024275599452609, "grad_norm": 0.36001024577676244, "learning_rate": 5.049152854530994e-06, "loss": 0.2792, "step": 4303 }, { "epoch": 1.024513595525674, "grad_norm": 0.37961661632016525, "learning_rate": 5.047225350098999e-06, "loss": 0.3038, "step": 4304 }, { "epoch": 1.0247515915987386, "grad_norm": 0.3494836771560282, "learning_rate": 5.045297838648145e-06, "loss": 0.3919, "step": 4305 }, { "epoch": 1.0249895876718034, "grad_norm": 0.36305044333379766, "learning_rate": 5.043370320464915e-06, "loss": 0.3636, "step": 4306 }, { "epoch": 1.025227583744868, "grad_norm": 0.4258357687705437, "learning_rate": 5.041442795835783e-06, "loss": 0.2803, "step": 4307 }, { "epoch": 1.025465579817933, "grad_norm": 0.4160868010268067, "learning_rate": 5.039515265047224e-06, "loss": 0.3107, "step": 4308 }, { "epoch": 1.0257035758909978, "grad_norm": 0.4342418241767286, "learning_rate": 5.037587728385719e-06, "loss": 0.4278, "step": 4309 }, { "epoch": 1.0259415719640625, "grad_norm": 0.37103949650763524, "learning_rate": 5.035660186137749e-06, "loss": 0.3015, "step": 4310 }, { "epoch": 1.0261795680371273, "grad_norm": 0.38074037486983087, "learning_rate": 5.033732638589793e-06, "loss": 0.2781, "step": 4311 }, { "epoch": 1.0264175641101922, "grad_norm": 0.4189038925958272, "learning_rate": 5.0318050860283306e-06, "loss": 0.3664, "step": 4312 }, { "epoch": 1.026655560183257, "grad_norm": 0.3951890393473658, "learning_rate": 5.029877528739848e-06, "loss": 0.3446, "step": 4313 }, { "epoch": 1.0268935562563217, "grad_norm": 0.3983002055289879, "learning_rate": 5.0279499670108245e-06, "loss": 0.3065, "step": 4314 }, { "epoch": 1.0271315523293865, "grad_norm": 0.39958104133807376, "learning_rate": 5.0260224011277445e-06, "loss": 0.3262, "step": 4315 }, { "epoch": 1.0273695484024514, "grad_norm": 0.4123932451298326, "learning_rate": 5.0240948313770934e-06, "loss": 0.3763, "step": 4316 }, { "epoch": 1.0276075444755162, "grad_norm": 0.39306243304232114, "learning_rate": 5.022167258045353e-06, "loss": 0.3214, "step": 4317 }, { "epoch": 1.027845540548581, "grad_norm": 0.3761676443591786, "learning_rate": 5.0202396814190095e-06, "loss": 0.2848, "step": 4318 }, { "epoch": 1.0280835366216456, "grad_norm": 0.38223410956180287, "learning_rate": 5.018312101784548e-06, "loss": 0.3257, "step": 4319 }, { "epoch": 1.0283215326947106, "grad_norm": 0.3597507159599325, "learning_rate": 5.016384519428456e-06, "loss": 0.3752, "step": 4320 }, { "epoch": 1.0285595287677753, "grad_norm": 0.328025805606429, "learning_rate": 5.0144569346372185e-06, "loss": 0.2937, "step": 4321 }, { "epoch": 1.02879752484084, "grad_norm": 0.4660913256009466, "learning_rate": 5.012529347697322e-06, "loss": 0.3239, "step": 4322 }, { "epoch": 1.0290355209139048, "grad_norm": 0.383898516473306, "learning_rate": 5.010601758895257e-06, "loss": 0.3778, "step": 4323 }, { "epoch": 1.0292735169869698, "grad_norm": 0.36767695340931256, "learning_rate": 5.008674168517507e-06, "loss": 0.3279, "step": 4324 }, { "epoch": 1.0295115130600345, "grad_norm": 0.37694685767170544, "learning_rate": 5.006746576850562e-06, "loss": 0.3108, "step": 4325 }, { "epoch": 1.0297495091330993, "grad_norm": 0.37325893976856545, "learning_rate": 5.004818984180907e-06, "loss": 0.3387, "step": 4326 }, { "epoch": 1.029987505206164, "grad_norm": 0.38537315515694565, "learning_rate": 5.002891390795033e-06, "loss": 0.4258, "step": 4327 }, { "epoch": 1.030225501279229, "grad_norm": 0.3589773496139594, "learning_rate": 5.0009637969794255e-06, "loss": 0.2959, "step": 4328 }, { "epoch": 1.0304634973522937, "grad_norm": 0.37090708026971275, "learning_rate": 4.9990362030205745e-06, "loss": 0.2818, "step": 4329 }, { "epoch": 1.0307014934253584, "grad_norm": 0.3798707951977427, "learning_rate": 4.997108609204968e-06, "loss": 0.3882, "step": 4330 }, { "epoch": 1.0309394894984232, "grad_norm": 0.3786611077695848, "learning_rate": 4.995181015819094e-06, "loss": 0.3903, "step": 4331 }, { "epoch": 1.0311774855714881, "grad_norm": 0.37672618523404194, "learning_rate": 4.99325342314944e-06, "loss": 0.3122, "step": 4332 }, { "epoch": 1.0314154816445529, "grad_norm": 0.391170036332283, "learning_rate": 4.991325831482494e-06, "loss": 0.3309, "step": 4333 }, { "epoch": 1.0316534777176176, "grad_norm": 0.4742272257387251, "learning_rate": 4.989398241104745e-06, "loss": 0.3744, "step": 4334 }, { "epoch": 1.0318914737906824, "grad_norm": 0.381528588265086, "learning_rate": 4.987470652302679e-06, "loss": 0.3236, "step": 4335 }, { "epoch": 1.0321294698637473, "grad_norm": 0.3909739709474945, "learning_rate": 4.985543065362782e-06, "loss": 0.3005, "step": 4336 }, { "epoch": 1.032367465936812, "grad_norm": 0.4030798587204337, "learning_rate": 4.983615480571546e-06, "loss": 0.3292, "step": 4337 }, { "epoch": 1.0326054620098768, "grad_norm": 0.39016102552178, "learning_rate": 4.981687898215454e-06, "loss": 0.3811, "step": 4338 }, { "epoch": 1.0328434580829415, "grad_norm": 0.35718795349461896, "learning_rate": 4.979760318580993e-06, "loss": 0.297, "step": 4339 }, { "epoch": 1.0330814541560065, "grad_norm": 1.5662065706280457, "learning_rate": 4.97783274195465e-06, "loss": 0.3122, "step": 4340 }, { "epoch": 1.0333194502290712, "grad_norm": 0.4031583976642777, "learning_rate": 4.97590516862291e-06, "loss": 0.3742, "step": 4341 }, { "epoch": 1.033557446302136, "grad_norm": 0.3666936736763186, "learning_rate": 4.973977598872257e-06, "loss": 0.3361, "step": 4342 }, { "epoch": 1.0337954423752007, "grad_norm": 0.3870156161445879, "learning_rate": 4.9720500329891755e-06, "loss": 0.2907, "step": 4343 }, { "epoch": 1.0340334384482657, "grad_norm": 0.3943973816109862, "learning_rate": 4.9701224712601526e-06, "loss": 0.3316, "step": 4344 }, { "epoch": 1.0342714345213304, "grad_norm": 0.37045738296936315, "learning_rate": 4.9681949139716686e-06, "loss": 0.3745, "step": 4345 }, { "epoch": 1.0345094305943952, "grad_norm": 0.3526903041685496, "learning_rate": 4.966267361410209e-06, "loss": 0.2671, "step": 4346 }, { "epoch": 1.03474742666746, "grad_norm": 0.4097067620359685, "learning_rate": 4.964339813862252e-06, "loss": 0.3019, "step": 4347 }, { "epoch": 1.0349854227405249, "grad_norm": 0.3907934312556052, "learning_rate": 4.962412271614282e-06, "loss": 0.3721, "step": 4348 }, { "epoch": 1.0352234188135896, "grad_norm": 0.39946066906031263, "learning_rate": 4.9604847349527775e-06, "loss": 0.3449, "step": 4349 }, { "epoch": 1.0354614148866543, "grad_norm": 0.35694279142285595, "learning_rate": 4.958557204164219e-06, "loss": 0.2945, "step": 4350 }, { "epoch": 1.035699410959719, "grad_norm": 0.36264389505281575, "learning_rate": 4.956629679535086e-06, "loss": 0.3114, "step": 4351 }, { "epoch": 1.035937407032784, "grad_norm": 0.3837297046950812, "learning_rate": 4.954702161351856e-06, "loss": 0.398, "step": 4352 }, { "epoch": 1.0361754031058488, "grad_norm": 0.3723581905698087, "learning_rate": 4.952774649901004e-06, "loss": 0.3005, "step": 4353 }, { "epoch": 1.0364133991789135, "grad_norm": 0.3668387139380813, "learning_rate": 4.950847145469008e-06, "loss": 0.3008, "step": 4354 }, { "epoch": 1.0366513952519782, "grad_norm": 0.36162042038397063, "learning_rate": 4.948919648342342e-06, "loss": 0.3702, "step": 4355 }, { "epoch": 1.0368893913250432, "grad_norm": 0.3603460188850127, "learning_rate": 4.946992158807481e-06, "loss": 0.3833, "step": 4356 }, { "epoch": 1.037127387398108, "grad_norm": 0.3744254359291755, "learning_rate": 4.945064677150893e-06, "loss": 0.281, "step": 4357 }, { "epoch": 1.0373653834711727, "grad_norm": 0.3842915853972375, "learning_rate": 4.943137203659056e-06, "loss": 0.346, "step": 4358 }, { "epoch": 1.0376033795442374, "grad_norm": 0.3822698279560983, "learning_rate": 4.941209738618437e-06, "loss": 0.4112, "step": 4359 }, { "epoch": 1.0378413756173024, "grad_norm": 0.40057285679058796, "learning_rate": 4.939282282315505e-06, "loss": 0.2888, "step": 4360 }, { "epoch": 1.0380793716903671, "grad_norm": 0.382452358107358, "learning_rate": 4.937354835036728e-06, "loss": 0.2905, "step": 4361 }, { "epoch": 1.0383173677634319, "grad_norm": 0.3850074270426723, "learning_rate": 4.935427397068573e-06, "loss": 0.3644, "step": 4362 }, { "epoch": 1.0385553638364966, "grad_norm": 0.34238196519140435, "learning_rate": 4.933499968697503e-06, "loss": 0.3405, "step": 4363 }, { "epoch": 1.0387933599095616, "grad_norm": 0.36795318780076897, "learning_rate": 4.931572550209983e-06, "loss": 0.2819, "step": 4364 }, { "epoch": 1.0390313559826263, "grad_norm": 0.3974431339386504, "learning_rate": 4.929645141892475e-06, "loss": 0.3485, "step": 4365 }, { "epoch": 1.039269352055691, "grad_norm": 0.3861629647354368, "learning_rate": 4.92771774403144e-06, "loss": 0.4002, "step": 4366 }, { "epoch": 1.0395073481287558, "grad_norm": 0.3874473883648921, "learning_rate": 4.925790356913337e-06, "loss": 0.3324, "step": 4367 }, { "epoch": 1.0397453442018207, "grad_norm": 0.39344198259301555, "learning_rate": 4.923862980824622e-06, "loss": 0.2779, "step": 4368 }, { "epoch": 1.0399833402748855, "grad_norm": 0.4195324906692714, "learning_rate": 4.921935616051751e-06, "loss": 0.3325, "step": 4369 }, { "epoch": 1.0402213363479502, "grad_norm": 0.3908083748851083, "learning_rate": 4.920008262881177e-06, "loss": 0.3987, "step": 4370 }, { "epoch": 1.040459332421015, "grad_norm": 0.3951373300669574, "learning_rate": 4.918080921599356e-06, "loss": 0.2903, "step": 4371 }, { "epoch": 1.04069732849408, "grad_norm": 0.40344550596748047, "learning_rate": 4.9161535924927375e-06, "loss": 0.3174, "step": 4372 }, { "epoch": 1.0409353245671447, "grad_norm": 0.3856931317543428, "learning_rate": 4.914226275847768e-06, "loss": 0.3455, "step": 4373 }, { "epoch": 1.0411733206402094, "grad_norm": 0.36331854787145207, "learning_rate": 4.912298971950897e-06, "loss": 0.3507, "step": 4374 }, { "epoch": 1.0414113167132741, "grad_norm": 0.370895311159966, "learning_rate": 4.910371681088568e-06, "loss": 0.3087, "step": 4375 }, { "epoch": 1.041649312786339, "grad_norm": 0.3771524826161335, "learning_rate": 4.908444403547224e-06, "loss": 0.3444, "step": 4376 }, { "epoch": 1.0418873088594038, "grad_norm": 0.41144922180124954, "learning_rate": 4.906517139613307e-06, "loss": 0.4052, "step": 4377 }, { "epoch": 1.0421253049324686, "grad_norm": 0.3646312683356659, "learning_rate": 4.9045898895732555e-06, "loss": 0.296, "step": 4378 }, { "epoch": 1.0423633010055333, "grad_norm": 0.38904303184845496, "learning_rate": 4.902662653713507e-06, "loss": 0.2914, "step": 4379 }, { "epoch": 1.0426012970785983, "grad_norm": 0.38168935291772754, "learning_rate": 4.900735432320496e-06, "loss": 0.3709, "step": 4380 }, { "epoch": 1.042839293151663, "grad_norm": 0.3730634093577279, "learning_rate": 4.898808225680656e-06, "loss": 0.358, "step": 4381 }, { "epoch": 1.0430772892247278, "grad_norm": 0.38559125950569123, "learning_rate": 4.8968810340804166e-06, "loss": 0.2736, "step": 4382 }, { "epoch": 1.0433152852977925, "grad_norm": 0.38852084763466593, "learning_rate": 4.894953857806207e-06, "loss": 0.3232, "step": 4383 }, { "epoch": 1.0435532813708575, "grad_norm": 0.3922704604899118, "learning_rate": 4.893026697144451e-06, "loss": 0.3953, "step": 4384 }, { "epoch": 1.0437912774439222, "grad_norm": 0.3464747600169571, "learning_rate": 4.891099552381575e-06, "loss": 0.3247, "step": 4385 }, { "epoch": 1.044029273516987, "grad_norm": 0.3814240246283631, "learning_rate": 4.8891724238040004e-06, "loss": 0.2907, "step": 4386 }, { "epoch": 1.0442672695900517, "grad_norm": 0.3771293957546087, "learning_rate": 4.887245311698146e-06, "loss": 0.355, "step": 4387 }, { "epoch": 1.0445052656631166, "grad_norm": 0.3868624148021604, "learning_rate": 4.8853182163504265e-06, "loss": 0.3455, "step": 4388 }, { "epoch": 1.0447432617361814, "grad_norm": 0.4023587442678937, "learning_rate": 4.883391138047258e-06, "loss": 0.3055, "step": 4389 }, { "epoch": 1.0449812578092461, "grad_norm": 0.3743342358321569, "learning_rate": 4.8814640770750495e-06, "loss": 0.3441, "step": 4390 }, { "epoch": 1.0452192538823109, "grad_norm": 0.3908606318397311, "learning_rate": 4.879537033720212e-06, "loss": 0.3837, "step": 4391 }, { "epoch": 1.0454572499553758, "grad_norm": 0.3927864891120312, "learning_rate": 4.877610008269153e-06, "loss": 0.3233, "step": 4392 }, { "epoch": 1.0456952460284406, "grad_norm": 0.37891845327252005, "learning_rate": 4.875683001008274e-06, "loss": 0.3127, "step": 4393 }, { "epoch": 1.0459332421015053, "grad_norm": 0.40161470551225587, "learning_rate": 4.873756012223977e-06, "loss": 0.3332, "step": 4394 }, { "epoch": 1.04617123817457, "grad_norm": 0.436580051432871, "learning_rate": 4.871829042202658e-06, "loss": 0.3743, "step": 4395 }, { "epoch": 1.046409234247635, "grad_norm": 0.4008401671583846, "learning_rate": 4.8699020912307155e-06, "loss": 0.303, "step": 4396 }, { "epoch": 1.0466472303206997, "grad_norm": 0.39091168851981944, "learning_rate": 4.86797515959454e-06, "loss": 0.3068, "step": 4397 }, { "epoch": 1.0468852263937645, "grad_norm": 0.4171630119960414, "learning_rate": 4.866048247580521e-06, "loss": 0.3736, "step": 4398 }, { "epoch": 1.0471232224668292, "grad_norm": 0.3830177263756179, "learning_rate": 4.864121355475047e-06, "loss": 0.3746, "step": 4399 }, { "epoch": 1.0473612185398942, "grad_norm": 0.37078719399047205, "learning_rate": 4.862194483564501e-06, "loss": 0.2977, "step": 4400 }, { "epoch": 1.047599214612959, "grad_norm": 0.36764015487272417, "learning_rate": 4.8602676321352646e-06, "loss": 0.3353, "step": 4401 }, { "epoch": 1.0478372106860236, "grad_norm": 0.3791879193271118, "learning_rate": 4.858340801473715e-06, "loss": 0.3836, "step": 4402 }, { "epoch": 1.0480752067590884, "grad_norm": 0.34908273436476017, "learning_rate": 4.856413991866225e-06, "loss": 0.2991, "step": 4403 }, { "epoch": 1.0483132028321533, "grad_norm": 0.3726740932651389, "learning_rate": 4.85448720359917e-06, "loss": 0.3106, "step": 4404 }, { "epoch": 1.048551198905218, "grad_norm": 0.4036355116456374, "learning_rate": 4.852560436958916e-06, "loss": 0.3737, "step": 4405 }, { "epoch": 1.0487891949782828, "grad_norm": 0.36697835588631594, "learning_rate": 4.850633692231828e-06, "loss": 0.3501, "step": 4406 }, { "epoch": 1.0490271910513476, "grad_norm": 0.36549398149960405, "learning_rate": 4.848706969704269e-06, "loss": 0.285, "step": 4407 }, { "epoch": 1.0492651871244125, "grad_norm": 0.38262831908848516, "learning_rate": 4.846780269662597e-06, "loss": 0.3424, "step": 4408 }, { "epoch": 1.0495031831974773, "grad_norm": 0.427528345212208, "learning_rate": 4.8448535923931675e-06, "loss": 0.3711, "step": 4409 }, { "epoch": 1.049741179270542, "grad_norm": 0.35527661752034534, "learning_rate": 4.842926938182332e-06, "loss": 0.3213, "step": 4410 }, { "epoch": 1.0499791753436067, "grad_norm": 0.40048394169356094, "learning_rate": 4.84100030731644e-06, "loss": 0.2941, "step": 4411 }, { "epoch": 1.0502171714166717, "grad_norm": 0.3892299648187431, "learning_rate": 4.8390737000818326e-06, "loss": 0.3235, "step": 4412 }, { "epoch": 1.0504551674897364, "grad_norm": 0.3772914101337546, "learning_rate": 4.837147116764857e-06, "loss": 0.346, "step": 4413 }, { "epoch": 1.0506931635628012, "grad_norm": 0.37840783896597235, "learning_rate": 4.835220557651849e-06, "loss": 0.2795, "step": 4414 }, { "epoch": 1.050931159635866, "grad_norm": 0.38825533133807016, "learning_rate": 4.833294023029142e-06, "loss": 0.3024, "step": 4415 }, { "epoch": 1.0511691557089309, "grad_norm": 0.3865655826669927, "learning_rate": 4.831367513183068e-06, "loss": 0.3913, "step": 4416 }, { "epoch": 1.0514071517819956, "grad_norm": 0.35292818007412385, "learning_rate": 4.829441028399952e-06, "loss": 0.322, "step": 4417 }, { "epoch": 1.0516451478550604, "grad_norm": 0.3668531742705322, "learning_rate": 4.827514568966119e-06, "loss": 0.298, "step": 4418 }, { "epoch": 1.051883143928125, "grad_norm": 0.3749278980780594, "learning_rate": 4.825588135167889e-06, "loss": 0.3288, "step": 4419 }, { "epoch": 1.05212114000119, "grad_norm": 0.3622324145932679, "learning_rate": 4.823661727291577e-06, "loss": 0.389, "step": 4420 }, { "epoch": 1.0523591360742548, "grad_norm": 0.396752910815969, "learning_rate": 4.821735345623494e-06, "loss": 0.2953, "step": 4421 }, { "epoch": 1.0525971321473195, "grad_norm": 0.3758408909876523, "learning_rate": 4.819808990449949e-06, "loss": 0.303, "step": 4422 }, { "epoch": 1.0528351282203843, "grad_norm": 0.4081539657673205, "learning_rate": 4.817882662057246e-06, "loss": 0.3709, "step": 4423 }, { "epoch": 1.0530731242934492, "grad_norm": 0.349815937390151, "learning_rate": 4.815956360731684e-06, "loss": 0.349, "step": 4424 }, { "epoch": 1.053311120366514, "grad_norm": 0.34903612236748616, "learning_rate": 4.814030086759561e-06, "loss": 0.2885, "step": 4425 }, { "epoch": 1.0535491164395787, "grad_norm": 0.39164825325025976, "learning_rate": 4.812103840427165e-06, "loss": 0.3512, "step": 4426 }, { "epoch": 1.0537871125126435, "grad_norm": 0.36669475356586617, "learning_rate": 4.810177622020788e-06, "loss": 0.3903, "step": 4427 }, { "epoch": 1.0540251085857084, "grad_norm": 0.3504650544959202, "learning_rate": 4.808251431826713e-06, "loss": 0.2901, "step": 4428 }, { "epoch": 1.0542631046587732, "grad_norm": 0.34674534781413774, "learning_rate": 4.806325270131219e-06, "loss": 0.2899, "step": 4429 }, { "epoch": 1.054501100731838, "grad_norm": 0.3777786304373956, "learning_rate": 4.80439913722058e-06, "loss": 0.3304, "step": 4430 }, { "epoch": 1.0547390968049026, "grad_norm": 0.37181874792865827, "learning_rate": 4.802473033381069e-06, "loss": 0.4006, "step": 4431 }, { "epoch": 1.0549770928779676, "grad_norm": 0.38052787907987795, "learning_rate": 4.800546958898952e-06, "loss": 0.3153, "step": 4432 }, { "epoch": 1.0552150889510323, "grad_norm": 0.40481761082909645, "learning_rate": 4.798620914060492e-06, "loss": 0.3188, "step": 4433 }, { "epoch": 1.055453085024097, "grad_norm": 0.3588854522073126, "learning_rate": 4.7966948991519446e-06, "loss": 0.3802, "step": 4434 }, { "epoch": 1.0556910810971618, "grad_norm": 0.3773596458270944, "learning_rate": 4.794768914459565e-06, "loss": 0.3467, "step": 4435 }, { "epoch": 1.0559290771702268, "grad_norm": 0.4156895917585443, "learning_rate": 4.792842960269603e-06, "loss": 0.3065, "step": 4436 }, { "epoch": 1.0561670732432915, "grad_norm": 0.3609593155405436, "learning_rate": 4.790917036868301e-06, "loss": 0.3456, "step": 4437 }, { "epoch": 1.0564050693163562, "grad_norm": 0.37963675817335796, "learning_rate": 4.7889911445419e-06, "loss": 0.3859, "step": 4438 }, { "epoch": 1.056643065389421, "grad_norm": 0.38339295139402263, "learning_rate": 4.787065283576633e-06, "loss": 0.3096, "step": 4439 }, { "epoch": 1.056881061462486, "grad_norm": 0.4148001262695314, "learning_rate": 4.785139454258734e-06, "loss": 0.3063, "step": 4440 }, { "epoch": 1.0571190575355507, "grad_norm": 0.3849351006849719, "learning_rate": 4.783213656874428e-06, "loss": 0.354, "step": 4441 }, { "epoch": 1.0573570536086154, "grad_norm": 0.3904733075529352, "learning_rate": 4.781287891709936e-06, "loss": 0.3307, "step": 4442 }, { "epoch": 1.0575950496816802, "grad_norm": 0.39073356763740286, "learning_rate": 4.779362159051474e-06, "loss": 0.3202, "step": 4443 }, { "epoch": 1.0578330457547451, "grad_norm": 0.382841095559944, "learning_rate": 4.777436459185252e-06, "loss": 0.3363, "step": 4444 }, { "epoch": 1.0580710418278099, "grad_norm": 0.37809442113241104, "learning_rate": 4.775510792397479e-06, "loss": 0.365, "step": 4445 }, { "epoch": 1.0583090379008746, "grad_norm": 0.36463101257216596, "learning_rate": 4.773585158974356e-06, "loss": 0.3039, "step": 4446 }, { "epoch": 1.0585470339739393, "grad_norm": 0.4116743791608411, "learning_rate": 4.77165955920208e-06, "loss": 0.3284, "step": 4447 }, { "epoch": 1.0587850300470043, "grad_norm": 0.42410933724473493, "learning_rate": 4.769733993366842e-06, "loss": 0.3708, "step": 4448 }, { "epoch": 1.059023026120069, "grad_norm": 0.37205012492946415, "learning_rate": 4.76780846175483e-06, "loss": 0.3242, "step": 4449 }, { "epoch": 1.0592610221931338, "grad_norm": 0.3472944005021577, "learning_rate": 4.765882964652223e-06, "loss": 0.2834, "step": 4450 }, { "epoch": 1.0594990182661985, "grad_norm": 0.420700058102123, "learning_rate": 4.7639575023452e-06, "loss": 0.3451, "step": 4451 }, { "epoch": 1.0597370143392635, "grad_norm": 0.3850537500639467, "learning_rate": 4.762032075119932e-06, "loss": 0.4028, "step": 4452 }, { "epoch": 1.0599750104123282, "grad_norm": 0.35606255517837127, "learning_rate": 4.760106683262582e-06, "loss": 0.3206, "step": 4453 }, { "epoch": 1.060213006485393, "grad_norm": 0.4207878717161147, "learning_rate": 4.758181327059316e-06, "loss": 0.2905, "step": 4454 }, { "epoch": 1.0604510025584577, "grad_norm": 0.39178183992943744, "learning_rate": 4.756256006796287e-06, "loss": 0.3826, "step": 4455 }, { "epoch": 1.0606889986315227, "grad_norm": 0.3829442776223385, "learning_rate": 4.754330722759645e-06, "loss": 0.3628, "step": 4456 }, { "epoch": 1.0609269947045874, "grad_norm": 0.3954498442186975, "learning_rate": 4.7524054752355345e-06, "loss": 0.3066, "step": 4457 }, { "epoch": 1.0611649907776521, "grad_norm": 0.393762593771785, "learning_rate": 4.750480264510097e-06, "loss": 0.3186, "step": 4458 }, { "epoch": 1.0614029868507169, "grad_norm": 0.36465796218337193, "learning_rate": 4.748555090869464e-06, "loss": 0.3876, "step": 4459 }, { "epoch": 1.0616409829237818, "grad_norm": 0.3664333642625829, "learning_rate": 4.746629954599766e-06, "loss": 0.3216, "step": 4460 }, { "epoch": 1.0618789789968466, "grad_norm": 0.37283914856358796, "learning_rate": 4.744704855987125e-06, "loss": 0.2674, "step": 4461 }, { "epoch": 1.0621169750699113, "grad_norm": 0.3929643060999283, "learning_rate": 4.742779795317657e-06, "loss": 0.3625, "step": 4462 }, { "epoch": 1.062354971142976, "grad_norm": 0.3639867047560858, "learning_rate": 4.740854772877475e-06, "loss": 0.3875, "step": 4463 }, { "epoch": 1.062592967216041, "grad_norm": 0.34468157426611506, "learning_rate": 4.738929788952685e-06, "loss": 0.3047, "step": 4464 }, { "epoch": 1.0628309632891058, "grad_norm": 0.407693623350532, "learning_rate": 4.737004843829387e-06, "loss": 0.3164, "step": 4465 }, { "epoch": 1.0630689593621705, "grad_norm": 0.3953104625961449, "learning_rate": 4.735079937793675e-06, "loss": 0.3848, "step": 4466 }, { "epoch": 1.0633069554352352, "grad_norm": 0.35620972393509376, "learning_rate": 4.733155071131636e-06, "loss": 0.3204, "step": 4467 }, { "epoch": 1.0635449515083002, "grad_norm": 0.37941109344701723, "learning_rate": 4.731230244129357e-06, "loss": 0.2926, "step": 4468 }, { "epoch": 1.063782947581365, "grad_norm": 0.36519045880745366, "learning_rate": 4.729305457072913e-06, "loss": 0.3606, "step": 4469 }, { "epoch": 1.0640209436544297, "grad_norm": 0.3575723913390525, "learning_rate": 4.727380710248375e-06, "loss": 0.3744, "step": 4470 }, { "epoch": 1.0642589397274944, "grad_norm": 0.3567610268225815, "learning_rate": 4.725456003941805e-06, "loss": 0.2866, "step": 4471 }, { "epoch": 1.0644969358005594, "grad_norm": 0.37128027520359136, "learning_rate": 4.723531338439268e-06, "loss": 0.3214, "step": 4472 }, { "epoch": 1.0647349318736241, "grad_norm": 0.3820839576708276, "learning_rate": 4.721606714026812e-06, "loss": 0.3902, "step": 4473 }, { "epoch": 1.0649729279466889, "grad_norm": 0.4089191938756884, "learning_rate": 4.7196821309904865e-06, "loss": 0.334, "step": 4474 }, { "epoch": 1.0652109240197536, "grad_norm": 0.3728452645121599, "learning_rate": 4.717757589616331e-06, "loss": 0.2551, "step": 4475 }, { "epoch": 1.0654489200928186, "grad_norm": 0.37511700011909915, "learning_rate": 4.715833090190379e-06, "loss": 0.3443, "step": 4476 }, { "epoch": 1.0656869161658833, "grad_norm": 0.37701885649521155, "learning_rate": 4.713908632998661e-06, "loss": 0.3962, "step": 4477 }, { "epoch": 1.065924912238948, "grad_norm": 0.3616386275749788, "learning_rate": 4.711984218327197e-06, "loss": 0.3166, "step": 4478 }, { "epoch": 1.0661629083120128, "grad_norm": 0.38456233240401916, "learning_rate": 4.710059846462003e-06, "loss": 0.3029, "step": 4479 }, { "epoch": 1.0664009043850777, "grad_norm": 0.3789566875155259, "learning_rate": 4.708135517689088e-06, "loss": 0.3717, "step": 4480 }, { "epoch": 1.0666389004581425, "grad_norm": 0.3768525328415589, "learning_rate": 4.706211232294456e-06, "loss": 0.3438, "step": 4481 }, { "epoch": 1.0668768965312072, "grad_norm": 0.3783650867548586, "learning_rate": 4.704286990564103e-06, "loss": 0.2877, "step": 4482 }, { "epoch": 1.067114892604272, "grad_norm": 0.41564337367706855, "learning_rate": 4.702362792784019e-06, "loss": 0.3331, "step": 4483 }, { "epoch": 1.067352888677337, "grad_norm": 0.43075275803922985, "learning_rate": 4.700438639240186e-06, "loss": 0.3629, "step": 4484 }, { "epoch": 1.0675908847504016, "grad_norm": 0.36040007984808253, "learning_rate": 4.698514530218584e-06, "loss": 0.3061, "step": 4485 }, { "epoch": 1.0678288808234664, "grad_norm": 0.404962256464901, "learning_rate": 4.6965904660051804e-06, "loss": 0.3088, "step": 4486 }, { "epoch": 1.0680668768965311, "grad_norm": 0.3697507889870145, "learning_rate": 4.6946664468859395e-06, "loss": 0.3457, "step": 4487 }, { "epoch": 1.068304872969596, "grad_norm": 0.4036468754442713, "learning_rate": 4.692742473146818e-06, "loss": 0.3603, "step": 4488 }, { "epoch": 1.0685428690426608, "grad_norm": 0.4042242479571805, "learning_rate": 4.690818545073767e-06, "loss": 0.2794, "step": 4489 }, { "epoch": 1.0687808651157256, "grad_norm": 0.4099157906703565, "learning_rate": 4.688894662952729e-06, "loss": 0.3127, "step": 4490 }, { "epoch": 1.0690188611887903, "grad_norm": 0.3468763641985224, "learning_rate": 4.686970827069639e-06, "loss": 0.3843, "step": 4491 }, { "epoch": 1.0692568572618553, "grad_norm": 0.37635304439254064, "learning_rate": 4.68504703771043e-06, "loss": 0.3453, "step": 4492 }, { "epoch": 1.06949485333492, "grad_norm": 0.39528560108604954, "learning_rate": 4.683123295161021e-06, "loss": 0.2812, "step": 4493 }, { "epoch": 1.0697328494079847, "grad_norm": 0.4283777664794825, "learning_rate": 4.6811995997073285e-06, "loss": 0.3331, "step": 4494 }, { "epoch": 1.0699708454810495, "grad_norm": 0.3710567512234336, "learning_rate": 4.679275951635264e-06, "loss": 0.3875, "step": 4495 }, { "epoch": 1.0702088415541144, "grad_norm": 0.36243117057229535, "learning_rate": 4.6773523512307275e-06, "loss": 0.2728, "step": 4496 }, { "epoch": 1.0704468376271792, "grad_norm": 0.37892611536648607, "learning_rate": 4.675428798779613e-06, "loss": 0.3035, "step": 4497 }, { "epoch": 1.070684833700244, "grad_norm": 0.36860351230116295, "learning_rate": 4.673505294567809e-06, "loss": 0.3669, "step": 4498 }, { "epoch": 1.0709228297733087, "grad_norm": 0.36740997263006975, "learning_rate": 4.6715818388811945e-06, "loss": 0.3238, "step": 4499 }, { "epoch": 1.0711608258463736, "grad_norm": 0.3994604434926032, "learning_rate": 4.669658432005644e-06, "loss": 0.296, "step": 4500 }, { "epoch": 1.0713988219194384, "grad_norm": 0.3698668250700613, "learning_rate": 4.667735074227024e-06, "loss": 0.3494, "step": 4501 }, { "epoch": 1.071636817992503, "grad_norm": 0.440292753533519, "learning_rate": 4.66581176583119e-06, "loss": 0.4327, "step": 4502 }, { "epoch": 1.0718748140655678, "grad_norm": 0.3719781722435344, "learning_rate": 4.663888507103996e-06, "loss": 0.3222, "step": 4503 }, { "epoch": 1.0721128101386328, "grad_norm": 0.4082136221681287, "learning_rate": 4.6619652983312844e-06, "loss": 0.2975, "step": 4504 }, { "epoch": 1.0723508062116975, "grad_norm": 0.360658966423447, "learning_rate": 4.660042139798892e-06, "loss": 0.342, "step": 4505 }, { "epoch": 1.0725888022847623, "grad_norm": 0.3593445634450902, "learning_rate": 4.658119031792648e-06, "loss": 0.3406, "step": 4506 }, { "epoch": 1.072826798357827, "grad_norm": 0.40080585396608215, "learning_rate": 4.6561959745983724e-06, "loss": 0.2878, "step": 4507 }, { "epoch": 1.073064794430892, "grad_norm": 0.38452336744074866, "learning_rate": 4.65427296850188e-06, "loss": 0.3205, "step": 4508 }, { "epoch": 1.0733027905039567, "grad_norm": 0.40724996130914426, "learning_rate": 4.652350013788979e-06, "loss": 0.4083, "step": 4509 }, { "epoch": 1.0735407865770215, "grad_norm": 0.4014847828369795, "learning_rate": 4.650427110745467e-06, "loss": 0.3173, "step": 4510 }, { "epoch": 1.0737787826500862, "grad_norm": 0.38549635848058716, "learning_rate": 4.648504259657132e-06, "loss": 0.2915, "step": 4511 }, { "epoch": 1.0740167787231512, "grad_norm": 0.3852457361423471, "learning_rate": 4.646581460809762e-06, "loss": 0.3348, "step": 4512 }, { "epoch": 1.074254774796216, "grad_norm": 0.3712671658962982, "learning_rate": 4.644658714489129e-06, "loss": 0.3808, "step": 4513 }, { "epoch": 1.0744927708692806, "grad_norm": 0.43356148305108094, "learning_rate": 4.642736020981002e-06, "loss": 0.2974, "step": 4514 }, { "epoch": 1.0747307669423454, "grad_norm": 0.3800025715731251, "learning_rate": 4.64081338057114e-06, "loss": 0.3053, "step": 4515 }, { "epoch": 1.0749687630154103, "grad_norm": 0.4199497942695732, "learning_rate": 4.638890793545297e-06, "loss": 0.3937, "step": 4516 }, { "epoch": 1.075206759088475, "grad_norm": 0.35073357189494836, "learning_rate": 4.636968260189214e-06, "loss": 0.3195, "step": 4517 }, { "epoch": 1.0754447551615398, "grad_norm": 0.4014979008525594, "learning_rate": 4.635045780788629e-06, "loss": 0.2757, "step": 4518 }, { "epoch": 1.0756827512346046, "grad_norm": 0.4018578448257378, "learning_rate": 4.63312335562927e-06, "loss": 0.3202, "step": 4519 }, { "epoch": 1.0759207473076695, "grad_norm": 0.3853979552410293, "learning_rate": 4.6312009849968544e-06, "loss": 0.3796, "step": 4520 }, { "epoch": 1.0761587433807343, "grad_norm": 0.3725529344423733, "learning_rate": 4.629278669177098e-06, "loss": 0.3162, "step": 4521 }, { "epoch": 1.076396739453799, "grad_norm": 0.3465817848520335, "learning_rate": 4.627356408455701e-06, "loss": 0.2863, "step": 4522 }, { "epoch": 1.0766347355268637, "grad_norm": 0.3869139089298558, "learning_rate": 4.625434203118362e-06, "loss": 0.3826, "step": 4523 }, { "epoch": 1.0768727315999287, "grad_norm": 0.3809180722646469, "learning_rate": 4.623512053450767e-06, "loss": 0.3408, "step": 4524 }, { "epoch": 1.0771107276729934, "grad_norm": 0.3703176815754259, "learning_rate": 4.621589959738593e-06, "loss": 0.3108, "step": 4525 }, { "epoch": 1.0773487237460582, "grad_norm": 0.37759518980753926, "learning_rate": 4.619667922267514e-06, "loss": 0.3366, "step": 4526 }, { "epoch": 1.077586719819123, "grad_norm": 0.3898155012281468, "learning_rate": 4.617745941323189e-06, "loss": 0.4133, "step": 4527 }, { "epoch": 1.0778247158921879, "grad_norm": 0.36255189396131254, "learning_rate": 4.615824017191275e-06, "loss": 0.3031, "step": 4528 }, { "epoch": 1.0780627119652526, "grad_norm": 0.3691335793651894, "learning_rate": 4.613902150157416e-06, "loss": 0.2816, "step": 4529 }, { "epoch": 1.0783007080383173, "grad_norm": 0.39196606245884325, "learning_rate": 4.61198034050725e-06, "loss": 0.3632, "step": 4530 }, { "epoch": 1.078538704111382, "grad_norm": 0.39160631748508107, "learning_rate": 4.610058588526404e-06, "loss": 0.3471, "step": 4531 }, { "epoch": 1.078776700184447, "grad_norm": 0.3624974985968683, "learning_rate": 4.6081368945004976e-06, "loss": 0.301, "step": 4532 }, { "epoch": 1.0790146962575118, "grad_norm": 0.3921259171027228, "learning_rate": 4.606215258715144e-06, "loss": 0.3611, "step": 4533 }, { "epoch": 1.0792526923305765, "grad_norm": 0.38270676417966176, "learning_rate": 4.604293681455942e-06, "loss": 0.3717, "step": 4534 }, { "epoch": 1.0794906884036413, "grad_norm": 0.4878601599757468, "learning_rate": 4.602372163008491e-06, "loss": 0.3385, "step": 4535 }, { "epoch": 1.0797286844767062, "grad_norm": 0.4258797436561885, "learning_rate": 4.6004507036583714e-06, "loss": 0.3118, "step": 4536 }, { "epoch": 1.079966680549771, "grad_norm": 0.36842771403409225, "learning_rate": 4.598529303691163e-06, "loss": 0.354, "step": 4537 }, { "epoch": 1.0802046766228357, "grad_norm": 0.38395032278946645, "learning_rate": 4.596607963392431e-06, "loss": 0.3878, "step": 4538 }, { "epoch": 1.0804426726959004, "grad_norm": 0.37424827121079335, "learning_rate": 4.594686683047736e-06, "loss": 0.3127, "step": 4539 }, { "epoch": 1.0806806687689654, "grad_norm": 0.36463462903952687, "learning_rate": 4.592765462942627e-06, "loss": 0.3135, "step": 4540 }, { "epoch": 1.0809186648420301, "grad_norm": 0.4010499621374228, "learning_rate": 4.590844303362645e-06, "loss": 0.4033, "step": 4541 }, { "epoch": 1.0811566609150949, "grad_norm": 0.35721252951546795, "learning_rate": 4.5889232045933204e-06, "loss": 0.3467, "step": 4542 }, { "epoch": 1.0813946569881596, "grad_norm": 0.3883975133944084, "learning_rate": 4.587002166920178e-06, "loss": 0.2614, "step": 4543 }, { "epoch": 1.0816326530612246, "grad_norm": 0.3644825130668685, "learning_rate": 4.5850811906287315e-06, "loss": 0.3605, "step": 4544 }, { "epoch": 1.0818706491342893, "grad_norm": 0.3839532185379082, "learning_rate": 4.583160276004483e-06, "loss": 0.3859, "step": 4545 }, { "epoch": 1.082108645207354, "grad_norm": 0.37149271895145186, "learning_rate": 4.58123942333293e-06, "loss": 0.2943, "step": 4546 }, { "epoch": 1.0823466412804188, "grad_norm": 0.386864449700323, "learning_rate": 4.5793186328995585e-06, "loss": 0.3084, "step": 4547 }, { "epoch": 1.0825846373534838, "grad_norm": 0.36399312445382515, "learning_rate": 4.577397904989846e-06, "loss": 0.3798, "step": 4548 }, { "epoch": 1.0828226334265485, "grad_norm": 0.4192409273041034, "learning_rate": 4.575477239889258e-06, "loss": 0.359, "step": 4549 }, { "epoch": 1.0830606294996132, "grad_norm": 0.4118535694398554, "learning_rate": 4.5735566378832545e-06, "loss": 0.3003, "step": 4550 }, { "epoch": 1.083298625572678, "grad_norm": 0.39405326883837694, "learning_rate": 4.571636099257285e-06, "loss": 0.3287, "step": 4551 }, { "epoch": 1.083536621645743, "grad_norm": 0.4282411602854888, "learning_rate": 4.569715624296788e-06, "loss": 0.3933, "step": 4552 }, { "epoch": 1.0837746177188077, "grad_norm": 0.3646511346959913, "learning_rate": 4.567795213287194e-06, "loss": 0.3013, "step": 4553 }, { "epoch": 1.0840126137918724, "grad_norm": 0.39611556493858835, "learning_rate": 4.565874866513924e-06, "loss": 0.3023, "step": 4554 }, { "epoch": 1.0842506098649372, "grad_norm": 0.41873346499425335, "learning_rate": 4.563954584262388e-06, "loss": 0.3308, "step": 4555 }, { "epoch": 1.0844886059380021, "grad_norm": 0.37895952135841016, "learning_rate": 4.562034366817989e-06, "loss": 0.3503, "step": 4556 }, { "epoch": 1.0847266020110669, "grad_norm": 0.3755212007026155, "learning_rate": 4.560114214466118e-06, "loss": 0.2777, "step": 4557 }, { "epoch": 1.0849645980841316, "grad_norm": 0.3686507710984981, "learning_rate": 4.558194127492156e-06, "loss": 0.3146, "step": 4558 }, { "epoch": 1.0852025941571963, "grad_norm": 0.37532310578022504, "learning_rate": 4.556274106181477e-06, "loss": 0.4073, "step": 4559 }, { "epoch": 1.0854405902302613, "grad_norm": 0.38070453470704196, "learning_rate": 4.554354150819442e-06, "loss": 0.3069, "step": 4560 }, { "epoch": 1.085678586303326, "grad_norm": 0.38448824696456024, "learning_rate": 4.552434261691405e-06, "loss": 0.2876, "step": 4561 }, { "epoch": 1.0859165823763908, "grad_norm": 0.4030821070519119, "learning_rate": 4.55051443908271e-06, "loss": 0.3773, "step": 4562 }, { "epoch": 1.0861545784494555, "grad_norm": 0.35298371324439093, "learning_rate": 4.5485946832786885e-06, "loss": 0.3785, "step": 4563 }, { "epoch": 1.0863925745225205, "grad_norm": 0.36012679234114514, "learning_rate": 4.546674994564664e-06, "loss": 0.3021, "step": 4564 }, { "epoch": 1.0866305705955852, "grad_norm": 0.41175830720140366, "learning_rate": 4.544755373225949e-06, "loss": 0.3291, "step": 4565 }, { "epoch": 1.08686856666865, "grad_norm": 0.3729710973827501, "learning_rate": 4.54283581954785e-06, "loss": 0.3917, "step": 4566 }, { "epoch": 1.0871065627417147, "grad_norm": 0.38900125919653233, "learning_rate": 4.540916333815658e-06, "loss": 0.3425, "step": 4567 }, { "epoch": 1.0873445588147796, "grad_norm": 0.4185005530339485, "learning_rate": 4.5389969163146544e-06, "loss": 0.3012, "step": 4568 }, { "epoch": 1.0875825548878444, "grad_norm": 0.4088166803824769, "learning_rate": 4.537077567330115e-06, "loss": 0.3202, "step": 4569 }, { "epoch": 1.0878205509609091, "grad_norm": 0.39177237999984393, "learning_rate": 4.535158287147301e-06, "loss": 0.3713, "step": 4570 }, { "epoch": 1.0880585470339739, "grad_norm": 0.3853574514336472, "learning_rate": 4.533239076051465e-06, "loss": 0.3007, "step": 4571 }, { "epoch": 1.0882965431070388, "grad_norm": 0.43082485652705493, "learning_rate": 4.531319934327849e-06, "loss": 0.3271, "step": 4572 }, { "epoch": 1.0885345391801036, "grad_norm": 0.40152293574128467, "learning_rate": 4.529400862261686e-06, "loss": 0.3935, "step": 4573 }, { "epoch": 1.0887725352531683, "grad_norm": 0.3820747241972296, "learning_rate": 4.527481860138196e-06, "loss": 0.3331, "step": 4574 }, { "epoch": 1.089010531326233, "grad_norm": 0.3766358656424945, "learning_rate": 4.525562928242592e-06, "loss": 0.3045, "step": 4575 }, { "epoch": 1.089248527399298, "grad_norm": 0.4237754758083936, "learning_rate": 4.523644066860074e-06, "loss": 0.3531, "step": 4576 }, { "epoch": 1.0894865234723627, "grad_norm": 0.4304790203953773, "learning_rate": 4.52172527627583e-06, "loss": 0.3691, "step": 4577 }, { "epoch": 1.0897245195454275, "grad_norm": 0.35559337217271153, "learning_rate": 4.519806556775043e-06, "loss": 0.3105, "step": 4578 }, { "epoch": 1.0899625156184922, "grad_norm": 0.36470318443377925, "learning_rate": 4.517887908642882e-06, "loss": 0.2944, "step": 4579 }, { "epoch": 1.0902005116915572, "grad_norm": 0.3917075725340582, "learning_rate": 4.515969332164504e-06, "loss": 0.3561, "step": 4580 }, { "epoch": 1.090438507764622, "grad_norm": 0.3706508136005053, "learning_rate": 4.514050827625058e-06, "loss": 0.3862, "step": 4581 }, { "epoch": 1.0906765038376867, "grad_norm": 0.38950829726986785, "learning_rate": 4.512132395309681e-06, "loss": 0.2885, "step": 4582 }, { "epoch": 1.0909144999107514, "grad_norm": 0.3771455973737931, "learning_rate": 4.510214035503499e-06, "loss": 0.3464, "step": 4583 }, { "epoch": 1.0911524959838164, "grad_norm": 0.36988427743457747, "learning_rate": 4.508295748491628e-06, "loss": 0.4167, "step": 4584 }, { "epoch": 1.091390492056881, "grad_norm": 0.35658085377837206, "learning_rate": 4.506377534559174e-06, "loss": 0.3147, "step": 4585 }, { "epoch": 1.0916284881299458, "grad_norm": 0.3761893075654618, "learning_rate": 4.504459393991229e-06, "loss": 0.2916, "step": 4586 }, { "epoch": 1.0918664842030106, "grad_norm": 0.3778558101407558, "learning_rate": 4.502541327072877e-06, "loss": 0.3372, "step": 4587 }, { "epoch": 1.0921044802760755, "grad_norm": 0.3819227496695729, "learning_rate": 4.50062333408919e-06, "loss": 0.4006, "step": 4588 }, { "epoch": 1.0923424763491403, "grad_norm": 0.4723091940072908, "learning_rate": 4.49870541532523e-06, "loss": 0.3153, "step": 4589 }, { "epoch": 1.092580472422205, "grad_norm": 0.45016072076415764, "learning_rate": 4.496787571066047e-06, "loss": 0.3088, "step": 4590 }, { "epoch": 1.0928184684952698, "grad_norm": 0.3866045379739883, "learning_rate": 4.494869801596679e-06, "loss": 0.3687, "step": 4591 }, { "epoch": 1.0930564645683347, "grad_norm": 0.3739372092733529, "learning_rate": 4.492952107202154e-06, "loss": 0.2934, "step": 4592 }, { "epoch": 1.0932944606413995, "grad_norm": 0.35295097365438255, "learning_rate": 4.49103448816749e-06, "loss": 0.2993, "step": 4593 }, { "epoch": 1.0935324567144642, "grad_norm": 0.3938263757181118, "learning_rate": 4.489116944777694e-06, "loss": 0.3359, "step": 4594 }, { "epoch": 1.093770452787529, "grad_norm": 0.3831990383015829, "learning_rate": 4.487199477317758e-06, "loss": 0.3769, "step": 4595 }, { "epoch": 1.094008448860594, "grad_norm": 0.35100904325779053, "learning_rate": 4.485282086072666e-06, "loss": 0.2762, "step": 4596 }, { "epoch": 1.0942464449336586, "grad_norm": 0.38231100716355565, "learning_rate": 4.48336477132739e-06, "loss": 0.2986, "step": 4597 }, { "epoch": 1.0944844410067234, "grad_norm": 0.36444180770894197, "learning_rate": 4.4814475333668884e-06, "loss": 0.3311, "step": 4598 }, { "epoch": 1.0947224370797881, "grad_norm": 0.36930290531645005, "learning_rate": 4.479530372476113e-06, "loss": 0.3345, "step": 4599 }, { "epoch": 1.094960433152853, "grad_norm": 0.38130542257095973, "learning_rate": 4.477613288939999e-06, "loss": 0.2754, "step": 4600 }, { "epoch": 1.0951984292259178, "grad_norm": 0.371481003330978, "learning_rate": 4.4756962830434735e-06, "loss": 0.3172, "step": 4601 }, { "epoch": 1.0954364252989826, "grad_norm": 0.3810788506730066, "learning_rate": 4.4737793550714515e-06, "loss": 0.3931, "step": 4602 }, { "epoch": 1.0956744213720473, "grad_norm": 0.35314794218327844, "learning_rate": 4.471862505308835e-06, "loss": 0.2966, "step": 4603 }, { "epoch": 1.0959124174451123, "grad_norm": 0.3836019074456394, "learning_rate": 4.469945734040516e-06, "loss": 0.331, "step": 4604 }, { "epoch": 1.096150413518177, "grad_norm": 0.42836723354827033, "learning_rate": 4.468029041551372e-06, "loss": 0.3328, "step": 4605 }, { "epoch": 1.0963884095912417, "grad_norm": 0.3480001600905687, "learning_rate": 4.466112428126275e-06, "loss": 0.3601, "step": 4606 }, { "epoch": 1.0966264056643065, "grad_norm": 0.3885251422953546, "learning_rate": 4.464195894050079e-06, "loss": 0.2733, "step": 4607 }, { "epoch": 1.0968644017373714, "grad_norm": 0.3940111256961415, "learning_rate": 4.462279439607628e-06, "loss": 0.3078, "step": 4608 }, { "epoch": 1.0971023978104362, "grad_norm": 0.41464737325896495, "learning_rate": 4.4603630650837545e-06, "loss": 0.4018, "step": 4609 }, { "epoch": 1.097340393883501, "grad_norm": 0.3657995338802541, "learning_rate": 4.4584467707632804e-06, "loss": 0.2893, "step": 4610 }, { "epoch": 1.0975783899565656, "grad_norm": 0.3670886985872823, "learning_rate": 4.456530556931013e-06, "loss": 0.2717, "step": 4611 }, { "epoch": 1.0978163860296306, "grad_norm": 0.3815190866043315, "learning_rate": 4.454614423871749e-06, "loss": 0.3265, "step": 4612 }, { "epoch": 1.0980543821026953, "grad_norm": 0.3479008384560524, "learning_rate": 4.4526983718702745e-06, "loss": 0.3696, "step": 4613 }, { "epoch": 1.09829237817576, "grad_norm": 0.3510433837303061, "learning_rate": 4.450782401211362e-06, "loss": 0.2636, "step": 4614 }, { "epoch": 1.0985303742488248, "grad_norm": 0.3804726727656412, "learning_rate": 4.4488665121797696e-06, "loss": 0.3307, "step": 4615 }, { "epoch": 1.0987683703218898, "grad_norm": 0.3614441700666874, "learning_rate": 4.446950705060249e-06, "loss": 0.3949, "step": 4616 }, { "epoch": 1.0990063663949545, "grad_norm": 0.37288138794486936, "learning_rate": 4.445034980137536e-06, "loss": 0.2985, "step": 4617 }, { "epoch": 1.0992443624680193, "grad_norm": 0.3609717776051928, "learning_rate": 4.4431193376963534e-06, "loss": 0.2821, "step": 4618 }, { "epoch": 1.099482358541084, "grad_norm": 0.377067343268631, "learning_rate": 4.441203778021412e-06, "loss": 0.3213, "step": 4619 }, { "epoch": 1.099720354614149, "grad_norm": 0.385028432731855, "learning_rate": 4.439288301397416e-06, "loss": 0.3769, "step": 4620 }, { "epoch": 1.0999583506872137, "grad_norm": 0.3857984245884134, "learning_rate": 4.437372908109049e-06, "loss": 0.284, "step": 4621 }, { "epoch": 1.1001963467602784, "grad_norm": 0.3804900796934547, "learning_rate": 4.435457598440987e-06, "loss": 0.3023, "step": 4622 }, { "epoch": 1.1004343428333432, "grad_norm": 0.4095757121830929, "learning_rate": 4.4335423726778914e-06, "loss": 0.3569, "step": 4623 }, { "epoch": 1.1006723389064081, "grad_norm": 0.3626590444928867, "learning_rate": 4.431627231104413e-06, "loss": 0.3528, "step": 4624 }, { "epoch": 1.1009103349794729, "grad_norm": 0.3950093241561531, "learning_rate": 4.429712174005189e-06, "loss": 0.299, "step": 4625 }, { "epoch": 1.1011483310525376, "grad_norm": 0.35813564107373463, "learning_rate": 4.427797201664844e-06, "loss": 0.3488, "step": 4626 }, { "epoch": 1.1013863271256024, "grad_norm": 0.39623477898910664, "learning_rate": 4.425882314367991e-06, "loss": 0.3686, "step": 4627 }, { "epoch": 1.1016243231986673, "grad_norm": 0.3536379619561069, "learning_rate": 4.423967512399228e-06, "loss": 0.3094, "step": 4628 }, { "epoch": 1.101862319271732, "grad_norm": 0.361987975704904, "learning_rate": 4.4220527960431435e-06, "loss": 0.2832, "step": 4629 }, { "epoch": 1.1021003153447968, "grad_norm": 0.40248707316608023, "learning_rate": 4.420138165584311e-06, "loss": 0.3405, "step": 4630 }, { "epoch": 1.1023383114178615, "grad_norm": 0.37274174398754245, "learning_rate": 4.418223621307293e-06, "loss": 0.3721, "step": 4631 }, { "epoch": 1.1025763074909265, "grad_norm": 0.3929251247694433, "learning_rate": 4.416309163496635e-06, "loss": 0.2833, "step": 4632 }, { "epoch": 1.1028143035639912, "grad_norm": 0.41200450576439623, "learning_rate": 4.414394792436877e-06, "loss": 0.3211, "step": 4633 }, { "epoch": 1.103052299637056, "grad_norm": 0.39715739923858967, "learning_rate": 4.41248050841254e-06, "loss": 0.3868, "step": 4634 }, { "epoch": 1.1032902957101207, "grad_norm": 0.36901909132216004, "learning_rate": 4.410566311708134e-06, "loss": 0.316, "step": 4635 }, { "epoch": 1.1035282917831857, "grad_norm": 0.3902124213524561, "learning_rate": 4.408652202608156e-06, "loss": 0.2951, "step": 4636 }, { "epoch": 1.1037662878562504, "grad_norm": 0.37677094100062875, "learning_rate": 4.40673818139709e-06, "loss": 0.3694, "step": 4637 }, { "epoch": 1.1040042839293152, "grad_norm": 0.4185542623961448, "learning_rate": 4.404824248359407e-06, "loss": 0.3527, "step": 4638 }, { "epoch": 1.10424228000238, "grad_norm": 0.3668392732421244, "learning_rate": 4.402910403779564e-06, "loss": 0.273, "step": 4639 }, { "epoch": 1.1044802760754449, "grad_norm": 0.370730106826423, "learning_rate": 4.400996647942007e-06, "loss": 0.3284, "step": 4640 }, { "epoch": 1.1047182721485096, "grad_norm": 0.3592174118196979, "learning_rate": 4.399082981131166e-06, "loss": 0.393, "step": 4641 }, { "epoch": 1.1049562682215743, "grad_norm": 0.36645767089673803, "learning_rate": 4.39716940363146e-06, "loss": 0.3077, "step": 4642 }, { "epoch": 1.105194264294639, "grad_norm": 0.35544644161118766, "learning_rate": 4.395255915727294e-06, "loss": 0.278, "step": 4643 }, { "epoch": 1.105432260367704, "grad_norm": 0.3834171151203786, "learning_rate": 4.39334251770306e-06, "loss": 0.3348, "step": 4644 }, { "epoch": 1.1056702564407688, "grad_norm": 0.3825948031558651, "learning_rate": 4.391429209843135e-06, "loss": 0.387, "step": 4645 }, { "epoch": 1.1059082525138335, "grad_norm": 0.35003853431694987, "learning_rate": 4.389515992431884e-06, "loss": 0.3046, "step": 4646 }, { "epoch": 1.1061462485868983, "grad_norm": 0.3825762439469127, "learning_rate": 4.387602865753661e-06, "loss": 0.327, "step": 4647 }, { "epoch": 1.1063842446599632, "grad_norm": 0.3975773348300809, "learning_rate": 4.385689830092802e-06, "loss": 0.3667, "step": 4648 }, { "epoch": 1.106622240733028, "grad_norm": 0.4272073165238033, "learning_rate": 4.383776885733631e-06, "loss": 0.337, "step": 4649 }, { "epoch": 1.1068602368060927, "grad_norm": 0.38783082489646, "learning_rate": 4.3818640329604594e-06, "loss": 0.3064, "step": 4650 }, { "epoch": 1.1070982328791574, "grad_norm": 0.37650129964931706, "learning_rate": 4.3799512720575845e-06, "loss": 0.3432, "step": 4651 }, { "epoch": 1.1073362289522224, "grad_norm": 0.3741163747344154, "learning_rate": 4.3780386033092905e-06, "loss": 0.3701, "step": 4652 }, { "epoch": 1.1075742250252871, "grad_norm": 0.37940812325594914, "learning_rate": 4.376126026999846e-06, "loss": 0.3167, "step": 4653 }, { "epoch": 1.1078122210983519, "grad_norm": 0.3866628177506814, "learning_rate": 4.374213543413508e-06, "loss": 0.3018, "step": 4654 }, { "epoch": 1.1080502171714166, "grad_norm": 0.4190491021927373, "learning_rate": 4.372301152834518e-06, "loss": 0.3635, "step": 4655 }, { "epoch": 1.1082882132444816, "grad_norm": 0.36038485452576885, "learning_rate": 4.370388855547106e-06, "loss": 0.3567, "step": 4656 }, { "epoch": 1.1085262093175463, "grad_norm": 0.36962141459506104, "learning_rate": 4.368476651835485e-06, "loss": 0.2713, "step": 4657 }, { "epoch": 1.108764205390611, "grad_norm": 0.3760349048148074, "learning_rate": 4.366564541983858e-06, "loss": 0.315, "step": 4658 }, { "epoch": 1.1090022014636758, "grad_norm": 0.39751754536485306, "learning_rate": 4.36465252627641e-06, "loss": 0.3915, "step": 4659 }, { "epoch": 1.1092401975367407, "grad_norm": 0.380234301198992, "learning_rate": 4.362740604997312e-06, "loss": 0.3096, "step": 4660 }, { "epoch": 1.1094781936098055, "grad_norm": 0.3650822635588656, "learning_rate": 4.360828778430728e-06, "loss": 0.2777, "step": 4661 }, { "epoch": 1.1097161896828702, "grad_norm": 0.38870114316759063, "learning_rate": 4.358917046860799e-06, "loss": 0.3885, "step": 4662 }, { "epoch": 1.109954185755935, "grad_norm": 0.3821913095469407, "learning_rate": 4.357005410571657e-06, "loss": 0.3813, "step": 4663 }, { "epoch": 1.110192181829, "grad_norm": 0.38268196285227385, "learning_rate": 4.355093869847418e-06, "loss": 0.2743, "step": 4664 }, { "epoch": 1.1104301779020647, "grad_norm": 0.3950888429234955, "learning_rate": 4.353182424972184e-06, "loss": 0.3504, "step": 4665 }, { "epoch": 1.1106681739751294, "grad_norm": 0.38723636956923085, "learning_rate": 4.351271076230042e-06, "loss": 0.4136, "step": 4666 }, { "epoch": 1.1109061700481941, "grad_norm": 0.3745288707524673, "learning_rate": 4.349359823905068e-06, "loss": 0.3286, "step": 4667 }, { "epoch": 1.111144166121259, "grad_norm": 0.35266375814406953, "learning_rate": 4.34744866828132e-06, "loss": 0.2839, "step": 4668 }, { "epoch": 1.1113821621943238, "grad_norm": 0.4116398445629615, "learning_rate": 4.345537609642843e-06, "loss": 0.2851, "step": 4669 }, { "epoch": 1.1116201582673886, "grad_norm": 0.4227512343669503, "learning_rate": 4.343626648273667e-06, "loss": 0.3862, "step": 4670 }, { "epoch": 1.1118581543404533, "grad_norm": 0.40325487336775656, "learning_rate": 4.34171578445781e-06, "loss": 0.3086, "step": 4671 }, { "epoch": 1.1120961504135183, "grad_norm": 0.41223011157726425, "learning_rate": 4.339805018479273e-06, "loss": 0.2965, "step": 4672 }, { "epoch": 1.112334146486583, "grad_norm": 0.3738155892736554, "learning_rate": 4.337894350622043e-06, "loss": 0.371, "step": 4673 }, { "epoch": 1.1125721425596478, "grad_norm": 0.3812405808634638, "learning_rate": 4.335983781170089e-06, "loss": 0.3477, "step": 4674 }, { "epoch": 1.1128101386327125, "grad_norm": 0.4077123636219675, "learning_rate": 4.334073310407375e-06, "loss": 0.283, "step": 4675 }, { "epoch": 1.1130481347057772, "grad_norm": 0.36774441909673067, "learning_rate": 4.332162938617841e-06, "loss": 0.3361, "step": 4676 }, { "epoch": 1.1132861307788422, "grad_norm": 0.38011910226148504, "learning_rate": 4.3302526660854155e-06, "loss": 0.4107, "step": 4677 }, { "epoch": 1.113524126851907, "grad_norm": 0.37483811120325883, "learning_rate": 4.3283424930940135e-06, "loss": 0.3076, "step": 4678 }, { "epoch": 1.1137621229249717, "grad_norm": 0.4000942474412326, "learning_rate": 4.326432419927532e-06, "loss": 0.3053, "step": 4679 }, { "epoch": 1.1140001189980366, "grad_norm": 0.4085016676353177, "learning_rate": 4.324522446869856e-06, "loss": 0.3721, "step": 4680 }, { "epoch": 1.1142381150711014, "grad_norm": 0.38122936015480213, "learning_rate": 4.322612574204856e-06, "loss": 0.3561, "step": 4681 }, { "epoch": 1.1144761111441661, "grad_norm": 0.46561579745978454, "learning_rate": 4.320702802216384e-06, "loss": 0.2734, "step": 4682 }, { "epoch": 1.1147141072172309, "grad_norm": 0.38133591698111996, "learning_rate": 4.318793131188281e-06, "loss": 0.3366, "step": 4683 }, { "epoch": 1.1149521032902956, "grad_norm": 0.39153072129909505, "learning_rate": 4.316883561404371e-06, "loss": 0.3929, "step": 4684 }, { "epoch": 1.1151900993633606, "grad_norm": 0.39464520028456496, "learning_rate": 4.314974093148464e-06, "loss": 0.2922, "step": 4685 }, { "epoch": 1.1154280954364253, "grad_norm": 0.3582950558941121, "learning_rate": 4.313064726704352e-06, "loss": 0.2963, "step": 4686 }, { "epoch": 1.11566609150949, "grad_norm": 0.39622662281594123, "learning_rate": 4.311155462355817e-06, "loss": 0.3557, "step": 4687 }, { "epoch": 1.115904087582555, "grad_norm": 0.38590265523350004, "learning_rate": 4.309246300386619e-06, "loss": 0.3763, "step": 4688 }, { "epoch": 1.1161420836556197, "grad_norm": 0.3951607366057468, "learning_rate": 4.3073372410805115e-06, "loss": 0.2872, "step": 4689 }, { "epoch": 1.1163800797286845, "grad_norm": 0.3600542205066233, "learning_rate": 4.305428284721225e-06, "loss": 0.3088, "step": 4690 }, { "epoch": 1.1166180758017492, "grad_norm": 0.37643095384884356, "learning_rate": 4.303519431592479e-06, "loss": 0.3968, "step": 4691 }, { "epoch": 1.116856071874814, "grad_norm": 0.36518409589510575, "learning_rate": 4.301610681977975e-06, "loss": 0.3333, "step": 4692 }, { "epoch": 1.117094067947879, "grad_norm": 0.37667222528849537, "learning_rate": 4.2997020361614e-06, "loss": 0.2865, "step": 4693 }, { "epoch": 1.1173320640209436, "grad_norm": 0.4077874035405591, "learning_rate": 4.297793494426429e-06, "loss": 0.3299, "step": 4694 }, { "epoch": 1.1175700600940084, "grad_norm": 0.36323609120530465, "learning_rate": 4.295885057056716e-06, "loss": 0.4048, "step": 4695 }, { "epoch": 1.1178080561670733, "grad_norm": 0.36319522250442954, "learning_rate": 4.293976724335901e-06, "loss": 0.2963, "step": 4696 }, { "epoch": 1.118046052240138, "grad_norm": 0.4272106433249216, "learning_rate": 4.292068496547612e-06, "loss": 0.3082, "step": 4697 }, { "epoch": 1.1182840483132028, "grad_norm": 0.3620957198196407, "learning_rate": 4.290160373975457e-06, "loss": 0.338, "step": 4698 }, { "epoch": 1.1185220443862676, "grad_norm": 0.36206405693132543, "learning_rate": 4.2882523569030325e-06, "loss": 0.3244, "step": 4699 }, { "epoch": 1.1187600404593323, "grad_norm": 0.40476613713113396, "learning_rate": 4.286344445613914e-06, "loss": 0.2865, "step": 4700 }, { "epoch": 1.1189980365323973, "grad_norm": 0.4027176453027199, "learning_rate": 4.2844366403916654e-06, "loss": 0.3403, "step": 4701 }, { "epoch": 1.119236032605462, "grad_norm": 0.37078100855116464, "learning_rate": 4.282528941519836e-06, "loss": 0.389, "step": 4702 }, { "epoch": 1.1194740286785267, "grad_norm": 0.35909975767598074, "learning_rate": 4.280621349281954e-06, "loss": 0.2934, "step": 4703 }, { "epoch": 1.1197120247515917, "grad_norm": 0.39195993134735413, "learning_rate": 4.278713863961538e-06, "loss": 0.2757, "step": 4704 }, { "epoch": 1.1199500208246564, "grad_norm": 0.4917626986677953, "learning_rate": 4.276806485842084e-06, "loss": 0.3648, "step": 4705 }, { "epoch": 1.1201880168977212, "grad_norm": 0.3819285748588736, "learning_rate": 4.274899215207077e-06, "loss": 0.3353, "step": 4706 }, { "epoch": 1.120426012970786, "grad_norm": 0.3671927065772577, "learning_rate": 4.272992052339986e-06, "loss": 0.2612, "step": 4707 }, { "epoch": 1.1206640090438507, "grad_norm": 0.44096283257819413, "learning_rate": 4.271084997524261e-06, "loss": 0.3343, "step": 4708 }, { "epoch": 1.1209020051169156, "grad_norm": 0.41801562980887697, "learning_rate": 4.269178051043336e-06, "loss": 0.3761, "step": 4709 }, { "epoch": 1.1211400011899804, "grad_norm": 0.3605543613421053, "learning_rate": 4.2672712131806334e-06, "loss": 0.2884, "step": 4710 }, { "epoch": 1.121377997263045, "grad_norm": 0.4186671411554116, "learning_rate": 4.265364484219556e-06, "loss": 0.286, "step": 4711 }, { "epoch": 1.12161599333611, "grad_norm": 0.3763168545269733, "learning_rate": 4.263457864443491e-06, "loss": 0.3482, "step": 4712 }, { "epoch": 1.1218539894091748, "grad_norm": 0.3640333763753411, "learning_rate": 4.261551354135807e-06, "loss": 0.3576, "step": 4713 }, { "epoch": 1.1220919854822395, "grad_norm": 0.3807965516234715, "learning_rate": 4.259644953579861e-06, "loss": 0.2808, "step": 4714 }, { "epoch": 1.1223299815553043, "grad_norm": 0.38737302259739137, "learning_rate": 4.2577386630589875e-06, "loss": 0.3466, "step": 4715 }, { "epoch": 1.122567977628369, "grad_norm": 0.3709834921935741, "learning_rate": 4.255832482856514e-06, "loss": 0.3879, "step": 4716 }, { "epoch": 1.122805973701434, "grad_norm": 0.397332853820356, "learning_rate": 4.253926413255743e-06, "loss": 0.3053, "step": 4717 }, { "epoch": 1.1230439697744987, "grad_norm": 0.37589401628573843, "learning_rate": 4.252020454539965e-06, "loss": 0.3125, "step": 4718 }, { "epoch": 1.1232819658475635, "grad_norm": 0.39174326299097356, "learning_rate": 4.250114606992451e-06, "loss": 0.3167, "step": 4719 }, { "epoch": 1.1235199619206284, "grad_norm": 0.3617505028367288, "learning_rate": 4.248208870896456e-06, "loss": 0.3868, "step": 4720 }, { "epoch": 1.1237579579936932, "grad_norm": 0.401359881455322, "learning_rate": 4.246303246535224e-06, "loss": 0.2771, "step": 4721 }, { "epoch": 1.123995954066758, "grad_norm": 0.3950415349640952, "learning_rate": 4.244397734191973e-06, "loss": 0.3337, "step": 4722 }, { "epoch": 1.1242339501398226, "grad_norm": 0.40327719323124134, "learning_rate": 4.242492334149911e-06, "loss": 0.4014, "step": 4723 }, { "epoch": 1.1244719462128874, "grad_norm": 0.37522594034975987, "learning_rate": 4.2405870466922295e-06, "loss": 0.3311, "step": 4724 }, { "epoch": 1.1247099422859523, "grad_norm": 0.3900606525913724, "learning_rate": 4.2386818721021e-06, "loss": 0.2906, "step": 4725 }, { "epoch": 1.124947938359017, "grad_norm": 0.36868405071537597, "learning_rate": 4.236776810662677e-06, "loss": 0.3458, "step": 4726 }, { "epoch": 1.1251859344320818, "grad_norm": 0.3751466988447734, "learning_rate": 4.2348718626571024e-06, "loss": 0.3748, "step": 4727 }, { "epoch": 1.1254239305051468, "grad_norm": 0.3635206406113209, "learning_rate": 4.232967028368498e-06, "loss": 0.27, "step": 4728 }, { "epoch": 1.1256619265782115, "grad_norm": 0.3790908884920694, "learning_rate": 4.231062308079965e-06, "loss": 0.2931, "step": 4729 }, { "epoch": 1.1258999226512763, "grad_norm": 0.3699061956945172, "learning_rate": 4.229157702074598e-06, "loss": 0.3573, "step": 4730 }, { "epoch": 1.126137918724341, "grad_norm": 0.3541326251134355, "learning_rate": 4.227253210635467e-06, "loss": 0.3273, "step": 4731 }, { "epoch": 1.1263759147974057, "grad_norm": 0.3706068751730151, "learning_rate": 4.225348834045625e-06, "loss": 0.3089, "step": 4732 }, { "epoch": 1.1266139108704707, "grad_norm": 0.36912447443811713, "learning_rate": 4.223444572588111e-06, "loss": 0.3112, "step": 4733 }, { "epoch": 1.1268519069435354, "grad_norm": 0.38272894471100344, "learning_rate": 4.221540426545943e-06, "loss": 0.3732, "step": 4734 }, { "epoch": 1.1270899030166002, "grad_norm": 0.3659819182595056, "learning_rate": 4.219636396202127e-06, "loss": 0.3125, "step": 4735 }, { "epoch": 1.1273278990896651, "grad_norm": 0.4029280672903165, "learning_rate": 4.217732481839647e-06, "loss": 0.2993, "step": 4736 }, { "epoch": 1.1275658951627299, "grad_norm": 0.3471586840183988, "learning_rate": 4.215828683741473e-06, "loss": 0.3264, "step": 4737 }, { "epoch": 1.1278038912357946, "grad_norm": 0.40496516884275047, "learning_rate": 4.2139250021905564e-06, "loss": 0.3911, "step": 4738 }, { "epoch": 1.1280418873088593, "grad_norm": 0.3604417470905285, "learning_rate": 4.212021437469831e-06, "loss": 0.2733, "step": 4739 }, { "epoch": 1.128279883381924, "grad_norm": 0.4045782091345007, "learning_rate": 4.210117989862213e-06, "loss": 0.3166, "step": 4740 }, { "epoch": 1.128517879454989, "grad_norm": 0.42245205724517865, "learning_rate": 4.208214659650603e-06, "loss": 0.3806, "step": 4741 }, { "epoch": 1.1287558755280538, "grad_norm": 0.37772806607684767, "learning_rate": 4.206311447117883e-06, "loss": 0.3229, "step": 4742 }, { "epoch": 1.1289938716011185, "grad_norm": 0.45042461811956686, "learning_rate": 4.204408352546914e-06, "loss": 0.2872, "step": 4743 }, { "epoch": 1.1292318676741835, "grad_norm": 0.41146418750242547, "learning_rate": 4.202505376220548e-06, "loss": 0.3595, "step": 4744 }, { "epoch": 1.1294698637472482, "grad_norm": 0.37141506761563525, "learning_rate": 4.2006025184216124e-06, "loss": 0.387, "step": 4745 }, { "epoch": 1.129707859820313, "grad_norm": 0.4033866279526767, "learning_rate": 4.198699779432918e-06, "loss": 0.2798, "step": 4746 }, { "epoch": 1.1299458558933777, "grad_norm": 0.39808863942798545, "learning_rate": 4.19679715953726e-06, "loss": 0.3075, "step": 4747 }, { "epoch": 1.1301838519664424, "grad_norm": 0.35618340589269987, "learning_rate": 4.194894659017415e-06, "loss": 0.3694, "step": 4748 }, { "epoch": 1.1304218480395074, "grad_norm": 0.3632549519200726, "learning_rate": 4.192992278156141e-06, "loss": 0.3462, "step": 4749 }, { "epoch": 1.1306598441125721, "grad_norm": 0.4018427851047768, "learning_rate": 4.191090017236177e-06, "loss": 0.2608, "step": 4750 }, { "epoch": 1.1308978401856369, "grad_norm": 0.3889863500503954, "learning_rate": 4.189187876540249e-06, "loss": 0.3352, "step": 4751 }, { "epoch": 1.1311358362587018, "grad_norm": 0.3495459311201199, "learning_rate": 4.187285856351061e-06, "loss": 0.3973, "step": 4752 }, { "epoch": 1.1313738323317666, "grad_norm": 0.3678770041190173, "learning_rate": 4.1853839569513015e-06, "loss": 0.295, "step": 4753 }, { "epoch": 1.1316118284048313, "grad_norm": 0.4160690563278992, "learning_rate": 4.1834821786236375e-06, "loss": 0.3, "step": 4754 }, { "epoch": 1.131849824477896, "grad_norm": 0.3814427610026273, "learning_rate": 4.181580521650722e-06, "loss": 0.3727, "step": 4755 }, { "epoch": 1.1320878205509608, "grad_norm": 0.35113445654594827, "learning_rate": 4.179678986315185e-06, "loss": 0.317, "step": 4756 }, { "epoch": 1.1323258166240258, "grad_norm": 0.38027676840048824, "learning_rate": 4.177777572899647e-06, "loss": 0.2754, "step": 4757 }, { "epoch": 1.1325638126970905, "grad_norm": 0.3539448580155885, "learning_rate": 4.175876281686702e-06, "loss": 0.3154, "step": 4758 }, { "epoch": 1.1328018087701552, "grad_norm": 0.3741958790250975, "learning_rate": 4.17397511295893e-06, "loss": 0.4037, "step": 4759 }, { "epoch": 1.1330398048432202, "grad_norm": 0.35759297486340763, "learning_rate": 4.17207406699889e-06, "loss": 0.2913, "step": 4760 }, { "epoch": 1.133277800916285, "grad_norm": 0.41006094302343415, "learning_rate": 4.170173144089127e-06, "loss": 0.3066, "step": 4761 }, { "epoch": 1.1335157969893497, "grad_norm": 0.3764612848174041, "learning_rate": 4.168272344512163e-06, "loss": 0.3449, "step": 4762 }, { "epoch": 1.1337537930624144, "grad_norm": 0.3676606468861143, "learning_rate": 4.1663716685505026e-06, "loss": 0.353, "step": 4763 }, { "epoch": 1.1339917891354792, "grad_norm": 0.43991654949679093, "learning_rate": 4.164471116486638e-06, "loss": 0.3147, "step": 4764 }, { "epoch": 1.1342297852085441, "grad_norm": 0.389125286946846, "learning_rate": 4.162570688603035e-06, "loss": 0.2964, "step": 4765 }, { "epoch": 1.1344677812816089, "grad_norm": 0.3953023155670322, "learning_rate": 4.160670385182144e-06, "loss": 0.3646, "step": 4766 }, { "epoch": 1.1347057773546736, "grad_norm": 0.3803124764959841, "learning_rate": 4.158770206506398e-06, "loss": 0.3455, "step": 4767 }, { "epoch": 1.1349437734277386, "grad_norm": 0.3932564435551, "learning_rate": 4.1568701528582105e-06, "loss": 0.3009, "step": 4768 }, { "epoch": 1.1351817695008033, "grad_norm": 0.37337164388298766, "learning_rate": 4.154970224519976e-06, "loss": 0.3482, "step": 4769 }, { "epoch": 1.135419765573868, "grad_norm": 0.3899160813781137, "learning_rate": 4.15307042177407e-06, "loss": 0.4065, "step": 4770 }, { "epoch": 1.1356577616469328, "grad_norm": 0.35579178905990316, "learning_rate": 4.151170744902852e-06, "loss": 0.3079, "step": 4771 }, { "epoch": 1.1358957577199975, "grad_norm": 0.37945121820654776, "learning_rate": 4.149271194188662e-06, "loss": 0.3105, "step": 4772 }, { "epoch": 1.1361337537930625, "grad_norm": 0.4165547341391943, "learning_rate": 4.147371769913817e-06, "loss": 0.3696, "step": 4773 }, { "epoch": 1.1363717498661272, "grad_norm": 0.376295683219943, "learning_rate": 4.145472472360621e-06, "loss": 0.3747, "step": 4774 }, { "epoch": 1.136609745939192, "grad_norm": 0.3886386600985121, "learning_rate": 4.143573301811355e-06, "loss": 0.2783, "step": 4775 }, { "epoch": 1.136847742012257, "grad_norm": 0.40835168558501084, "learning_rate": 4.141674258548284e-06, "loss": 0.3384, "step": 4776 }, { "epoch": 1.1370857380853217, "grad_norm": 0.371459747825813, "learning_rate": 4.13977534285365e-06, "loss": 0.383, "step": 4777 }, { "epoch": 1.1373237341583864, "grad_norm": 0.3944390627645514, "learning_rate": 4.137876555009684e-06, "loss": 0.3055, "step": 4778 }, { "epoch": 1.1375617302314511, "grad_norm": 0.41245043405705123, "learning_rate": 4.135977895298588e-06, "loss": 0.3053, "step": 4779 }, { "epoch": 1.1377997263045159, "grad_norm": 0.3846823535751807, "learning_rate": 4.134079364002552e-06, "loss": 0.3395, "step": 4780 }, { "epoch": 1.1380377223775808, "grad_norm": 0.39696399724800313, "learning_rate": 4.132180961403744e-06, "loss": 0.3519, "step": 4781 }, { "epoch": 1.1382757184506456, "grad_norm": 0.39325849294481, "learning_rate": 4.130282687784315e-06, "loss": 0.2742, "step": 4782 }, { "epoch": 1.1385137145237103, "grad_norm": 0.40331167287236297, "learning_rate": 4.1283845434263935e-06, "loss": 0.3036, "step": 4783 }, { "epoch": 1.1387517105967753, "grad_norm": 0.4029530512494824, "learning_rate": 4.12648652861209e-06, "loss": 0.3796, "step": 4784 }, { "epoch": 1.13898970666984, "grad_norm": 0.3698197939582225, "learning_rate": 4.1245886436235e-06, "loss": 0.3043, "step": 4785 }, { "epoch": 1.1392277027429047, "grad_norm": 0.37697533259800486, "learning_rate": 4.122690888742694e-06, "loss": 0.2891, "step": 4786 }, { "epoch": 1.1394656988159695, "grad_norm": 0.39557375149649615, "learning_rate": 4.120793264251726e-06, "loss": 0.3232, "step": 4787 }, { "epoch": 1.1397036948890342, "grad_norm": 0.364829176455308, "learning_rate": 4.118895770432629e-06, "loss": 0.3722, "step": 4788 }, { "epoch": 1.1399416909620992, "grad_norm": 0.3778167261258978, "learning_rate": 4.1169984075674184e-06, "loss": 0.2911, "step": 4789 }, { "epoch": 1.140179687035164, "grad_norm": 0.44220258438829385, "learning_rate": 4.115101175938088e-06, "loss": 0.3305, "step": 4790 }, { "epoch": 1.1404176831082287, "grad_norm": 0.36291623619174723, "learning_rate": 4.113204075826614e-06, "loss": 0.3847, "step": 4791 }, { "epoch": 1.1406556791812936, "grad_norm": 0.36770447829250785, "learning_rate": 4.111307107514953e-06, "loss": 0.3274, "step": 4792 }, { "epoch": 1.1408936752543584, "grad_norm": 0.35815092054120873, "learning_rate": 4.109410271285042e-06, "loss": 0.2641, "step": 4793 }, { "epoch": 1.141131671327423, "grad_norm": 0.36035533257546154, "learning_rate": 4.107513567418796e-06, "loss": 0.3437, "step": 4794 }, { "epoch": 1.1413696674004878, "grad_norm": 0.3963850334927369, "learning_rate": 4.105616996198113e-06, "loss": 0.3981, "step": 4795 }, { "epoch": 1.1416076634735526, "grad_norm": 0.3367518459228409, "learning_rate": 4.10372055790487e-06, "loss": 0.3095, "step": 4796 }, { "epoch": 1.1418456595466175, "grad_norm": 0.38016156127534406, "learning_rate": 4.101824252820926e-06, "loss": 0.3133, "step": 4797 }, { "epoch": 1.1420836556196823, "grad_norm": 0.3912648891515814, "learning_rate": 4.099928081228115e-06, "loss": 0.3705, "step": 4798 }, { "epoch": 1.142321651692747, "grad_norm": 0.37387297859975754, "learning_rate": 4.09803204340826e-06, "loss": 0.3113, "step": 4799 }, { "epoch": 1.142559647765812, "grad_norm": 0.3845831972941408, "learning_rate": 4.096136139643158e-06, "loss": 0.3138, "step": 4800 }, { "epoch": 1.1427976438388767, "grad_norm": 0.3742150865042121, "learning_rate": 4.094240370214585e-06, "loss": 0.3117, "step": 4801 }, { "epoch": 1.1430356399119415, "grad_norm": 0.39531922526195684, "learning_rate": 4.0923447354043e-06, "loss": 0.3866, "step": 4802 }, { "epoch": 1.1432736359850062, "grad_norm": 0.36395182823896827, "learning_rate": 4.090449235494043e-06, "loss": 0.2797, "step": 4803 }, { "epoch": 1.143511632058071, "grad_norm": 0.36855555988204036, "learning_rate": 4.088553870765529e-06, "loss": 0.2781, "step": 4804 }, { "epoch": 1.143749628131136, "grad_norm": 0.37855067257091296, "learning_rate": 4.086658641500458e-06, "loss": 0.3791, "step": 4805 }, { "epoch": 1.1439876242042006, "grad_norm": 0.3872640810576243, "learning_rate": 4.0847635479805095e-06, "loss": 0.3539, "step": 4806 }, { "epoch": 1.1442256202772654, "grad_norm": 0.47053726164577175, "learning_rate": 4.082868590487339e-06, "loss": 0.2798, "step": 4807 }, { "epoch": 1.1444636163503303, "grad_norm": 0.36992308839073973, "learning_rate": 4.080973769302584e-06, "loss": 0.3663, "step": 4808 }, { "epoch": 1.144701612423395, "grad_norm": 0.3820408245781083, "learning_rate": 4.079079084707864e-06, "loss": 0.3765, "step": 4809 }, { "epoch": 1.1449396084964598, "grad_norm": 0.36508657238373526, "learning_rate": 4.077184536984773e-06, "loss": 0.2741, "step": 4810 }, { "epoch": 1.1451776045695246, "grad_norm": 0.3625603860149019, "learning_rate": 4.07529012641489e-06, "loss": 0.2954, "step": 4811 }, { "epoch": 1.1454156006425893, "grad_norm": 0.3529289410508482, "learning_rate": 4.073395853279768e-06, "loss": 0.3098, "step": 4812 }, { "epoch": 1.1456535967156543, "grad_norm": 0.3698173348748085, "learning_rate": 4.071501717860947e-06, "loss": 0.387, "step": 4813 }, { "epoch": 1.145891592788719, "grad_norm": 0.36193766430504687, "learning_rate": 4.069607720439942e-06, "loss": 0.269, "step": 4814 }, { "epoch": 1.1461295888617837, "grad_norm": 0.39085304104980867, "learning_rate": 4.067713861298246e-06, "loss": 0.3279, "step": 4815 }, { "epoch": 1.1463675849348487, "grad_norm": 0.3703336609246225, "learning_rate": 4.0658201407173335e-06, "loss": 0.3719, "step": 4816 }, { "epoch": 1.1466055810079134, "grad_norm": 0.3642397937157319, "learning_rate": 4.063926558978657e-06, "loss": 0.3411, "step": 4817 }, { "epoch": 1.1468435770809782, "grad_norm": 0.3570075003209498, "learning_rate": 4.062033116363653e-06, "loss": 0.2743, "step": 4818 }, { "epoch": 1.147081573154043, "grad_norm": 0.3926987214457056, "learning_rate": 4.060139813153732e-06, "loss": 0.338, "step": 4819 }, { "epoch": 1.1473195692271076, "grad_norm": 0.36780611870947877, "learning_rate": 4.058246649630286e-06, "loss": 0.3727, "step": 4820 }, { "epoch": 1.1475575653001726, "grad_norm": 0.3501727672950386, "learning_rate": 4.056353626074685e-06, "loss": 0.2804, "step": 4821 }, { "epoch": 1.1477955613732373, "grad_norm": 0.4059745265643401, "learning_rate": 4.05446074276828e-06, "loss": 0.2883, "step": 4822 }, { "epoch": 1.148033557446302, "grad_norm": 0.4002543534646528, "learning_rate": 4.0525679999924e-06, "loss": 0.3783, "step": 4823 }, { "epoch": 1.148271553519367, "grad_norm": 0.37811094840803156, "learning_rate": 4.050675398028354e-06, "loss": 0.3345, "step": 4824 }, { "epoch": 1.1485095495924318, "grad_norm": 0.39558594949102993, "learning_rate": 4.048782937157427e-06, "loss": 0.2961, "step": 4825 }, { "epoch": 1.1487475456654965, "grad_norm": 0.39775175118133455, "learning_rate": 4.04689061766089e-06, "loss": 0.3206, "step": 4826 }, { "epoch": 1.1489855417385613, "grad_norm": 0.37445723489979743, "learning_rate": 4.044998439819986e-06, "loss": 0.3986, "step": 4827 }, { "epoch": 1.149223537811626, "grad_norm": 0.3739855904232471, "learning_rate": 4.043106403915938e-06, "loss": 0.2836, "step": 4828 }, { "epoch": 1.149461533884691, "grad_norm": 0.4283283093929408, "learning_rate": 4.041214510229952e-06, "loss": 0.3011, "step": 4829 }, { "epoch": 1.1496995299577557, "grad_norm": 0.40090894213533174, "learning_rate": 4.0393227590432085e-06, "loss": 0.3815, "step": 4830 }, { "epoch": 1.1499375260308204, "grad_norm": 0.3617108318222815, "learning_rate": 4.037431150636868e-06, "loss": 0.3683, "step": 4831 }, { "epoch": 1.1501755221038854, "grad_norm": 0.405107436573071, "learning_rate": 4.0355396852920735e-06, "loss": 0.2949, "step": 4832 }, { "epoch": 1.1504135181769501, "grad_norm": 0.4091009038760894, "learning_rate": 4.03364836328994e-06, "loss": 0.2922, "step": 4833 }, { "epoch": 1.1506515142500149, "grad_norm": 0.3723010060588749, "learning_rate": 4.0317571849115665e-06, "loss": 0.3649, "step": 4834 }, { "epoch": 1.1508895103230796, "grad_norm": 0.3486410506502387, "learning_rate": 4.029866150438029e-06, "loss": 0.2946, "step": 4835 }, { "epoch": 1.1511275063961444, "grad_norm": 0.40140072119081, "learning_rate": 4.02797526015038e-06, "loss": 0.2545, "step": 4836 }, { "epoch": 1.1513655024692093, "grad_norm": 0.36966831779652004, "learning_rate": 4.026084514329656e-06, "loss": 0.3484, "step": 4837 }, { "epoch": 1.151603498542274, "grad_norm": 0.3744156869841182, "learning_rate": 4.024193913256865e-06, "loss": 0.3852, "step": 4838 }, { "epoch": 1.1518414946153388, "grad_norm": 0.3862547285091858, "learning_rate": 4.022303457212998e-06, "loss": 0.2861, "step": 4839 }, { "epoch": 1.1520794906884038, "grad_norm": 0.3873257265237817, "learning_rate": 4.020413146479026e-06, "loss": 0.3121, "step": 4840 }, { "epoch": 1.1523174867614685, "grad_norm": 0.4522160266820443, "learning_rate": 4.018522981335894e-06, "loss": 0.3734, "step": 4841 }, { "epoch": 1.1525554828345332, "grad_norm": 0.3643381212709832, "learning_rate": 4.0166329620645275e-06, "loss": 0.3121, "step": 4842 }, { "epoch": 1.152793478907598, "grad_norm": 0.40744044121578465, "learning_rate": 4.01474308894583e-06, "loss": 0.302, "step": 4843 }, { "epoch": 1.1530314749806627, "grad_norm": 0.3799665007235963, "learning_rate": 4.012853362260683e-06, "loss": 0.325, "step": 4844 }, { "epoch": 1.1532694710537277, "grad_norm": 0.40864748898009884, "learning_rate": 4.010963782289948e-06, "loss": 0.3931, "step": 4845 }, { "epoch": 1.1535074671267924, "grad_norm": 0.39417564672662864, "learning_rate": 4.009074349314462e-06, "loss": 0.3143, "step": 4846 }, { "epoch": 1.1537454631998572, "grad_norm": 0.42660463794138176, "learning_rate": 4.007185063615043e-06, "loss": 0.2997, "step": 4847 }, { "epoch": 1.1539834592729221, "grad_norm": 0.45151329355721237, "learning_rate": 4.005295925472484e-06, "loss": 0.3525, "step": 4848 }, { "epoch": 1.1542214553459869, "grad_norm": 0.3534689223030634, "learning_rate": 4.003406935167558e-06, "loss": 0.3348, "step": 4849 }, { "epoch": 1.1544594514190516, "grad_norm": 0.3741669360663959, "learning_rate": 4.001518092981017e-06, "loss": 0.2907, "step": 4850 }, { "epoch": 1.1546974474921163, "grad_norm": 0.3795133464854993, "learning_rate": 3.999629399193589e-06, "loss": 0.3346, "step": 4851 }, { "epoch": 1.154935443565181, "grad_norm": 0.4063978193238941, "learning_rate": 3.997740854085979e-06, "loss": 0.369, "step": 4852 }, { "epoch": 1.155173439638246, "grad_norm": 0.35078236527156986, "learning_rate": 3.995852457938871e-06, "loss": 0.3191, "step": 4853 }, { "epoch": 1.1554114357113108, "grad_norm": 0.38716927785738015, "learning_rate": 3.993964211032931e-06, "loss": 0.2937, "step": 4854 }, { "epoch": 1.1556494317843755, "grad_norm": 0.41552238495054705, "learning_rate": 3.992076113648797e-06, "loss": 0.3745, "step": 4855 }, { "epoch": 1.1558874278574405, "grad_norm": 0.3909495828147318, "learning_rate": 3.990188166067088e-06, "loss": 0.3445, "step": 4856 }, { "epoch": 1.1561254239305052, "grad_norm": 0.3773512559043716, "learning_rate": 3.9883003685684e-06, "loss": 0.2753, "step": 4857 }, { "epoch": 1.15636342000357, "grad_norm": 0.3815969618990709, "learning_rate": 3.9864127214333035e-06, "loss": 0.3294, "step": 4858 }, { "epoch": 1.1566014160766347, "grad_norm": 0.3682808970267619, "learning_rate": 3.984525224942352e-06, "loss": 0.3877, "step": 4859 }, { "epoch": 1.1568394121496994, "grad_norm": 0.38593825650163915, "learning_rate": 3.982637879376075e-06, "loss": 0.3136, "step": 4860 }, { "epoch": 1.1570774082227644, "grad_norm": 0.4087706279356301, "learning_rate": 3.980750685014975e-06, "loss": 0.2707, "step": 4861 }, { "epoch": 1.1573154042958291, "grad_norm": 0.36407650716725964, "learning_rate": 3.978863642139541e-06, "loss": 0.3644, "step": 4862 }, { "epoch": 1.1575534003688939, "grad_norm": 0.391041663164892, "learning_rate": 3.97697675103023e-06, "loss": 0.3702, "step": 4863 }, { "epoch": 1.1577913964419588, "grad_norm": 0.36440676261441685, "learning_rate": 3.975090011967483e-06, "loss": 0.2939, "step": 4864 }, { "epoch": 1.1580293925150236, "grad_norm": 0.3944390664922029, "learning_rate": 3.973203425231715e-06, "loss": 0.3345, "step": 4865 }, { "epoch": 1.1582673885880883, "grad_norm": 0.37043202122893515, "learning_rate": 3.971316991103319e-06, "loss": 0.3966, "step": 4866 }, { "epoch": 1.158505384661153, "grad_norm": 0.3596933682030246, "learning_rate": 3.969430709862665e-06, "loss": 0.3262, "step": 4867 }, { "epoch": 1.1587433807342178, "grad_norm": 0.415916141016302, "learning_rate": 3.967544581790105e-06, "loss": 0.2955, "step": 4868 }, { "epoch": 1.1589813768072827, "grad_norm": 0.364949249649275, "learning_rate": 3.965658607165961e-06, "loss": 0.3108, "step": 4869 }, { "epoch": 1.1592193728803475, "grad_norm": 0.4038748930409246, "learning_rate": 3.9637727862705375e-06, "loss": 0.3811, "step": 4870 }, { "epoch": 1.1594573689534122, "grad_norm": 0.37034415569974766, "learning_rate": 3.961887119384111e-06, "loss": 0.2626, "step": 4871 }, { "epoch": 1.1596953650264772, "grad_norm": 0.37614882290717155, "learning_rate": 3.960001606786942e-06, "loss": 0.2926, "step": 4872 }, { "epoch": 1.159933361099542, "grad_norm": 0.6816306788345337, "learning_rate": 3.958116248759262e-06, "loss": 0.3556, "step": 4873 }, { "epoch": 1.1601713571726067, "grad_norm": 0.3601925064970214, "learning_rate": 3.9562310455812825e-06, "loss": 0.3386, "step": 4874 }, { "epoch": 1.1604093532456714, "grad_norm": 0.37272600080408436, "learning_rate": 3.9543459975331914e-06, "loss": 0.2821, "step": 4875 }, { "epoch": 1.1606473493187361, "grad_norm": 0.37365911547388664, "learning_rate": 3.952461104895153e-06, "loss": 0.304, "step": 4876 }, { "epoch": 1.160885345391801, "grad_norm": 0.4220848745812085, "learning_rate": 3.95057636794731e-06, "loss": 0.3819, "step": 4877 }, { "epoch": 1.1611233414648658, "grad_norm": 0.36664046373827575, "learning_rate": 3.9486917869697795e-06, "loss": 0.3174, "step": 4878 }, { "epoch": 1.1613613375379306, "grad_norm": 0.36434472833349824, "learning_rate": 3.9468073622426574e-06, "loss": 0.3156, "step": 4879 }, { "epoch": 1.1615993336109953, "grad_norm": 0.39980314450070703, "learning_rate": 3.944923094046016e-06, "loss": 0.3815, "step": 4880 }, { "epoch": 1.1618373296840603, "grad_norm": 0.3825456910870827, "learning_rate": 3.9430389826599026e-06, "loss": 0.3578, "step": 4881 }, { "epoch": 1.162075325757125, "grad_norm": 0.3901463596231784, "learning_rate": 3.9411550283643465e-06, "loss": 0.2817, "step": 4882 }, { "epoch": 1.1623133218301898, "grad_norm": 0.4095148078840206, "learning_rate": 3.939271231439348e-06, "loss": 0.3111, "step": 4883 }, { "epoch": 1.1625513179032545, "grad_norm": 0.40126458626689326, "learning_rate": 3.937387592164884e-06, "loss": 0.4164, "step": 4884 }, { "epoch": 1.1627893139763195, "grad_norm": 0.40524146316425336, "learning_rate": 3.935504110820912e-06, "loss": 0.3031, "step": 4885 }, { "epoch": 1.1630273100493842, "grad_norm": 0.41484043466115905, "learning_rate": 3.933620787687365e-06, "loss": 0.2683, "step": 4886 }, { "epoch": 1.163265306122449, "grad_norm": 0.42894419912735265, "learning_rate": 3.931737623044149e-06, "loss": 0.3446, "step": 4887 }, { "epoch": 1.1635033021955137, "grad_norm": 0.37569764295018193, "learning_rate": 3.929854617171149e-06, "loss": 0.3875, "step": 4888 }, { "epoch": 1.1637412982685786, "grad_norm": 0.38169828943839607, "learning_rate": 3.927971770348228e-06, "loss": 0.2981, "step": 4889 }, { "epoch": 1.1639792943416434, "grad_norm": 0.4326907379906986, "learning_rate": 3.9260890828552225e-06, "loss": 0.321, "step": 4890 }, { "epoch": 1.1642172904147081, "grad_norm": 0.37216325656330895, "learning_rate": 3.924206554971947e-06, "loss": 0.3721, "step": 4891 }, { "epoch": 1.1644552864877729, "grad_norm": 0.38522417456420815, "learning_rate": 3.92232418697819e-06, "loss": 0.3308, "step": 4892 }, { "epoch": 1.1646932825608378, "grad_norm": 0.40316171495665337, "learning_rate": 3.920441979153721e-06, "loss": 0.2768, "step": 4893 }, { "epoch": 1.1649312786339026, "grad_norm": 0.37987776460120803, "learning_rate": 3.918559931778277e-06, "loss": 0.3307, "step": 4894 }, { "epoch": 1.1651692747069673, "grad_norm": 0.3813096868034741, "learning_rate": 3.916678045131584e-06, "loss": 0.3603, "step": 4895 }, { "epoch": 1.165407270780032, "grad_norm": 0.351364277393361, "learning_rate": 3.914796319493333e-06, "loss": 0.2873, "step": 4896 }, { "epoch": 1.165645266853097, "grad_norm": 0.3926597142637336, "learning_rate": 3.912914755143196e-06, "loss": 0.3285, "step": 4897 }, { "epoch": 1.1658832629261617, "grad_norm": 0.3905874686541914, "learning_rate": 3.911033352360818e-06, "loss": 0.3833, "step": 4898 }, { "epoch": 1.1661212589992265, "grad_norm": 0.35936466590080296, "learning_rate": 3.909152111425825e-06, "loss": 0.3381, "step": 4899 }, { "epoch": 1.1663592550722912, "grad_norm": 0.3725108986050583, "learning_rate": 3.907271032617815e-06, "loss": 0.2671, "step": 4900 }, { "epoch": 1.1665972511453562, "grad_norm": 0.3835107673363921, "learning_rate": 3.905390116216362e-06, "loss": 0.3108, "step": 4901 }, { "epoch": 1.166835247218421, "grad_norm": 0.3562168456730705, "learning_rate": 3.9035093625010164e-06, "loss": 0.379, "step": 4902 }, { "epoch": 1.1670732432914857, "grad_norm": 0.34281015797938996, "learning_rate": 3.901628771751306e-06, "loss": 0.3064, "step": 4903 }, { "epoch": 1.1673112393645504, "grad_norm": 0.40233559709709515, "learning_rate": 3.899748344246732e-06, "loss": 0.3005, "step": 4904 }, { "epoch": 1.1675492354376154, "grad_norm": 0.422059793045895, "learning_rate": 3.897868080266774e-06, "loss": 0.3804, "step": 4905 }, { "epoch": 1.16778723151068, "grad_norm": 0.4089575743526671, "learning_rate": 3.895987980090884e-06, "loss": 0.332, "step": 4906 }, { "epoch": 1.1680252275837448, "grad_norm": 0.3718624537861076, "learning_rate": 3.894108043998492e-06, "loss": 0.2945, "step": 4907 }, { "epoch": 1.1682632236568096, "grad_norm": 0.4028263680996291, "learning_rate": 3.8922282722690006e-06, "loss": 0.3229, "step": 4908 }, { "epoch": 1.1685012197298745, "grad_norm": 0.45190473479232407, "learning_rate": 3.890348665181796e-06, "loss": 0.3799, "step": 4909 }, { "epoch": 1.1687392158029393, "grad_norm": 0.37951311105400887, "learning_rate": 3.888469223016231e-06, "loss": 0.3311, "step": 4910 }, { "epoch": 1.168977211876004, "grad_norm": 0.398817375650266, "learning_rate": 3.886589946051637e-06, "loss": 0.3005, "step": 4911 }, { "epoch": 1.1692152079490687, "grad_norm": 0.3788321557640797, "learning_rate": 3.884710834567321e-06, "loss": 0.3668, "step": 4912 }, { "epoch": 1.1694532040221337, "grad_norm": 0.3679850869739202, "learning_rate": 3.882831888842566e-06, "loss": 0.4159, "step": 4913 }, { "epoch": 1.1696912000951984, "grad_norm": 0.38977532532858084, "learning_rate": 3.880953109156631e-06, "loss": 0.2969, "step": 4914 }, { "epoch": 1.1699291961682632, "grad_norm": 0.3673051411020341, "learning_rate": 3.879074495788746e-06, "loss": 0.3382, "step": 4915 }, { "epoch": 1.170167192241328, "grad_norm": 0.40474641214394, "learning_rate": 3.8771960490181226e-06, "loss": 0.3647, "step": 4916 }, { "epoch": 1.1704051883143929, "grad_norm": 0.3677584145288695, "learning_rate": 3.875317769123943e-06, "loss": 0.3355, "step": 4917 }, { "epoch": 1.1706431843874576, "grad_norm": 0.38928549279284, "learning_rate": 3.873439656385367e-06, "loss": 0.2852, "step": 4918 }, { "epoch": 1.1708811804605224, "grad_norm": 0.40919373160433137, "learning_rate": 3.871561711081526e-06, "loss": 0.3662, "step": 4919 }, { "epoch": 1.171119176533587, "grad_norm": 0.3911851721735495, "learning_rate": 3.869683933491533e-06, "loss": 0.3833, "step": 4920 }, { "epoch": 1.171357172606652, "grad_norm": 0.382574197170867, "learning_rate": 3.8678063238944674e-06, "loss": 0.31, "step": 4921 }, { "epoch": 1.1715951686797168, "grad_norm": 0.37949281565255283, "learning_rate": 3.865928882569392e-06, "loss": 0.3181, "step": 4922 }, { "epoch": 1.1718331647527815, "grad_norm": 0.38403965708871085, "learning_rate": 3.86405160979534e-06, "loss": 0.3739, "step": 4923 }, { "epoch": 1.1720711608258463, "grad_norm": 0.38512569570032895, "learning_rate": 3.8621745058513225e-06, "loss": 0.3229, "step": 4924 }, { "epoch": 1.1723091568989112, "grad_norm": 0.3957163771288543, "learning_rate": 3.8602975710163205e-06, "loss": 0.2844, "step": 4925 }, { "epoch": 1.172547152971976, "grad_norm": 0.39701056868163465, "learning_rate": 3.858420805569295e-06, "loss": 0.338, "step": 4926 }, { "epoch": 1.1727851490450407, "grad_norm": 0.41141027404117647, "learning_rate": 3.856544209789179e-06, "loss": 0.4213, "step": 4927 }, { "epoch": 1.1730231451181055, "grad_norm": 0.356743827281772, "learning_rate": 3.854667783954882e-06, "loss": 0.3054, "step": 4928 }, { "epoch": 1.1732611411911704, "grad_norm": 0.36020835139169016, "learning_rate": 3.852791528345286e-06, "loss": 0.2547, "step": 4929 }, { "epoch": 1.1734991372642352, "grad_norm": 0.39353940700048456, "learning_rate": 3.85091544323925e-06, "loss": 0.3495, "step": 4930 }, { "epoch": 1.1737371333373, "grad_norm": 0.3739662096404696, "learning_rate": 3.849039528915605e-06, "loss": 0.3416, "step": 4931 }, { "epoch": 1.1739751294103646, "grad_norm": 0.4021690846442395, "learning_rate": 3.847163785653159e-06, "loss": 0.2926, "step": 4932 }, { "epoch": 1.1742131254834296, "grad_norm": 0.35310815717245986, "learning_rate": 3.845288213730695e-06, "loss": 0.3188, "step": 4933 }, { "epoch": 1.1744511215564943, "grad_norm": 0.37742127338548703, "learning_rate": 3.843412813426967e-06, "loss": 0.3857, "step": 4934 }, { "epoch": 1.174689117629559, "grad_norm": 0.36700195619184406, "learning_rate": 3.84153758502071e-06, "loss": 0.3065, "step": 4935 }, { "epoch": 1.1749271137026238, "grad_norm": 0.37862390020379844, "learning_rate": 3.839662528790625e-06, "loss": 0.2822, "step": 4936 }, { "epoch": 1.1751651097756888, "grad_norm": 0.37919008323525716, "learning_rate": 3.837787645015395e-06, "loss": 0.3459, "step": 4937 }, { "epoch": 1.1754031058487535, "grad_norm": 0.3805425537038319, "learning_rate": 3.835912933973671e-06, "loss": 0.4043, "step": 4938 }, { "epoch": 1.1756411019218183, "grad_norm": 0.3812253553869978, "learning_rate": 3.834038395944084e-06, "loss": 0.2991, "step": 4939 }, { "epoch": 1.175879097994883, "grad_norm": 0.3885217222382259, "learning_rate": 3.832164031205237e-06, "loss": 0.3469, "step": 4940 }, { "epoch": 1.176117094067948, "grad_norm": 0.3496306031416618, "learning_rate": 3.830289840035705e-06, "loss": 0.39, "step": 4941 }, { "epoch": 1.1763550901410127, "grad_norm": 0.3725490258794375, "learning_rate": 3.82841582271404e-06, "loss": 0.3009, "step": 4942 }, { "epoch": 1.1765930862140774, "grad_norm": 0.3961940913795775, "learning_rate": 3.8265419795187675e-06, "loss": 0.3007, "step": 4943 }, { "epoch": 1.1768310822871422, "grad_norm": 0.40324342500628063, "learning_rate": 3.824668310728387e-06, "loss": 0.3132, "step": 4944 }, { "epoch": 1.1770690783602071, "grad_norm": 0.36079064416493883, "learning_rate": 3.822794816621371e-06, "loss": 0.4038, "step": 4945 }, { "epoch": 1.1773070744332719, "grad_norm": 0.40154958109483985, "learning_rate": 3.8209214974761685e-06, "loss": 0.2703, "step": 4946 }, { "epoch": 1.1775450705063366, "grad_norm": 0.40470924949834836, "learning_rate": 3.819048353571201e-06, "loss": 0.3144, "step": 4947 }, { "epoch": 1.1777830665794013, "grad_norm": 0.4251133239570864, "learning_rate": 3.817175385184861e-06, "loss": 0.3686, "step": 4948 }, { "epoch": 1.1780210626524663, "grad_norm": 0.3824740738540186, "learning_rate": 3.815302592595522e-06, "loss": 0.3313, "step": 4949 }, { "epoch": 1.178259058725531, "grad_norm": 0.43168535718039347, "learning_rate": 3.813429976081526e-06, "loss": 0.2672, "step": 4950 }, { "epoch": 1.1784970547985958, "grad_norm": 0.3844314375612966, "learning_rate": 3.8115575359211905e-06, "loss": 0.3406, "step": 4951 }, { "epoch": 1.1787350508716605, "grad_norm": 0.38263513845378433, "learning_rate": 3.809685272392804e-06, "loss": 0.3849, "step": 4952 }, { "epoch": 1.1789730469447255, "grad_norm": 0.3613570750787706, "learning_rate": 3.8078131857746346e-06, "loss": 0.2964, "step": 4953 }, { "epoch": 1.1792110430177902, "grad_norm": 0.38485653076788723, "learning_rate": 3.8059412763449187e-06, "loss": 0.3033, "step": 4954 }, { "epoch": 1.179449039090855, "grad_norm": 0.3490639146215179, "learning_rate": 3.804069544381869e-06, "loss": 0.3481, "step": 4955 }, { "epoch": 1.1796870351639197, "grad_norm": 0.3856291558218839, "learning_rate": 3.802197990163671e-06, "loss": 0.3471, "step": 4956 }, { "epoch": 1.1799250312369847, "grad_norm": 0.39550282927113356, "learning_rate": 3.8003266139684832e-06, "loss": 0.2598, "step": 4957 }, { "epoch": 1.1801630273100494, "grad_norm": 0.3862155603983133, "learning_rate": 3.798455416074439e-06, "loss": 0.3378, "step": 4958 }, { "epoch": 1.1804010233831141, "grad_norm": 0.3839437629883702, "learning_rate": 3.7965843967596453e-06, "loss": 0.3982, "step": 4959 }, { "epoch": 1.1806390194561789, "grad_norm": 0.4078427772915639, "learning_rate": 3.7947135563021814e-06, "loss": 0.2971, "step": 4960 }, { "epoch": 1.1808770155292438, "grad_norm": 0.35307107716990815, "learning_rate": 3.7928428949800996e-06, "loss": 0.2988, "step": 4961 }, { "epoch": 1.1811150116023086, "grad_norm": 0.3992799282030228, "learning_rate": 3.7909724130714277e-06, "loss": 0.3522, "step": 4962 }, { "epoch": 1.1813530076753733, "grad_norm": 0.381054998500261, "learning_rate": 3.7891021108541642e-06, "loss": 0.3713, "step": 4963 }, { "epoch": 1.181591003748438, "grad_norm": 0.40058461166635667, "learning_rate": 3.787231988606284e-06, "loss": 0.3104, "step": 4964 }, { "epoch": 1.181828999821503, "grad_norm": 0.36686697436707344, "learning_rate": 3.785362046605732e-06, "loss": 0.3072, "step": 4965 }, { "epoch": 1.1820669958945678, "grad_norm": 0.4149878680339965, "learning_rate": 3.7834922851304297e-06, "loss": 0.4061, "step": 4966 }, { "epoch": 1.1823049919676325, "grad_norm": 0.3695370875718639, "learning_rate": 3.7816227044582687e-06, "loss": 0.3289, "step": 4967 }, { "epoch": 1.1825429880406972, "grad_norm": 0.3596942240313325, "learning_rate": 3.7797533048671146e-06, "loss": 0.2954, "step": 4968 }, { "epoch": 1.1827809841137622, "grad_norm": 0.39500031375483247, "learning_rate": 3.7778840866348075e-06, "loss": 0.3261, "step": 4969 }, { "epoch": 1.183018980186827, "grad_norm": 0.41374777726632095, "learning_rate": 3.7760150500391584e-06, "loss": 0.3944, "step": 4970 }, { "epoch": 1.1832569762598917, "grad_norm": 0.3537235485993441, "learning_rate": 3.7741461953579527e-06, "loss": 0.2971, "step": 4971 }, { "epoch": 1.1834949723329564, "grad_norm": 0.3748434274707176, "learning_rate": 3.772277522868949e-06, "loss": 0.3244, "step": 4972 }, { "epoch": 1.1837329684060214, "grad_norm": 0.38166375878188813, "learning_rate": 3.770409032849878e-06, "loss": 0.3598, "step": 4973 }, { "epoch": 1.1839709644790861, "grad_norm": 0.3922577890411974, "learning_rate": 3.7685407255784424e-06, "loss": 0.3348, "step": 4974 }, { "epoch": 1.1842089605521509, "grad_norm": 0.3845300676159576, "learning_rate": 3.766672601332319e-06, "loss": 0.2835, "step": 4975 }, { "epoch": 1.1844469566252156, "grad_norm": 0.3864290171184251, "learning_rate": 3.76480466038916e-06, "loss": 0.3286, "step": 4976 }, { "epoch": 1.1846849526982806, "grad_norm": 0.40164860660518437, "learning_rate": 3.7629369030265834e-06, "loss": 0.4308, "step": 4977 }, { "epoch": 1.1849229487713453, "grad_norm": 0.38101030100174693, "learning_rate": 3.7610693295221885e-06, "loss": 0.278, "step": 4978 }, { "epoch": 1.18516094484441, "grad_norm": 0.3827769312151021, "learning_rate": 3.7592019401535397e-06, "loss": 0.2915, "step": 4979 }, { "epoch": 1.1853989409174748, "grad_norm": 0.39348943008486564, "learning_rate": 3.7573347351981785e-06, "loss": 0.3532, "step": 4980 }, { "epoch": 1.1856369369905397, "grad_norm": 0.35366811317076075, "learning_rate": 3.7554677149336186e-06, "loss": 0.355, "step": 4981 }, { "epoch": 1.1858749330636045, "grad_norm": 0.3730839491699928, "learning_rate": 3.7536008796373447e-06, "loss": 0.2738, "step": 4982 }, { "epoch": 1.1861129291366692, "grad_norm": 0.36612366371157234, "learning_rate": 3.7517342295868142e-06, "loss": 0.3053, "step": 4983 }, { "epoch": 1.186350925209734, "grad_norm": 0.3823419705705663, "learning_rate": 3.7498677650594585e-06, "loss": 0.3915, "step": 4984 }, { "epoch": 1.186588921282799, "grad_norm": 0.36531157147493043, "learning_rate": 3.7480014863326786e-06, "loss": 0.2942, "step": 4985 }, { "epoch": 1.1868269173558637, "grad_norm": 0.401505598688106, "learning_rate": 3.746135393683851e-06, "loss": 0.2788, "step": 4986 }, { "epoch": 1.1870649134289284, "grad_norm": 0.38093798207572305, "learning_rate": 3.7442694873903236e-06, "loss": 0.3348, "step": 4987 }, { "epoch": 1.1873029095019931, "grad_norm": 0.38132115350268836, "learning_rate": 3.742403767729414e-06, "loss": 0.3639, "step": 4988 }, { "epoch": 1.187540905575058, "grad_norm": 0.39346856748575865, "learning_rate": 3.740538234978417e-06, "loss": 0.2893, "step": 4989 }, { "epoch": 1.1877789016481228, "grad_norm": 0.3613470905129032, "learning_rate": 3.7386728894145965e-06, "loss": 0.3158, "step": 4990 }, { "epoch": 1.1880168977211876, "grad_norm": 0.3715632977391001, "learning_rate": 3.7368077313151866e-06, "loss": 0.4092, "step": 4991 }, { "epoch": 1.1882548937942523, "grad_norm": 0.43216241244974535, "learning_rate": 3.7349427609573985e-06, "loss": 0.3197, "step": 4992 }, { "epoch": 1.1884928898673173, "grad_norm": 0.38373124295883176, "learning_rate": 3.7330779786184122e-06, "loss": 0.3042, "step": 4993 }, { "epoch": 1.188730885940382, "grad_norm": 0.36301878619729605, "learning_rate": 3.731213384575381e-06, "loss": 0.3551, "step": 4994 }, { "epoch": 1.1889688820134467, "grad_norm": 0.3633370905617256, "learning_rate": 3.7293489791054293e-06, "loss": 0.381, "step": 4995 }, { "epoch": 1.1892068780865115, "grad_norm": 0.36132719462940693, "learning_rate": 3.727484762485653e-06, "loss": 0.2943, "step": 4996 }, { "epoch": 1.1894448741595764, "grad_norm": 0.3626513454326024, "learning_rate": 3.7256207349931216e-06, "loss": 0.3158, "step": 4997 }, { "epoch": 1.1896828702326412, "grad_norm": 0.39265659273339876, "learning_rate": 3.7237568969048766e-06, "loss": 0.3878, "step": 4998 }, { "epoch": 1.189920866305706, "grad_norm": 0.35277130854248157, "learning_rate": 3.7218932484979287e-06, "loss": 0.338, "step": 4999 }, { "epoch": 1.1901588623787707, "grad_norm": 0.39473980233793865, "learning_rate": 3.7200297900492632e-06, "loss": 0.2937, "step": 5000 }, { "epoch": 1.1903968584518356, "grad_norm": 0.3618280922673368, "learning_rate": 3.7181665218358354e-06, "loss": 0.3374, "step": 5001 }, { "epoch": 1.1906348545249004, "grad_norm": 0.39899118970440267, "learning_rate": 3.7163034441345725e-06, "loss": 0.3833, "step": 5002 }, { "epoch": 1.190872850597965, "grad_norm": 0.36708868631705965, "learning_rate": 3.7144405572223762e-06, "loss": 0.3217, "step": 5003 }, { "epoch": 1.1911108466710298, "grad_norm": 0.36066225989423434, "learning_rate": 3.7125778613761164e-06, "loss": 0.3079, "step": 5004 }, { "epoch": 1.1913488427440948, "grad_norm": 0.4031849072858775, "learning_rate": 3.710715356872634e-06, "loss": 0.3519, "step": 5005 }, { "epoch": 1.1915868388171595, "grad_norm": 0.39768511346134466, "learning_rate": 3.708853043988746e-06, "loss": 0.3612, "step": 5006 }, { "epoch": 1.1918248348902243, "grad_norm": 0.3746114220221446, "learning_rate": 3.7069909230012376e-06, "loss": 0.3005, "step": 5007 }, { "epoch": 1.192062830963289, "grad_norm": 0.3935100573355243, "learning_rate": 3.705128994186865e-06, "loss": 0.3071, "step": 5008 }, { "epoch": 1.192300827036354, "grad_norm": 0.3544256856276741, "learning_rate": 3.7032672578223583e-06, "loss": 0.3868, "step": 5009 }, { "epoch": 1.1925388231094187, "grad_norm": 0.37426633165695083, "learning_rate": 3.701405714184416e-06, "loss": 0.2953, "step": 5010 }, { "epoch": 1.1927768191824835, "grad_norm": 0.42034731649414875, "learning_rate": 3.699544363549711e-06, "loss": 0.3005, "step": 5011 }, { "epoch": 1.1930148152555482, "grad_norm": 0.4082503637885382, "learning_rate": 3.6976832061948845e-06, "loss": 0.3477, "step": 5012 }, { "epoch": 1.1932528113286132, "grad_norm": 0.8416333609801906, "learning_rate": 3.695822242396552e-06, "loss": 0.3687, "step": 5013 }, { "epoch": 1.193490807401678, "grad_norm": 0.37912063370018584, "learning_rate": 3.693961472431298e-06, "loss": 0.2979, "step": 5014 }, { "epoch": 1.1937288034747426, "grad_norm": 0.401423257824879, "learning_rate": 3.6921008965756775e-06, "loss": 0.3199, "step": 5015 }, { "epoch": 1.1939667995478074, "grad_norm": 0.3800891163492014, "learning_rate": 3.690240515106221e-06, "loss": 0.3765, "step": 5016 }, { "epoch": 1.1942047956208723, "grad_norm": 0.3669803455039553, "learning_rate": 3.6883803282994256e-06, "loss": 0.3246, "step": 5017 }, { "epoch": 1.194442791693937, "grad_norm": 0.4200743670629511, "learning_rate": 3.6865203364317605e-06, "loss": 0.2893, "step": 5018 }, { "epoch": 1.1946807877670018, "grad_norm": 0.37720436817760933, "learning_rate": 3.6846605397796677e-06, "loss": 0.3496, "step": 5019 }, { "epoch": 1.1949187838400666, "grad_norm": 0.38267177297360994, "learning_rate": 3.6828009386195592e-06, "loss": 0.3647, "step": 5020 }, { "epoch": 1.1951567799131315, "grad_norm": 0.3855032217341476, "learning_rate": 3.680941533227817e-06, "loss": 0.2893, "step": 5021 }, { "epoch": 1.1953947759861963, "grad_norm": 0.41661785772795806, "learning_rate": 3.679082323880795e-06, "loss": 0.2966, "step": 5022 }, { "epoch": 1.195632772059261, "grad_norm": 0.38996824463256474, "learning_rate": 3.6772233108548182e-06, "loss": 0.3682, "step": 5023 }, { "epoch": 1.1958707681323257, "grad_norm": 0.3579485476112444, "learning_rate": 3.6753644944261806e-06, "loss": 0.3461, "step": 5024 }, { "epoch": 1.1961087642053907, "grad_norm": 0.38986130206284325, "learning_rate": 3.6735058748711492e-06, "loss": 0.2968, "step": 5025 }, { "epoch": 1.1963467602784554, "grad_norm": 0.3703854439689402, "learning_rate": 3.6716474524659608e-06, "loss": 0.3479, "step": 5026 }, { "epoch": 1.1965847563515202, "grad_norm": 0.4016888133573442, "learning_rate": 3.669789227486823e-06, "loss": 0.3763, "step": 5027 }, { "epoch": 1.196822752424585, "grad_norm": 0.3767473882222918, "learning_rate": 3.667931200209913e-06, "loss": 0.2947, "step": 5028 }, { "epoch": 1.1970607484976499, "grad_norm": 0.39627616648735325, "learning_rate": 3.6660733709113805e-06, "loss": 0.2915, "step": 5029 }, { "epoch": 1.1972987445707146, "grad_norm": 0.3824120166417069, "learning_rate": 3.664215739867345e-06, "loss": 0.3831, "step": 5030 }, { "epoch": 1.1975367406437794, "grad_norm": 0.3796189625607475, "learning_rate": 3.662358307353897e-06, "loss": 0.3594, "step": 5031 }, { "epoch": 1.197774736716844, "grad_norm": 0.4057709147757602, "learning_rate": 3.6605010736470945e-06, "loss": 0.3109, "step": 5032 }, { "epoch": 1.198012732789909, "grad_norm": 0.38925059478299473, "learning_rate": 3.6586440390229705e-06, "loss": 0.3187, "step": 5033 }, { "epoch": 1.1982507288629738, "grad_norm": 0.3795304534109489, "learning_rate": 3.656787203757527e-06, "loss": 0.4042, "step": 5034 }, { "epoch": 1.1984887249360385, "grad_norm": 0.33530850356764125, "learning_rate": 3.654930568126734e-06, "loss": 0.2998, "step": 5035 }, { "epoch": 1.1987267210091033, "grad_norm": 0.43587793237382827, "learning_rate": 3.6530741324065343e-06, "loss": 0.3011, "step": 5036 }, { "epoch": 1.1989647170821682, "grad_norm": 0.392590497330817, "learning_rate": 3.65121789687284e-06, "loss": 0.3432, "step": 5037 }, { "epoch": 1.199202713155233, "grad_norm": 0.4238986802151149, "learning_rate": 3.6493618618015335e-06, "loss": 0.3859, "step": 5038 }, { "epoch": 1.1994407092282977, "grad_norm": 0.3718207166623785, "learning_rate": 3.647506027468467e-06, "loss": 0.2891, "step": 5039 }, { "epoch": 1.1996787053013624, "grad_norm": 0.39411079054515186, "learning_rate": 3.645650394149465e-06, "loss": 0.3328, "step": 5040 }, { "epoch": 1.1999167013744274, "grad_norm": 0.36581910992126304, "learning_rate": 3.6437949621203184e-06, "loss": 0.3807, "step": 5041 }, { "epoch": 1.2001546974474921, "grad_norm": 0.3661757831955536, "learning_rate": 3.6419397316567902e-06, "loss": 0.3113, "step": 5042 }, { "epoch": 1.2003926935205569, "grad_norm": 0.45796517113773294, "learning_rate": 3.640084703034616e-06, "loss": 0.3018, "step": 5043 }, { "epoch": 1.2006306895936216, "grad_norm": 0.35520419192547475, "learning_rate": 3.6382298765294978e-06, "loss": 0.3468, "step": 5044 }, { "epoch": 1.2008686856666866, "grad_norm": 0.3770376085730245, "learning_rate": 3.6363752524171083e-06, "loss": 0.3914, "step": 5045 }, { "epoch": 1.2011066817397513, "grad_norm": 0.37954607416403163, "learning_rate": 3.6345208309730885e-06, "loss": 0.3111, "step": 5046 }, { "epoch": 1.201344677812816, "grad_norm": 0.3863432844778005, "learning_rate": 3.632666612473056e-06, "loss": 0.319, "step": 5047 }, { "epoch": 1.2015826738858808, "grad_norm": 0.4500532032678951, "learning_rate": 3.630812597192591e-06, "loss": 0.3771, "step": 5048 }, { "epoch": 1.2018206699589458, "grad_norm": 0.37110161136850156, "learning_rate": 3.628958785407246e-06, "loss": 0.3539, "step": 5049 }, { "epoch": 1.2020586660320105, "grad_norm": 0.38550398130650293, "learning_rate": 3.6271051773925434e-06, "loss": 0.2796, "step": 5050 }, { "epoch": 1.2022966621050752, "grad_norm": 0.41164229002791736, "learning_rate": 3.6252517734239757e-06, "loss": 0.3169, "step": 5051 }, { "epoch": 1.20253465817814, "grad_norm": 0.4273076385043179, "learning_rate": 3.6233985737770034e-06, "loss": 0.3748, "step": 5052 }, { "epoch": 1.202772654251205, "grad_norm": 0.38342810948189754, "learning_rate": 3.6215455787270587e-06, "loss": 0.3031, "step": 5053 }, { "epoch": 1.2030106503242697, "grad_norm": 0.3815063828509451, "learning_rate": 3.6196927885495426e-06, "loss": 0.3084, "step": 5054 }, { "epoch": 1.2032486463973344, "grad_norm": 0.39701942080159974, "learning_rate": 3.617840203519825e-06, "loss": 0.3449, "step": 5055 }, { "epoch": 1.2034866424703992, "grad_norm": 0.3826077784262676, "learning_rate": 3.6159878239132453e-06, "loss": 0.3861, "step": 5056 }, { "epoch": 1.2037246385434641, "grad_norm": 0.3761935038939176, "learning_rate": 3.614135650005115e-06, "loss": 0.2809, "step": 5057 }, { "epoch": 1.2039626346165289, "grad_norm": 0.38813967758028695, "learning_rate": 3.6122836820707107e-06, "loss": 0.3292, "step": 5058 }, { "epoch": 1.2042006306895936, "grad_norm": 0.4094613511832653, "learning_rate": 3.6104319203852826e-06, "loss": 0.3954, "step": 5059 }, { "epoch": 1.2044386267626583, "grad_norm": 0.35131103373017636, "learning_rate": 3.608580365224045e-06, "loss": 0.3157, "step": 5060 }, { "epoch": 1.2046766228357233, "grad_norm": 0.3779833656079706, "learning_rate": 3.60672901686219e-06, "loss": 0.3155, "step": 5061 }, { "epoch": 1.204914618908788, "grad_norm": 0.3801342639897726, "learning_rate": 3.60487787557487e-06, "loss": 0.3412, "step": 5062 }, { "epoch": 1.2051526149818528, "grad_norm": 0.3661980162834189, "learning_rate": 3.603026941637212e-06, "loss": 0.3519, "step": 5063 }, { "epoch": 1.2053906110549175, "grad_norm": 0.3839709384547976, "learning_rate": 3.6011762153243096e-06, "loss": 0.3023, "step": 5064 }, { "epoch": 1.2056286071279825, "grad_norm": 0.39407579614935423, "learning_rate": 3.5993256969112266e-06, "loss": 0.3062, "step": 5065 }, { "epoch": 1.2058666032010472, "grad_norm": 0.3593214219415961, "learning_rate": 3.5974753866729966e-06, "loss": 0.4125, "step": 5066 }, { "epoch": 1.206104599274112, "grad_norm": 0.3672574458694017, "learning_rate": 3.5956252848846206e-06, "loss": 0.3438, "step": 5067 }, { "epoch": 1.2063425953471767, "grad_norm": 0.3723709621082384, "learning_rate": 3.5937753918210705e-06, "loss": 0.2843, "step": 5068 }, { "epoch": 1.2065805914202417, "grad_norm": 0.3901256291535116, "learning_rate": 3.5919257077572835e-06, "loss": 0.3146, "step": 5069 }, { "epoch": 1.2068185874933064, "grad_norm": 0.37899864532094263, "learning_rate": 3.5900762329681717e-06, "loss": 0.3923, "step": 5070 }, { "epoch": 1.2070565835663711, "grad_norm": 0.35151540606068965, "learning_rate": 3.5882269677286117e-06, "loss": 0.2781, "step": 5071 }, { "epoch": 1.2072945796394359, "grad_norm": 0.3834887945303703, "learning_rate": 3.58637791231345e-06, "loss": 0.3403, "step": 5072 }, { "epoch": 1.2075325757125008, "grad_norm": 0.3971997277785191, "learning_rate": 3.5845290669975015e-06, "loss": 0.3797, "step": 5073 }, { "epoch": 1.2077705717855656, "grad_norm": 0.36240585843749556, "learning_rate": 3.5826804320555486e-06, "loss": 0.3157, "step": 5074 }, { "epoch": 1.2080085678586303, "grad_norm": 0.3767347339492281, "learning_rate": 3.5808320077623485e-06, "loss": 0.2653, "step": 5075 }, { "epoch": 1.208246563931695, "grad_norm": 0.38456131915052816, "learning_rate": 3.5789837943926208e-06, "loss": 0.3516, "step": 5076 }, { "epoch": 1.20848456000476, "grad_norm": 0.4128907315601859, "learning_rate": 3.5771357922210555e-06, "loss": 0.3727, "step": 5077 }, { "epoch": 1.2087225560778248, "grad_norm": 0.3723204263394461, "learning_rate": 3.5752880015223113e-06, "loss": 0.2884, "step": 5078 }, { "epoch": 1.2089605521508895, "grad_norm": 0.3637169168437252, "learning_rate": 3.5734404225710157e-06, "loss": 0.2807, "step": 5079 }, { "epoch": 1.2091985482239542, "grad_norm": 0.38764628752736946, "learning_rate": 3.5715930556417644e-06, "loss": 0.3578, "step": 5080 }, { "epoch": 1.2094365442970192, "grad_norm": 0.3450780601550473, "learning_rate": 3.569745901009123e-06, "loss": 0.3605, "step": 5081 }, { "epoch": 1.209674540370084, "grad_norm": 0.3581139717614697, "learning_rate": 3.5678989589476228e-06, "loss": 0.2712, "step": 5082 }, { "epoch": 1.2099125364431487, "grad_norm": 0.34988502902927887, "learning_rate": 3.5660522297317648e-06, "loss": 0.3645, "step": 5083 }, { "epoch": 1.2101505325162134, "grad_norm": 0.3708416415228171, "learning_rate": 3.5642057136360205e-06, "loss": 0.3858, "step": 5084 }, { "epoch": 1.2103885285892784, "grad_norm": 0.38942129205042847, "learning_rate": 3.562359410934827e-06, "loss": 0.3324, "step": 5085 }, { "epoch": 1.210626524662343, "grad_norm": 0.3806323119990266, "learning_rate": 3.560513321902591e-06, "loss": 0.29, "step": 5086 }, { "epoch": 1.2108645207354078, "grad_norm": 0.3700249289986975, "learning_rate": 3.5586674468136838e-06, "loss": 0.3722, "step": 5087 }, { "epoch": 1.2111025168084726, "grad_norm": 0.3719331028186071, "learning_rate": 3.5568217859424535e-06, "loss": 0.3775, "step": 5088 }, { "epoch": 1.2113405128815375, "grad_norm": 0.36400596249820083, "learning_rate": 3.554976339563209e-06, "loss": 0.3116, "step": 5089 }, { "epoch": 1.2115785089546023, "grad_norm": 0.38994792469616396, "learning_rate": 3.553131107950227e-06, "loss": 0.3026, "step": 5090 }, { "epoch": 1.211816505027667, "grad_norm": 0.3941320652284189, "learning_rate": 3.551286091377757e-06, "loss": 0.3698, "step": 5091 }, { "epoch": 1.2120545011007318, "grad_norm": 0.4046036167301338, "learning_rate": 3.549441290120013e-06, "loss": 0.3097, "step": 5092 }, { "epoch": 1.2122924971737967, "grad_norm": 0.4039353108910549, "learning_rate": 3.547596704451179e-06, "loss": 0.2699, "step": 5093 }, { "epoch": 1.2125304932468615, "grad_norm": 0.4363727370434713, "learning_rate": 3.545752334645405e-06, "loss": 0.3303, "step": 5094 }, { "epoch": 1.2127684893199262, "grad_norm": 0.3840952200590034, "learning_rate": 3.5439081809768103e-06, "loss": 0.3837, "step": 5095 }, { "epoch": 1.213006485392991, "grad_norm": 0.37439745222911935, "learning_rate": 3.5420642437194807e-06, "loss": 0.3031, "step": 5096 }, { "epoch": 1.213244481466056, "grad_norm": 0.40481232665765876, "learning_rate": 3.540220523147474e-06, "loss": 0.3129, "step": 5097 }, { "epoch": 1.2134824775391206, "grad_norm": 0.3706994690208066, "learning_rate": 3.53837701953481e-06, "loss": 0.3574, "step": 5098 }, { "epoch": 1.2137204736121854, "grad_norm": 0.36982293775644515, "learning_rate": 3.53653373315548e-06, "loss": 0.3361, "step": 5099 }, { "epoch": 1.2139584696852501, "grad_norm": 0.44289417728869535, "learning_rate": 3.534690664283441e-06, "loss": 0.2839, "step": 5100 }, { "epoch": 1.214196465758315, "grad_norm": 0.39709885081786933, "learning_rate": 3.5328478131926182e-06, "loss": 0.3385, "step": 5101 }, { "epoch": 1.2144344618313798, "grad_norm": 0.386735844300816, "learning_rate": 3.5310051801569077e-06, "loss": 0.3861, "step": 5102 }, { "epoch": 1.2146724579044446, "grad_norm": 0.38596849207457884, "learning_rate": 3.5291627654501683e-06, "loss": 0.3018, "step": 5103 }, { "epoch": 1.2149104539775093, "grad_norm": 0.3772909341628557, "learning_rate": 3.5273205693462294e-06, "loss": 0.276, "step": 5104 }, { "epoch": 1.2151484500505743, "grad_norm": 0.40038122463925, "learning_rate": 3.5254785921188855e-06, "loss": 0.3544, "step": 5105 }, { "epoch": 1.215386446123639, "grad_norm": 0.394545199953025, "learning_rate": 3.5236368340419015e-06, "loss": 0.3795, "step": 5106 }, { "epoch": 1.2156244421967037, "grad_norm": 0.37930731017600633, "learning_rate": 3.5217952953890065e-06, "loss": 0.2946, "step": 5107 }, { "epoch": 1.2158624382697685, "grad_norm": 0.3801188952039268, "learning_rate": 3.5199539764338995e-06, "loss": 0.3532, "step": 5108 }, { "epoch": 1.2161004343428334, "grad_norm": 0.3760239490498582, "learning_rate": 3.518112877450247e-06, "loss": 0.3925, "step": 5109 }, { "epoch": 1.2163384304158982, "grad_norm": 0.34044640736378307, "learning_rate": 3.51627199871168e-06, "loss": 0.2874, "step": 5110 }, { "epoch": 1.216576426488963, "grad_norm": 0.4167896879648559, "learning_rate": 3.5144313404918e-06, "loss": 0.3038, "step": 5111 }, { "epoch": 1.2168144225620277, "grad_norm": 0.375356242770381, "learning_rate": 3.512590903064175e-06, "loss": 0.3309, "step": 5112 }, { "epoch": 1.2170524186350926, "grad_norm": 0.37061202523878695, "learning_rate": 3.5107506867023377e-06, "loss": 0.373, "step": 5113 }, { "epoch": 1.2172904147081574, "grad_norm": 0.36705161824794263, "learning_rate": 3.508910691679791e-06, "loss": 0.3056, "step": 5114 }, { "epoch": 1.217528410781222, "grad_norm": 0.3939317720079383, "learning_rate": 3.5070709182700007e-06, "loss": 0.3024, "step": 5115 }, { "epoch": 1.2177664068542868, "grad_norm": 0.4054053311852339, "learning_rate": 3.5052313667464075e-06, "loss": 0.3854, "step": 5116 }, { "epoch": 1.2180044029273518, "grad_norm": 0.33993185418916133, "learning_rate": 3.5033920373824125e-06, "loss": 0.3238, "step": 5117 }, { "epoch": 1.2182423990004165, "grad_norm": 0.3434027285290983, "learning_rate": 3.5015529304513845e-06, "loss": 0.2779, "step": 5118 }, { "epoch": 1.2184803950734813, "grad_norm": 0.39857723088957964, "learning_rate": 3.499714046226661e-06, "loss": 0.3658, "step": 5119 }, { "epoch": 1.218718391146546, "grad_norm": 0.3991383073000782, "learning_rate": 3.4978753849815457e-06, "loss": 0.3682, "step": 5120 }, { "epoch": 1.218956387219611, "grad_norm": 0.3685347968394098, "learning_rate": 3.4960369469893087e-06, "loss": 0.2886, "step": 5121 }, { "epoch": 1.2191943832926757, "grad_norm": 0.3833481773800204, "learning_rate": 3.4941987325231873e-06, "loss": 0.3273, "step": 5122 }, { "epoch": 1.2194323793657404, "grad_norm": 0.3827498561469543, "learning_rate": 3.4923607418563855e-06, "loss": 0.3803, "step": 5123 }, { "epoch": 1.2196703754388052, "grad_norm": 0.3628595561239951, "learning_rate": 3.490522975262076e-06, "loss": 0.3226, "step": 5124 }, { "epoch": 1.2199083715118701, "grad_norm": 0.3427380203291989, "learning_rate": 3.4886854330133947e-06, "loss": 0.2499, "step": 5125 }, { "epoch": 1.2201463675849349, "grad_norm": 0.3902097175795296, "learning_rate": 3.4868481153834454e-06, "loss": 0.328, "step": 5126 }, { "epoch": 1.2203843636579996, "grad_norm": 0.3953893414153582, "learning_rate": 3.485011022645301e-06, "loss": 0.4049, "step": 5127 }, { "epoch": 1.2206223597310644, "grad_norm": 0.36238449295552555, "learning_rate": 3.4831741550719964e-06, "loss": 0.3147, "step": 5128 }, { "epoch": 1.2208603558041293, "grad_norm": 0.3978994446444037, "learning_rate": 3.4813375129365357e-06, "loss": 0.2917, "step": 5129 }, { "epoch": 1.221098351877194, "grad_norm": 0.3669821823525373, "learning_rate": 3.4795010965118926e-06, "loss": 0.3574, "step": 5130 }, { "epoch": 1.2213363479502588, "grad_norm": 0.3692786501186213, "learning_rate": 3.477664906071001e-06, "loss": 0.3685, "step": 5131 }, { "epoch": 1.2215743440233235, "grad_norm": 0.3751050152516497, "learning_rate": 3.4758289418867665e-06, "loss": 0.2776, "step": 5132 }, { "epoch": 1.2218123400963885, "grad_norm": 0.3598859162181419, "learning_rate": 3.473993204232056e-06, "loss": 0.3547, "step": 5133 }, { "epoch": 1.2220503361694532, "grad_norm": 0.36721640794062704, "learning_rate": 3.4721576933797072e-06, "loss": 0.3836, "step": 5134 }, { "epoch": 1.222288332242518, "grad_norm": 0.37753613271798336, "learning_rate": 3.470322409602523e-06, "loss": 0.3061, "step": 5135 }, { "epoch": 1.2225263283155827, "grad_norm": 0.36033685671415805, "learning_rate": 3.4684873531732704e-06, "loss": 0.2989, "step": 5136 }, { "epoch": 1.2227643243886477, "grad_norm": 0.4190827209156023, "learning_rate": 3.4666525243646845e-06, "loss": 0.3299, "step": 5137 }, { "epoch": 1.2230023204617124, "grad_norm": 0.41563040679604657, "learning_rate": 3.464817923449467e-06, "loss": 0.3757, "step": 5138 }, { "epoch": 1.2232403165347772, "grad_norm": 0.4017327361796809, "learning_rate": 3.4629835507002853e-06, "loss": 0.2852, "step": 5139 }, { "epoch": 1.223478312607842, "grad_norm": 0.39000305457626766, "learning_rate": 3.461149406389771e-06, "loss": 0.306, "step": 5140 }, { "epoch": 1.2237163086809069, "grad_norm": 0.3870106967773287, "learning_rate": 3.4593154907905246e-06, "loss": 0.3645, "step": 5141 }, { "epoch": 1.2239543047539716, "grad_norm": 0.38791895997961046, "learning_rate": 3.4574818041751113e-06, "loss": 0.3239, "step": 5142 }, { "epoch": 1.2241923008270363, "grad_norm": 0.3959723469109777, "learning_rate": 3.45564834681606e-06, "loss": 0.2933, "step": 5143 }, { "epoch": 1.224430296900101, "grad_norm": 0.36905913364646237, "learning_rate": 3.4538151189858717e-06, "loss": 0.3453, "step": 5144 }, { "epoch": 1.224668292973166, "grad_norm": 0.3837400246280125, "learning_rate": 3.451982120957007e-06, "loss": 0.3988, "step": 5145 }, { "epoch": 1.2249062890462308, "grad_norm": 0.35298513550582294, "learning_rate": 3.450149353001896e-06, "loss": 0.3025, "step": 5146 }, { "epoch": 1.2251442851192955, "grad_norm": 0.372652851908798, "learning_rate": 3.4483168153929324e-06, "loss": 0.2998, "step": 5147 }, { "epoch": 1.2253822811923603, "grad_norm": 0.3571143797030416, "learning_rate": 3.4464845084024767e-06, "loss": 0.3579, "step": 5148 }, { "epoch": 1.2256202772654252, "grad_norm": 0.3669338824803904, "learning_rate": 3.444652432302855e-06, "loss": 0.3227, "step": 5149 }, { "epoch": 1.22585827333849, "grad_norm": 0.38715345328480644, "learning_rate": 3.4428205873663584e-06, "loss": 0.2854, "step": 5150 }, { "epoch": 1.2260962694115547, "grad_norm": 0.3667959879928667, "learning_rate": 3.440988973865246e-06, "loss": 0.3279, "step": 5151 }, { "epoch": 1.2263342654846194, "grad_norm": 0.39136089925706374, "learning_rate": 3.4391575920717407e-06, "loss": 0.4016, "step": 5152 }, { "epoch": 1.2265722615576844, "grad_norm": 0.38499224408384597, "learning_rate": 3.4373264422580305e-06, "loss": 0.2915, "step": 5153 }, { "epoch": 1.2268102576307491, "grad_norm": 0.3893740742863143, "learning_rate": 3.4354955246962694e-06, "loss": 0.2866, "step": 5154 }, { "epoch": 1.2270482537038139, "grad_norm": 0.38314543939198864, "learning_rate": 3.4336648396585777e-06, "loss": 0.3695, "step": 5155 }, { "epoch": 1.2272862497768786, "grad_norm": 0.3553545241693441, "learning_rate": 3.4318343874170378e-06, "loss": 0.3653, "step": 5156 }, { "epoch": 1.2275242458499434, "grad_norm": 0.3721824699789484, "learning_rate": 3.4300041682437046e-06, "loss": 0.3055, "step": 5157 }, { "epoch": 1.2277622419230083, "grad_norm": 0.34746845955767125, "learning_rate": 3.428174182410592e-06, "loss": 0.3031, "step": 5158 }, { "epoch": 1.228000237996073, "grad_norm": 0.39404533678367354, "learning_rate": 3.4263444301896805e-06, "loss": 0.3667, "step": 5159 }, { "epoch": 1.2282382340691378, "grad_norm": 0.3725519922822452, "learning_rate": 3.424514911852917e-06, "loss": 0.2965, "step": 5160 }, { "epoch": 1.2284762301422028, "grad_norm": 0.38747366594584076, "learning_rate": 3.4226856276722133e-06, "loss": 0.2872, "step": 5161 }, { "epoch": 1.2287142262152675, "grad_norm": 0.38316424613293093, "learning_rate": 3.4208565779194467e-06, "loss": 0.3519, "step": 5162 }, { "epoch": 1.2289522222883322, "grad_norm": 0.38446797539419886, "learning_rate": 3.4190277628664583e-06, "loss": 0.3526, "step": 5163 }, { "epoch": 1.229190218361397, "grad_norm": 0.3794454331861941, "learning_rate": 3.417199182785055e-06, "loss": 0.2841, "step": 5164 }, { "epoch": 1.2294282144344617, "grad_norm": 0.3878874853038905, "learning_rate": 3.4153708379470107e-06, "loss": 0.3286, "step": 5165 }, { "epoch": 1.2296662105075267, "grad_norm": 0.40528158013767834, "learning_rate": 3.4135427286240613e-06, "loss": 0.3699, "step": 5166 }, { "epoch": 1.2299042065805914, "grad_norm": 0.3725084616775476, "learning_rate": 3.41171485508791e-06, "loss": 0.3351, "step": 5167 }, { "epoch": 1.2301422026536561, "grad_norm": 0.3539613656037255, "learning_rate": 3.409887217610223e-06, "loss": 0.2858, "step": 5168 }, { "epoch": 1.230380198726721, "grad_norm": 0.3923246643319525, "learning_rate": 3.4080598164626333e-06, "loss": 0.3168, "step": 5169 }, { "epoch": 1.2306181947997858, "grad_norm": 0.3783741339958614, "learning_rate": 3.4062326519167354e-06, "loss": 0.3958, "step": 5170 }, { "epoch": 1.2308561908728506, "grad_norm": 0.34812779553971557, "learning_rate": 3.4044057242440954e-06, "loss": 0.2827, "step": 5171 }, { "epoch": 1.2310941869459153, "grad_norm": 0.3862300590706353, "learning_rate": 3.402579033716238e-06, "loss": 0.3068, "step": 5172 }, { "epoch": 1.23133218301898, "grad_norm": 0.41044439207246175, "learning_rate": 3.400752580604655e-06, "loss": 0.3579, "step": 5173 }, { "epoch": 1.231570179092045, "grad_norm": 0.35576711138677686, "learning_rate": 3.3989263651808013e-06, "loss": 0.3522, "step": 5174 }, { "epoch": 1.2318081751651098, "grad_norm": 0.37907648138016425, "learning_rate": 3.397100387716098e-06, "loss": 0.2591, "step": 5175 }, { "epoch": 1.2320461712381745, "grad_norm": 0.4047380890985508, "learning_rate": 3.395274648481932e-06, "loss": 0.3778, "step": 5176 }, { "epoch": 1.2322841673112395, "grad_norm": 0.3767213089270945, "learning_rate": 3.39344914774965e-06, "loss": 0.3612, "step": 5177 }, { "epoch": 1.2325221633843042, "grad_norm": 0.3618213335927789, "learning_rate": 3.391623885790571e-06, "loss": 0.2956, "step": 5178 }, { "epoch": 1.232760159457369, "grad_norm": 0.37715805015205855, "learning_rate": 3.3897988628759714e-06, "loss": 0.3076, "step": 5179 }, { "epoch": 1.2329981555304337, "grad_norm": 0.36354382791582407, "learning_rate": 3.387974079277095e-06, "loss": 0.3843, "step": 5180 }, { "epoch": 1.2332361516034984, "grad_norm": 0.3828085727627118, "learning_rate": 3.3861495352651504e-06, "loss": 0.3464, "step": 5181 }, { "epoch": 1.2334741476765634, "grad_norm": 0.40137098436265456, "learning_rate": 3.3843252311113095e-06, "loss": 0.2923, "step": 5182 }, { "epoch": 1.2337121437496281, "grad_norm": 0.4180250525495085, "learning_rate": 3.3825011670867086e-06, "loss": 0.3647, "step": 5183 }, { "epoch": 1.2339501398226929, "grad_norm": 0.3742865593493896, "learning_rate": 3.3806773434624475e-06, "loss": 0.3939, "step": 5184 }, { "epoch": 1.2341881358957578, "grad_norm": 0.3799380469090617, "learning_rate": 3.3788537605095957e-06, "loss": 0.3187, "step": 5185 }, { "epoch": 1.2344261319688226, "grad_norm": 0.3845177336003192, "learning_rate": 3.37703041849918e-06, "loss": 0.2822, "step": 5186 }, { "epoch": 1.2346641280418873, "grad_norm": 0.38597150172129496, "learning_rate": 3.375207317702194e-06, "loss": 0.3609, "step": 5187 }, { "epoch": 1.234902124114952, "grad_norm": 0.3806826446535519, "learning_rate": 3.373384458389597e-06, "loss": 0.3542, "step": 5188 }, { "epoch": 1.2351401201880168, "grad_norm": 0.3664699692758197, "learning_rate": 3.371561840832309e-06, "loss": 0.2939, "step": 5189 }, { "epoch": 1.2353781162610817, "grad_norm": 0.3855337340082918, "learning_rate": 3.3697394653012185e-06, "loss": 0.3241, "step": 5190 }, { "epoch": 1.2356161123341465, "grad_norm": 0.3949003682130631, "learning_rate": 3.367917332067172e-06, "loss": 0.3747, "step": 5191 }, { "epoch": 1.2358541084072112, "grad_norm": 0.3916469796647736, "learning_rate": 3.3660954414009872e-06, "loss": 0.2907, "step": 5192 }, { "epoch": 1.2360921044802762, "grad_norm": 0.41103529668836525, "learning_rate": 3.3642737935734403e-06, "loss": 0.266, "step": 5193 }, { "epoch": 1.236330100553341, "grad_norm": 0.4135491847771145, "learning_rate": 3.3624523888552734e-06, "loss": 0.3251, "step": 5194 }, { "epoch": 1.2365680966264057, "grad_norm": 0.41366473528894604, "learning_rate": 3.3606312275171928e-06, "loss": 0.3663, "step": 5195 }, { "epoch": 1.2368060926994704, "grad_norm": 0.3584651473821333, "learning_rate": 3.358810309829868e-06, "loss": 0.2973, "step": 5196 }, { "epoch": 1.2370440887725351, "grad_norm": 0.44390275619305986, "learning_rate": 3.356989636063932e-06, "loss": 0.3176, "step": 5197 }, { "epoch": 1.2372820848456, "grad_norm": 0.408274658661175, "learning_rate": 3.3551692064899806e-06, "loss": 0.388, "step": 5198 }, { "epoch": 1.2375200809186648, "grad_norm": 0.3676982361906048, "learning_rate": 3.353349021378578e-06, "loss": 0.3239, "step": 5199 }, { "epoch": 1.2377580769917296, "grad_norm": 0.38263348645456147, "learning_rate": 3.3515290810002464e-06, "loss": 0.3018, "step": 5200 }, { "epoch": 1.2379960730647945, "grad_norm": 0.37234902877710807, "learning_rate": 3.3497093856254757e-06, "loss": 0.3607, "step": 5201 }, { "epoch": 1.2382340691378593, "grad_norm": 0.396040248592026, "learning_rate": 3.347889935524716e-06, "loss": 0.4182, "step": 5202 }, { "epoch": 1.238472065210924, "grad_norm": 0.3999889104147471, "learning_rate": 3.3460707309683826e-06, "loss": 0.304, "step": 5203 }, { "epoch": 1.2387100612839888, "grad_norm": 0.40354368811724745, "learning_rate": 3.3442517722268543e-06, "loss": 0.2784, "step": 5204 }, { "epoch": 1.2389480573570535, "grad_norm": 0.4088049571035965, "learning_rate": 3.342433059570475e-06, "loss": 0.3927, "step": 5205 }, { "epoch": 1.2391860534301185, "grad_norm": 0.3784733136722843, "learning_rate": 3.340614593269549e-06, "loss": 0.3523, "step": 5206 }, { "epoch": 1.2394240495031832, "grad_norm": 0.6754063547956777, "learning_rate": 3.338796373594346e-06, "loss": 0.2846, "step": 5207 }, { "epoch": 1.239662045576248, "grad_norm": 0.37479104141833847, "learning_rate": 3.336978400815098e-06, "loss": 0.3278, "step": 5208 }, { "epoch": 1.239900041649313, "grad_norm": 0.40369056554447125, "learning_rate": 3.3351606752020004e-06, "loss": 0.4141, "step": 5209 }, { "epoch": 1.2401380377223776, "grad_norm": 0.3592088638519854, "learning_rate": 3.333343197025213e-06, "loss": 0.3133, "step": 5210 }, { "epoch": 1.2403760337954424, "grad_norm": 0.4150892541339133, "learning_rate": 3.3315259665548554e-06, "loss": 0.2712, "step": 5211 }, { "epoch": 1.240614029868507, "grad_norm": 0.3717429857265201, "learning_rate": 3.3297089840610173e-06, "loss": 0.3615, "step": 5212 }, { "epoch": 1.2408520259415718, "grad_norm": 0.37643019244681736, "learning_rate": 3.3278922498137455e-06, "loss": 0.376, "step": 5213 }, { "epoch": 1.2410900220146368, "grad_norm": 0.3735812988579749, "learning_rate": 3.326075764083051e-06, "loss": 0.2832, "step": 5214 }, { "epoch": 1.2413280180877015, "grad_norm": 0.4247489715555179, "learning_rate": 3.3242595271389087e-06, "loss": 0.3459, "step": 5215 }, { "epoch": 1.2415660141607663, "grad_norm": 0.3782694036825223, "learning_rate": 3.3224435392512565e-06, "loss": 0.3574, "step": 5216 }, { "epoch": 1.2418040102338312, "grad_norm": 0.3593379490467785, "learning_rate": 3.320627800689996e-06, "loss": 0.3358, "step": 5217 }, { "epoch": 1.242042006306896, "grad_norm": 0.38439219808924074, "learning_rate": 3.3188123117249884e-06, "loss": 0.2689, "step": 5218 }, { "epoch": 1.2422800023799607, "grad_norm": 0.40058679929549906, "learning_rate": 3.3169970726260625e-06, "loss": 0.3514, "step": 5219 }, { "epoch": 1.2425179984530255, "grad_norm": 0.3765460507646511, "learning_rate": 3.3151820836630074e-06, "loss": 0.3873, "step": 5220 }, { "epoch": 1.2427559945260902, "grad_norm": 0.4315150126863899, "learning_rate": 3.313367345105575e-06, "loss": 0.274, "step": 5221 }, { "epoch": 1.2429939905991552, "grad_norm": 0.3785008936826988, "learning_rate": 3.31155285722348e-06, "loss": 0.3163, "step": 5222 }, { "epoch": 1.24323198667222, "grad_norm": 0.3967888293663607, "learning_rate": 3.309738620286401e-06, "loss": 0.3529, "step": 5223 }, { "epoch": 1.2434699827452846, "grad_norm": 0.3861441173261868, "learning_rate": 3.307924634563978e-06, "loss": 0.3399, "step": 5224 }, { "epoch": 1.2437079788183496, "grad_norm": 0.3750655532874109, "learning_rate": 3.306110900325813e-06, "loss": 0.3173, "step": 5225 }, { "epoch": 1.2439459748914143, "grad_norm": 0.3925378776235632, "learning_rate": 3.304297417841474e-06, "loss": 0.3065, "step": 5226 }, { "epoch": 1.244183970964479, "grad_norm": 0.39518178594914705, "learning_rate": 3.3024841873804885e-06, "loss": 0.3659, "step": 5227 }, { "epoch": 1.2444219670375438, "grad_norm": 0.37902705321028035, "learning_rate": 3.300671209212347e-06, "loss": 0.3182, "step": 5228 }, { "epoch": 1.2446599631106086, "grad_norm": 0.3720334846385885, "learning_rate": 3.298858483606504e-06, "loss": 0.2705, "step": 5229 }, { "epoch": 1.2448979591836735, "grad_norm": 0.3733999529266795, "learning_rate": 3.2970460108323744e-06, "loss": 0.3387, "step": 5230 }, { "epoch": 1.2451359552567383, "grad_norm": 0.3772749257271583, "learning_rate": 3.295233791159336e-06, "loss": 0.3739, "step": 5231 }, { "epoch": 1.245373951329803, "grad_norm": 0.3695783892022936, "learning_rate": 3.2934218248567294e-06, "loss": 0.283, "step": 5232 }, { "epoch": 1.245611947402868, "grad_norm": 0.4014905086469289, "learning_rate": 3.29161011219386e-06, "loss": 0.3269, "step": 5233 }, { "epoch": 1.2458499434759327, "grad_norm": 0.4628150231356265, "learning_rate": 3.2897986534399908e-06, "loss": 0.39, "step": 5234 }, { "epoch": 1.2460879395489974, "grad_norm": 0.35110622672123937, "learning_rate": 3.2879874488643504e-06, "loss": 0.3049, "step": 5235 }, { "epoch": 1.2463259356220622, "grad_norm": 0.39624400624138423, "learning_rate": 3.2861764987361276e-06, "loss": 0.2845, "step": 5236 }, { "epoch": 1.246563931695127, "grad_norm": 0.3548580217470856, "learning_rate": 3.284365803324476e-06, "loss": 0.3565, "step": 5237 }, { "epoch": 1.2468019277681919, "grad_norm": 0.3795495986882351, "learning_rate": 3.2825553628985078e-06, "loss": 0.3632, "step": 5238 }, { "epoch": 1.2470399238412566, "grad_norm": 0.4043537341500232, "learning_rate": 3.280745177727299e-06, "loss": 0.3075, "step": 5239 }, { "epoch": 1.2472779199143214, "grad_norm": 0.40712561384193874, "learning_rate": 3.278935248079891e-06, "loss": 0.2944, "step": 5240 }, { "epoch": 1.2475159159873863, "grad_norm": 0.3557393968961097, "learning_rate": 3.2771255742252817e-06, "loss": 0.3703, "step": 5241 }, { "epoch": 1.247753912060451, "grad_norm": 0.360568352135061, "learning_rate": 3.2753161564324344e-06, "loss": 0.3443, "step": 5242 }, { "epoch": 1.2479919081335158, "grad_norm": 0.3773441969979045, "learning_rate": 3.2735069949702723e-06, "loss": 0.3008, "step": 5243 }, { "epoch": 1.2482299042065805, "grad_norm": 0.35789698002368536, "learning_rate": 3.271698090107682e-06, "loss": 0.3428, "step": 5244 }, { "epoch": 1.2484679002796453, "grad_norm": 0.3799994603077194, "learning_rate": 3.269889442113512e-06, "loss": 0.3857, "step": 5245 }, { "epoch": 1.2487058963527102, "grad_norm": 0.3732392024366841, "learning_rate": 3.2680810512565714e-06, "loss": 0.2917, "step": 5246 }, { "epoch": 1.248943892425775, "grad_norm": 0.40137116851946253, "learning_rate": 3.266272917805633e-06, "loss": 0.273, "step": 5247 }, { "epoch": 1.2491818884988397, "grad_norm": 0.36249398408452443, "learning_rate": 3.2644650420294288e-06, "loss": 0.3821, "step": 5248 }, { "epoch": 1.2494198845719047, "grad_norm": 0.38050458261938763, "learning_rate": 3.2626574241966546e-06, "loss": 0.3256, "step": 5249 }, { "epoch": 1.2496578806449694, "grad_norm": 0.3726179289456404, "learning_rate": 3.2608500645759673e-06, "loss": 0.2807, "step": 5250 }, { "epoch": 1.2498958767180341, "grad_norm": 0.35938712373080584, "learning_rate": 3.2590429634359845e-06, "loss": 0.3395, "step": 5251 }, { "epoch": 1.2501338727910989, "grad_norm": 0.4040569210815416, "learning_rate": 3.257236121045286e-06, "loss": 0.386, "step": 5252 }, { "epoch": 1.2503718688641636, "grad_norm": 0.3844777853293955, "learning_rate": 3.2554295376724134e-06, "loss": 0.2991, "step": 5253 }, { "epoch": 1.2506098649372286, "grad_norm": 0.4040805562877999, "learning_rate": 3.253623213585871e-06, "loss": 0.2617, "step": 5254 }, { "epoch": 1.2508478610102933, "grad_norm": 0.3728383275268288, "learning_rate": 3.2518171490541222e-06, "loss": 0.3587, "step": 5255 }, { "epoch": 1.251085857083358, "grad_norm": 0.37161586537379476, "learning_rate": 3.250011344345594e-06, "loss": 0.351, "step": 5256 }, { "epoch": 1.251323853156423, "grad_norm": 0.3923833529850557, "learning_rate": 3.2482057997286716e-06, "loss": 0.2907, "step": 5257 }, { "epoch": 1.2515618492294878, "grad_norm": 0.46418423651277113, "learning_rate": 3.2464005154717036e-06, "loss": 0.3159, "step": 5258 }, { "epoch": 1.2517998453025525, "grad_norm": 0.391141738921714, "learning_rate": 3.244595491843003e-06, "loss": 0.3681, "step": 5259 }, { "epoch": 1.2520378413756172, "grad_norm": 0.3998036064404465, "learning_rate": 3.2427907291108383e-06, "loss": 0.3235, "step": 5260 }, { "epoch": 1.252275837448682, "grad_norm": 0.3895934764387763, "learning_rate": 3.2409862275434425e-06, "loss": 0.2746, "step": 5261 }, { "epoch": 1.252513833521747, "grad_norm": 0.3704415894740105, "learning_rate": 3.239181987409009e-06, "loss": 0.3457, "step": 5262 }, { "epoch": 1.2527518295948117, "grad_norm": 0.37132868730200563, "learning_rate": 3.2373780089756934e-06, "loss": 0.3927, "step": 5263 }, { "epoch": 1.2529898256678764, "grad_norm": 0.3434397334645843, "learning_rate": 3.2355742925116103e-06, "loss": 0.2817, "step": 5264 }, { "epoch": 1.2532278217409414, "grad_norm": 0.4033979941592838, "learning_rate": 3.233770838284837e-06, "loss": 0.3391, "step": 5265 }, { "epoch": 1.2534658178140061, "grad_norm": 0.4863421240520152, "learning_rate": 3.231967646563412e-06, "loss": 0.3836, "step": 5266 }, { "epoch": 1.2537038138870709, "grad_norm": 0.3863926541257452, "learning_rate": 3.230164717615331e-06, "loss": 0.3292, "step": 5267 }, { "epoch": 1.2539418099601356, "grad_norm": 0.35788876853240303, "learning_rate": 3.228362051708559e-06, "loss": 0.2755, "step": 5268 }, { "epoch": 1.2541798060332003, "grad_norm": 0.3946622156577876, "learning_rate": 3.2265596491110145e-06, "loss": 0.3349, "step": 5269 }, { "epoch": 1.2544178021062653, "grad_norm": 0.3862161899983044, "learning_rate": 3.2247575100905794e-06, "loss": 0.4002, "step": 5270 }, { "epoch": 1.25465579817933, "grad_norm": 0.3970524252747173, "learning_rate": 3.2229556349150947e-06, "loss": 0.2879, "step": 5271 }, { "epoch": 1.2548937942523948, "grad_norm": 0.38366178715626265, "learning_rate": 3.221154023852364e-06, "loss": 0.3337, "step": 5272 }, { "epoch": 1.2551317903254597, "grad_norm": 0.37924724516016045, "learning_rate": 3.2193526771701526e-06, "loss": 0.3732, "step": 5273 }, { "epoch": 1.2553697863985245, "grad_norm": 0.3720944559532506, "learning_rate": 3.2175515951361844e-06, "loss": 0.3384, "step": 5274 }, { "epoch": 1.2556077824715892, "grad_norm": 0.37916136226673736, "learning_rate": 3.2157507780181457e-06, "loss": 0.2804, "step": 5275 }, { "epoch": 1.255845778544654, "grad_norm": 0.37290254799955885, "learning_rate": 3.2139502260836806e-06, "loss": 0.3394, "step": 5276 }, { "epoch": 1.2560837746177187, "grad_norm": 0.4120937160799173, "learning_rate": 3.2121499396003974e-06, "loss": 0.3896, "step": 5277 }, { "epoch": 1.2563217706907837, "grad_norm": 0.3631101334404687, "learning_rate": 3.2103499188358623e-06, "loss": 0.3017, "step": 5278 }, { "epoch": 1.2565597667638484, "grad_norm": 0.38219111111630644, "learning_rate": 3.2085501640576035e-06, "loss": 0.2705, "step": 5279 }, { "epoch": 1.2567977628369131, "grad_norm": 0.4099100476999923, "learning_rate": 3.206750675533106e-06, "loss": 0.368, "step": 5280 }, { "epoch": 1.257035758909978, "grad_norm": 0.37534175826439325, "learning_rate": 3.2049514535298227e-06, "loss": 0.333, "step": 5281 }, { "epoch": 1.2572737549830428, "grad_norm": 0.3958938531308809, "learning_rate": 3.2031524983151606e-06, "loss": 0.3224, "step": 5282 }, { "epoch": 1.2575117510561076, "grad_norm": 0.3755193173874838, "learning_rate": 3.20135381015649e-06, "loss": 0.3333, "step": 5283 }, { "epoch": 1.2577497471291723, "grad_norm": 0.405190126827234, "learning_rate": 3.1995553893211395e-06, "loss": 0.3538, "step": 5284 }, { "epoch": 1.257987743202237, "grad_norm": 0.3554947984040851, "learning_rate": 3.1977572360763977e-06, "loss": 0.3086, "step": 5285 }, { "epoch": 1.258225739275302, "grad_norm": 0.36944294131448824, "learning_rate": 3.195959350689517e-06, "loss": 0.3169, "step": 5286 }, { "epoch": 1.2584637353483668, "grad_norm": 0.38302961275351355, "learning_rate": 3.1941617334277066e-06, "loss": 0.3534, "step": 5287 }, { "epoch": 1.2587017314214315, "grad_norm": 0.34830251003999074, "learning_rate": 3.1923643845581364e-06, "loss": 0.3458, "step": 5288 }, { "epoch": 1.2589397274944965, "grad_norm": 0.4007008524746251, "learning_rate": 3.190567304347938e-06, "loss": 0.2737, "step": 5289 }, { "epoch": 1.2591777235675612, "grad_norm": 0.3743627078891674, "learning_rate": 3.1887704930642016e-06, "loss": 0.3119, "step": 5290 }, { "epoch": 1.259415719640626, "grad_norm": 0.39475666902200535, "learning_rate": 3.1869739509739772e-06, "loss": 0.3684, "step": 5291 }, { "epoch": 1.2596537157136907, "grad_norm": 0.3507214620255833, "learning_rate": 3.1851776783442754e-06, "loss": 0.3523, "step": 5292 }, { "epoch": 1.2598917117867554, "grad_norm": 0.4174143666004117, "learning_rate": 3.1833816754420678e-06, "loss": 0.2806, "step": 5293 }, { "epoch": 1.2601297078598204, "grad_norm": 0.3626773824316747, "learning_rate": 3.181585942534282e-06, "loss": 0.3449, "step": 5294 }, { "epoch": 1.260367703932885, "grad_norm": 0.3746333463473584, "learning_rate": 3.179790479887812e-06, "loss": 0.3698, "step": 5295 }, { "epoch": 1.2606057000059498, "grad_norm": 0.39419946032729236, "learning_rate": 3.1779952877695077e-06, "loss": 0.2622, "step": 5296 }, { "epoch": 1.2608436960790148, "grad_norm": 0.3665818295311167, "learning_rate": 3.176200366446177e-06, "loss": 0.2874, "step": 5297 }, { "epoch": 1.2610816921520795, "grad_norm": 0.41342588700331717, "learning_rate": 3.174405716184591e-06, "loss": 0.3516, "step": 5298 }, { "epoch": 1.2613196882251443, "grad_norm": 0.35359012506797194, "learning_rate": 3.1726113372514767e-06, "loss": 0.3293, "step": 5299 }, { "epoch": 1.261557684298209, "grad_norm": 0.40608712887436516, "learning_rate": 3.1708172299135266e-06, "loss": 0.2651, "step": 5300 }, { "epoch": 1.2617956803712738, "grad_norm": 0.39078363662670146, "learning_rate": 3.1690233944373883e-06, "loss": 0.3475, "step": 5301 }, { "epoch": 1.2620336764443387, "grad_norm": 0.39442057764959976, "learning_rate": 3.1672298310896693e-06, "loss": 0.448, "step": 5302 }, { "epoch": 1.2622716725174035, "grad_norm": 0.3870252703040663, "learning_rate": 3.165436540136938e-06, "loss": 0.3393, "step": 5303 }, { "epoch": 1.2625096685904682, "grad_norm": 0.35385684836054004, "learning_rate": 3.1636435218457216e-06, "loss": 0.2895, "step": 5304 }, { "epoch": 1.2627476646635332, "grad_norm": 0.38830448667646283, "learning_rate": 3.161850776482508e-06, "loss": 0.3622, "step": 5305 }, { "epoch": 1.262985660736598, "grad_norm": 0.3724314662342735, "learning_rate": 3.160058304313742e-06, "loss": 0.3318, "step": 5306 }, { "epoch": 1.2632236568096626, "grad_norm": 0.4014989473946267, "learning_rate": 3.1582661056058294e-06, "loss": 0.2739, "step": 5307 }, { "epoch": 1.2634616528827274, "grad_norm": 0.37792045811506125, "learning_rate": 3.156474180625134e-06, "loss": 0.3098, "step": 5308 }, { "epoch": 1.2636996489557921, "grad_norm": 0.3923195999280906, "learning_rate": 3.1546825296379845e-06, "loss": 0.3802, "step": 5309 }, { "epoch": 1.263937645028857, "grad_norm": 0.36757724848018525, "learning_rate": 3.152891152910662e-06, "loss": 0.3026, "step": 5310 }, { "epoch": 1.2641756411019218, "grad_norm": 0.3689543106637064, "learning_rate": 3.1511000507094097e-06, "loss": 0.3073, "step": 5311 }, { "epoch": 1.2644136371749866, "grad_norm": 0.36026728365035765, "learning_rate": 3.149309223300428e-06, "loss": 0.3464, "step": 5312 }, { "epoch": 1.2646516332480515, "grad_norm": 0.3666885097703029, "learning_rate": 3.1475186709498806e-06, "loss": 0.397, "step": 5313 }, { "epoch": 1.2648896293211163, "grad_norm": 0.35535524090766163, "learning_rate": 3.145728393923887e-06, "loss": 0.2911, "step": 5314 }, { "epoch": 1.265127625394181, "grad_norm": 0.3699552572065724, "learning_rate": 3.143938392488527e-06, "loss": 0.3085, "step": 5315 }, { "epoch": 1.2653656214672457, "grad_norm": 0.371881166524415, "learning_rate": 3.1421486669098376e-06, "loss": 0.3925, "step": 5316 }, { "epoch": 1.2656036175403105, "grad_norm": 0.3979148251780867, "learning_rate": 3.1403592174538176e-06, "loss": 0.3259, "step": 5317 }, { "epoch": 1.2658416136133754, "grad_norm": 0.35728138026407263, "learning_rate": 3.138570044386424e-06, "loss": 0.2795, "step": 5318 }, { "epoch": 1.2660796096864402, "grad_norm": 0.40000777660528697, "learning_rate": 3.136781147973571e-06, "loss": 0.318, "step": 5319 }, { "epoch": 1.266317605759505, "grad_norm": 0.37847837862392103, "learning_rate": 3.134992528481133e-06, "loss": 0.3983, "step": 5320 }, { "epoch": 1.2665556018325699, "grad_norm": 0.36014200615492253, "learning_rate": 3.133204186174942e-06, "loss": 0.3218, "step": 5321 }, { "epoch": 1.2667935979056346, "grad_norm": 0.3823696624234763, "learning_rate": 3.131416121320792e-06, "loss": 0.2918, "step": 5322 }, { "epoch": 1.2670315939786994, "grad_norm": 0.3722726581374576, "learning_rate": 3.129628334184433e-06, "loss": 0.3727, "step": 5323 }, { "epoch": 1.267269590051764, "grad_norm": 0.3659346216629232, "learning_rate": 3.127840825031575e-06, "loss": 0.3274, "step": 5324 }, { "epoch": 1.2675075861248288, "grad_norm": 0.38942693904105447, "learning_rate": 3.126053594127885e-06, "loss": 0.3065, "step": 5325 }, { "epoch": 1.2677455821978938, "grad_norm": 0.37772681354730653, "learning_rate": 3.1242666417389896e-06, "loss": 0.3058, "step": 5326 }, { "epoch": 1.2679835782709585, "grad_norm": 0.38030394224311925, "learning_rate": 3.122479968130475e-06, "loss": 0.3953, "step": 5327 }, { "epoch": 1.2682215743440233, "grad_norm": 0.38391257852782784, "learning_rate": 3.1206935735678855e-06, "loss": 0.2879, "step": 5328 }, { "epoch": 1.2684595704170882, "grad_norm": 0.3731989200180512, "learning_rate": 3.118907458316722e-06, "loss": 0.2993, "step": 5329 }, { "epoch": 1.268697566490153, "grad_norm": 0.6492017938323883, "learning_rate": 3.1171216226424466e-06, "loss": 0.359, "step": 5330 }, { "epoch": 1.2689355625632177, "grad_norm": 0.389677366563528, "learning_rate": 3.115336066810478e-06, "loss": 0.3899, "step": 5331 }, { "epoch": 1.2691735586362825, "grad_norm": 0.38739411432567555, "learning_rate": 3.113550791086195e-06, "loss": 0.2738, "step": 5332 }, { "epoch": 1.2694115547093472, "grad_norm": 0.3697993858168483, "learning_rate": 3.1117657957349325e-06, "loss": 0.3336, "step": 5333 }, { "epoch": 1.2696495507824122, "grad_norm": 0.3851139381884033, "learning_rate": 3.109981081021985e-06, "loss": 0.4039, "step": 5334 }, { "epoch": 1.269887546855477, "grad_norm": 0.3663302911358832, "learning_rate": 3.108196647212605e-06, "loss": 0.3209, "step": 5335 }, { "epoch": 1.2701255429285416, "grad_norm": 0.41294325410097843, "learning_rate": 3.106412494572004e-06, "loss": 0.2828, "step": 5336 }, { "epoch": 1.2703635390016066, "grad_norm": 0.35880515050491946, "learning_rate": 3.104628623365352e-06, "loss": 0.3626, "step": 5337 }, { "epoch": 1.2706015350746713, "grad_norm": 0.4004043215019132, "learning_rate": 3.1028450338577767e-06, "loss": 0.353, "step": 5338 }, { "epoch": 1.270839531147736, "grad_norm": 0.3743970698217683, "learning_rate": 3.1010617263143616e-06, "loss": 0.2735, "step": 5339 }, { "epoch": 1.2710775272208008, "grad_norm": 0.4001724562613586, "learning_rate": 3.099278701000152e-06, "loss": 0.3294, "step": 5340 }, { "epoch": 1.2713155232938655, "grad_norm": 0.3606181579759712, "learning_rate": 3.097495958180149e-06, "loss": 0.3885, "step": 5341 }, { "epoch": 1.2715535193669305, "grad_norm": 0.3757180444725599, "learning_rate": 3.0957134981193125e-06, "loss": 0.2869, "step": 5342 }, { "epoch": 1.2717915154399952, "grad_norm": 0.37476881387920113, "learning_rate": 3.09393132108256e-06, "loss": 0.2862, "step": 5343 }, { "epoch": 1.27202951151306, "grad_norm": 0.36713464862325923, "learning_rate": 3.0921494273347676e-06, "loss": 0.3074, "step": 5344 }, { "epoch": 1.272267507586125, "grad_norm": 0.3747964323798518, "learning_rate": 3.090367817140767e-06, "loss": 0.4127, "step": 5345 }, { "epoch": 1.2725055036591897, "grad_norm": 0.39120167273162276, "learning_rate": 3.0885864907653517e-06, "loss": 0.34, "step": 5346 }, { "epoch": 1.2727434997322544, "grad_norm": 0.3851975033032519, "learning_rate": 3.0868054484732696e-06, "loss": 0.3095, "step": 5347 }, { "epoch": 1.2729814958053192, "grad_norm": 0.4511302802067385, "learning_rate": 3.085024690529227e-06, "loss": 0.3931, "step": 5348 }, { "epoch": 1.273219491878384, "grad_norm": 0.41851016514587525, "learning_rate": 3.0832442171978895e-06, "loss": 0.3374, "step": 5349 }, { "epoch": 1.2734574879514489, "grad_norm": 0.3763563477399065, "learning_rate": 3.0814640287438813e-06, "loss": 0.2577, "step": 5350 }, { "epoch": 1.2736954840245136, "grad_norm": 0.39491317152340816, "learning_rate": 3.07968412543178e-06, "loss": 0.3377, "step": 5351 }, { "epoch": 1.2739334800975783, "grad_norm": 0.42386611214893466, "learning_rate": 3.0779045075261237e-06, "loss": 0.3715, "step": 5352 }, { "epoch": 1.2741714761706433, "grad_norm": 0.35087910158459273, "learning_rate": 3.0761251752914077e-06, "loss": 0.3094, "step": 5353 }, { "epoch": 1.274409472243708, "grad_norm": 0.347152988488092, "learning_rate": 3.074346128992086e-06, "loss": 0.2898, "step": 5354 }, { "epoch": 1.2746474683167728, "grad_norm": 0.37229430513448064, "learning_rate": 3.072567368892567e-06, "loss": 0.3509, "step": 5355 }, { "epoch": 1.2748854643898375, "grad_norm": 0.38027963988847857, "learning_rate": 3.0707888952572206e-06, "loss": 0.3587, "step": 5356 }, { "epoch": 1.2751234604629023, "grad_norm": 0.3518662269310278, "learning_rate": 3.06901070835037e-06, "loss": 0.2739, "step": 5357 }, { "epoch": 1.2753614565359672, "grad_norm": 0.371700159550814, "learning_rate": 3.067232808436299e-06, "loss": 0.3261, "step": 5358 }, { "epoch": 1.275599452609032, "grad_norm": 0.36480653010967556, "learning_rate": 3.0654551957792465e-06, "loss": 0.3602, "step": 5359 }, { "epoch": 1.2758374486820967, "grad_norm": 0.3755693780387078, "learning_rate": 3.0636778706434104e-06, "loss": 0.3067, "step": 5360 }, { "epoch": 1.2760754447551617, "grad_norm": 0.4104166990367289, "learning_rate": 3.061900833292945e-06, "loss": 0.288, "step": 5361 }, { "epoch": 1.2763134408282264, "grad_norm": 0.3860546262996206, "learning_rate": 3.060124083991961e-06, "loss": 0.3287, "step": 5362 }, { "epoch": 1.2765514369012911, "grad_norm": 0.41369962174697755, "learning_rate": 3.0583476230045283e-06, "loss": 0.3812, "step": 5363 }, { "epoch": 1.2767894329743559, "grad_norm": 0.36361020540067207, "learning_rate": 3.0565714505946744e-06, "loss": 0.2886, "step": 5364 }, { "epoch": 1.2770274290474206, "grad_norm": 0.39741880740743823, "learning_rate": 3.0547955670263805e-06, "loss": 0.301, "step": 5365 }, { "epoch": 1.2772654251204856, "grad_norm": 0.450792782481189, "learning_rate": 3.0530199725635868e-06, "loss": 0.3703, "step": 5366 }, { "epoch": 1.2775034211935503, "grad_norm": 0.3897194119813881, "learning_rate": 3.0512446674701923e-06, "loss": 0.3263, "step": 5367 }, { "epoch": 1.277741417266615, "grad_norm": 0.38367874389511963, "learning_rate": 3.04946965201005e-06, "loss": 0.2804, "step": 5368 }, { "epoch": 1.27797941333968, "grad_norm": 0.39417790396645735, "learning_rate": 3.047694926446971e-06, "loss": 0.3121, "step": 5369 }, { "epoch": 1.2782174094127448, "grad_norm": 0.3857971707373409, "learning_rate": 3.0459204910447236e-06, "loss": 0.39, "step": 5370 }, { "epoch": 1.2784554054858095, "grad_norm": 0.3471166269383708, "learning_rate": 3.044146346067033e-06, "loss": 0.2831, "step": 5371 }, { "epoch": 1.2786934015588742, "grad_norm": 0.42964523510831726, "learning_rate": 3.0423724917775806e-06, "loss": 0.3029, "step": 5372 }, { "epoch": 1.278931397631939, "grad_norm": 0.39624435945594666, "learning_rate": 3.040598928440005e-06, "loss": 0.3685, "step": 5373 }, { "epoch": 1.279169393705004, "grad_norm": 0.400417141293227, "learning_rate": 3.0388256563179024e-06, "loss": 0.3483, "step": 5374 }, { "epoch": 1.2794073897780687, "grad_norm": 0.41684013264207087, "learning_rate": 3.037052675674823e-06, "loss": 0.2727, "step": 5375 }, { "epoch": 1.2796453858511334, "grad_norm": 0.3675883946338833, "learning_rate": 3.0352799867742788e-06, "loss": 0.3194, "step": 5376 }, { "epoch": 1.2798833819241984, "grad_norm": 0.35438374829565183, "learning_rate": 3.0335075898797315e-06, "loss": 0.3911, "step": 5377 }, { "epoch": 1.280121377997263, "grad_norm": 0.40473636844510946, "learning_rate": 3.0317354852546067e-06, "loss": 0.3176, "step": 5378 }, { "epoch": 1.2803593740703278, "grad_norm": 0.3699615041682992, "learning_rate": 3.02996367316228e-06, "loss": 0.2816, "step": 5379 }, { "epoch": 1.2805973701433926, "grad_norm": 0.3870198992613291, "learning_rate": 3.0281921538660885e-06, "loss": 0.3498, "step": 5380 }, { "epoch": 1.2808353662164573, "grad_norm": 0.39893170072412276, "learning_rate": 3.026420927629323e-06, "loss": 0.3506, "step": 5381 }, { "epoch": 1.2810733622895223, "grad_norm": 0.39285551881065733, "learning_rate": 3.024649994715233e-06, "loss": 0.2825, "step": 5382 }, { "epoch": 1.281311358362587, "grad_norm": 0.4255415402851682, "learning_rate": 3.0228793553870204e-06, "loss": 0.3282, "step": 5383 }, { "epoch": 1.2815493544356518, "grad_norm": 0.37137193613780684, "learning_rate": 3.0211090099078475e-06, "loss": 0.3586, "step": 5384 }, { "epoch": 1.2817873505087167, "grad_norm": 0.3897265976197935, "learning_rate": 3.019338958540831e-06, "loss": 0.314, "step": 5385 }, { "epoch": 1.2820253465817815, "grad_norm": 0.43026997209773205, "learning_rate": 3.0175692015490443e-06, "loss": 0.2804, "step": 5386 }, { "epoch": 1.2822633426548462, "grad_norm": 0.39280510289196574, "learning_rate": 3.0157997391955172e-06, "loss": 0.3495, "step": 5387 }, { "epoch": 1.282501338727911, "grad_norm": 0.3808724957489551, "learning_rate": 3.014030571743236e-06, "loss": 0.384, "step": 5388 }, { "epoch": 1.2827393348009757, "grad_norm": 0.4445927409783158, "learning_rate": 3.0122616994551413e-06, "loss": 0.2838, "step": 5389 }, { "epoch": 1.2829773308740406, "grad_norm": 0.3877029737610317, "learning_rate": 3.0104931225941335e-06, "loss": 0.3451, "step": 5390 }, { "epoch": 1.2832153269471054, "grad_norm": 0.40529519211735904, "learning_rate": 3.008724841423064e-06, "loss": 0.3857, "step": 5391 }, { "epoch": 1.2834533230201701, "grad_norm": 0.36044362418622544, "learning_rate": 3.006956856204747e-06, "loss": 0.2969, "step": 5392 }, { "epoch": 1.283691319093235, "grad_norm": 0.4635471433725574, "learning_rate": 3.005189167201945e-06, "loss": 0.3053, "step": 5393 }, { "epoch": 1.2839293151662998, "grad_norm": 0.3878593945568825, "learning_rate": 3.003421774677383e-06, "loss": 0.3243, "step": 5394 }, { "epoch": 1.2841673112393646, "grad_norm": 0.4323624105558046, "learning_rate": 3.0016546788937385e-06, "loss": 0.3795, "step": 5395 }, { "epoch": 1.2844053073124293, "grad_norm": 0.37258405194326444, "learning_rate": 2.9998878801136442e-06, "loss": 0.2922, "step": 5396 }, { "epoch": 1.284643303385494, "grad_norm": 0.3984352432024578, "learning_rate": 2.9981213785996925e-06, "loss": 0.3179, "step": 5397 }, { "epoch": 1.284881299458559, "grad_norm": 0.3451845968169443, "learning_rate": 2.996355174614428e-06, "loss": 0.3662, "step": 5398 }, { "epoch": 1.2851192955316237, "grad_norm": 0.3786688602776222, "learning_rate": 2.994589268420352e-06, "loss": 0.3069, "step": 5399 }, { "epoch": 1.2853572916046885, "grad_norm": 0.41067641786263726, "learning_rate": 2.992823660279922e-06, "loss": 0.2652, "step": 5400 }, { "epoch": 1.2855952876777534, "grad_norm": 0.3922515926989098, "learning_rate": 2.9910583504555516e-06, "loss": 0.3191, "step": 5401 }, { "epoch": 1.2858332837508182, "grad_norm": 0.38153653409480226, "learning_rate": 2.989293339209608e-06, "loss": 0.3894, "step": 5402 }, { "epoch": 1.286071279823883, "grad_norm": 0.37037061821185474, "learning_rate": 2.987528626804418e-06, "loss": 0.2799, "step": 5403 }, { "epoch": 1.2863092758969477, "grad_norm": 0.39954728304549386, "learning_rate": 2.98576421350226e-06, "loss": 0.2984, "step": 5404 }, { "epoch": 1.2865472719700124, "grad_norm": 0.42121203353357217, "learning_rate": 2.9840000995653684e-06, "loss": 0.3576, "step": 5405 }, { "epoch": 1.2867852680430774, "grad_norm": 0.378600084659916, "learning_rate": 2.9822362852559363e-06, "loss": 0.3538, "step": 5406 }, { "epoch": 1.287023264116142, "grad_norm": 0.37558983767345905, "learning_rate": 2.9804727708361094e-06, "loss": 0.288, "step": 5407 }, { "epoch": 1.2872612601892068, "grad_norm": 0.4097981060709528, "learning_rate": 2.9787095565679893e-06, "loss": 0.3418, "step": 5408 }, { "epoch": 1.2874992562622718, "grad_norm": 0.3880613286304227, "learning_rate": 2.976946642713634e-06, "loss": 0.3919, "step": 5409 }, { "epoch": 1.2877372523353365, "grad_norm": 0.3576965119071218, "learning_rate": 2.9751840295350554e-06, "loss": 0.3108, "step": 5410 }, { "epoch": 1.2879752484084013, "grad_norm": 0.386930113400703, "learning_rate": 2.9734217172942216e-06, "loss": 0.2923, "step": 5411 }, { "epoch": 1.288213244481466, "grad_norm": 0.37377948379351883, "learning_rate": 2.971659706253055e-06, "loss": 0.3518, "step": 5412 }, { "epoch": 1.2884512405545308, "grad_norm": 0.3907730616845053, "learning_rate": 2.9698979966734353e-06, "loss": 0.3765, "step": 5413 }, { "epoch": 1.2886892366275957, "grad_norm": 0.35233568134416837, "learning_rate": 2.968136588817196e-06, "loss": 0.2804, "step": 5414 }, { "epoch": 1.2889272327006605, "grad_norm": 0.3695879561285849, "learning_rate": 2.966375482946125e-06, "loss": 0.3208, "step": 5415 }, { "epoch": 1.2891652287737252, "grad_norm": 0.383905634810903, "learning_rate": 2.964614679321966e-06, "loss": 0.3657, "step": 5416 }, { "epoch": 1.2894032248467902, "grad_norm": 0.36362432939576694, "learning_rate": 2.962854178206419e-06, "loss": 0.3283, "step": 5417 }, { "epoch": 1.289641220919855, "grad_norm": 0.37983820596988416, "learning_rate": 2.961093979861137e-06, "loss": 0.2758, "step": 5418 }, { "epoch": 1.2898792169929196, "grad_norm": 0.3719877524530933, "learning_rate": 2.9593340845477315e-06, "loss": 0.3312, "step": 5419 }, { "epoch": 1.2901172130659844, "grad_norm": 0.39592619284538855, "learning_rate": 2.9575744925277626e-06, "loss": 0.3998, "step": 5420 }, { "epoch": 1.290355209139049, "grad_norm": 0.36564015551941653, "learning_rate": 2.955815204062753e-06, "loss": 0.2775, "step": 5421 }, { "epoch": 1.290593205212114, "grad_norm": 0.3883816093497894, "learning_rate": 2.954056219414174e-06, "loss": 0.3132, "step": 5422 }, { "epoch": 1.2908312012851788, "grad_norm": 0.3830741599542019, "learning_rate": 2.952297538843456e-06, "loss": 0.3843, "step": 5423 }, { "epoch": 1.2910691973582435, "grad_norm": 0.3735082644605664, "learning_rate": 2.9505391626119804e-06, "loss": 0.3218, "step": 5424 }, { "epoch": 1.2913071934313085, "grad_norm": 0.39360349279204443, "learning_rate": 2.9487810909810876e-06, "loss": 0.2838, "step": 5425 }, { "epoch": 1.2915451895043732, "grad_norm": 0.39199734550206006, "learning_rate": 2.947023324212069e-06, "loss": 0.3363, "step": 5426 }, { "epoch": 1.291783185577438, "grad_norm": 0.3946282023306039, "learning_rate": 2.945265862566172e-06, "loss": 0.3958, "step": 5427 }, { "epoch": 1.2920211816505027, "grad_norm": 0.3833972335654247, "learning_rate": 2.9435087063045997e-06, "loss": 0.3168, "step": 5428 }, { "epoch": 1.2922591777235675, "grad_norm": 0.36324852808260755, "learning_rate": 2.9417518556885085e-06, "loss": 0.3138, "step": 5429 }, { "epoch": 1.2924971737966324, "grad_norm": 0.4118882053851258, "learning_rate": 2.9399953109790104e-06, "loss": 0.364, "step": 5430 }, { "epoch": 1.2927351698696972, "grad_norm": 0.4094387139947238, "learning_rate": 2.938239072437171e-06, "loss": 0.358, "step": 5431 }, { "epoch": 1.292973165942762, "grad_norm": 0.3746366818760013, "learning_rate": 2.936483140324011e-06, "loss": 0.3152, "step": 5432 }, { "epoch": 1.2932111620158269, "grad_norm": 0.3905453061521183, "learning_rate": 2.9347275149005046e-06, "loss": 0.3501, "step": 5433 }, { "epoch": 1.2934491580888916, "grad_norm": 0.4541439412535773, "learning_rate": 2.9329721964275827e-06, "loss": 0.401, "step": 5434 }, { "epoch": 1.2936871541619563, "grad_norm": 0.398933342343559, "learning_rate": 2.9312171851661285e-06, "loss": 0.3134, "step": 5435 }, { "epoch": 1.293925150235021, "grad_norm": 0.42307597168640104, "learning_rate": 2.9294624813769795e-06, "loss": 0.2636, "step": 5436 }, { "epoch": 1.2941631463080858, "grad_norm": 0.4037706037517579, "learning_rate": 2.9277080853209284e-06, "loss": 0.3405, "step": 5437 }, { "epoch": 1.2944011423811508, "grad_norm": 0.3812267379791281, "learning_rate": 2.9259539972587227e-06, "loss": 0.382, "step": 5438 }, { "epoch": 1.2946391384542155, "grad_norm": 0.364830365933242, "learning_rate": 2.9242002174510613e-06, "loss": 0.2903, "step": 5439 }, { "epoch": 1.2948771345272803, "grad_norm": 0.3897401469920776, "learning_rate": 2.922446746158601e-06, "loss": 0.3097, "step": 5440 }, { "epoch": 1.2951151306003452, "grad_norm": 0.44911236787123654, "learning_rate": 2.92069358364195e-06, "loss": 0.4018, "step": 5441 }, { "epoch": 1.29535312667341, "grad_norm": 0.37167943879371046, "learning_rate": 2.918940730161672e-06, "loss": 0.3206, "step": 5442 }, { "epoch": 1.2955911227464747, "grad_norm": 0.35873134087334885, "learning_rate": 2.9171881859782854e-06, "loss": 0.2722, "step": 5443 }, { "epoch": 1.2958291188195394, "grad_norm": 0.37574331573192493, "learning_rate": 2.91543595135226e-06, "loss": 0.3285, "step": 5444 }, { "epoch": 1.2960671148926042, "grad_norm": 0.37774497295843434, "learning_rate": 2.9136840265440213e-06, "loss": 0.3934, "step": 5445 }, { "epoch": 1.2963051109656691, "grad_norm": 0.37035405909980235, "learning_rate": 2.9119324118139482e-06, "loss": 0.2877, "step": 5446 }, { "epoch": 1.2965431070387339, "grad_norm": 0.3719053824299425, "learning_rate": 2.9101811074223762e-06, "loss": 0.3049, "step": 5447 }, { "epoch": 1.2967811031117986, "grad_norm": 0.38418916465237035, "learning_rate": 2.9084301136295922e-06, "loss": 0.3589, "step": 5448 }, { "epoch": 1.2970190991848636, "grad_norm": 0.38154835318426467, "learning_rate": 2.9066794306958356e-06, "loss": 0.3518, "step": 5449 }, { "epoch": 1.2972570952579283, "grad_norm": 0.37098490527058375, "learning_rate": 2.904929058881302e-06, "loss": 0.2862, "step": 5450 }, { "epoch": 1.297495091330993, "grad_norm": 0.35922712949248786, "learning_rate": 2.90317899844614e-06, "loss": 0.3084, "step": 5451 }, { "epoch": 1.2977330874040578, "grad_norm": 0.3752626421355902, "learning_rate": 2.9014292496504493e-06, "loss": 0.3723, "step": 5452 }, { "epoch": 1.2979710834771225, "grad_norm": 0.3660609605786775, "learning_rate": 2.89967981275429e-06, "loss": 0.3029, "step": 5453 }, { "epoch": 1.2982090795501875, "grad_norm": 0.383302664535824, "learning_rate": 2.8979306880176706e-06, "loss": 0.3021, "step": 5454 }, { "epoch": 1.2984470756232522, "grad_norm": 0.3994458439573466, "learning_rate": 2.8961818757005533e-06, "loss": 0.3402, "step": 5455 }, { "epoch": 1.298685071696317, "grad_norm": 0.3971982374786896, "learning_rate": 2.894433376062855e-06, "loss": 0.3528, "step": 5456 }, { "epoch": 1.298923067769382, "grad_norm": 0.3882804894172824, "learning_rate": 2.892685189364447e-06, "loss": 0.2716, "step": 5457 }, { "epoch": 1.2991610638424467, "grad_norm": 0.3899579933994844, "learning_rate": 2.8909373158651523e-06, "loss": 0.3196, "step": 5458 }, { "epoch": 1.2993990599155114, "grad_norm": 0.3855353642705331, "learning_rate": 2.889189755824747e-06, "loss": 0.3789, "step": 5459 }, { "epoch": 1.2996370559885762, "grad_norm": 0.3622462412263447, "learning_rate": 2.887442509502962e-06, "loss": 0.2928, "step": 5460 }, { "epoch": 1.299875052061641, "grad_norm": 0.38805537438834, "learning_rate": 2.885695577159484e-06, "loss": 0.2817, "step": 5461 }, { "epoch": 1.3001130481347059, "grad_norm": 0.4137674453573019, "learning_rate": 2.8839489590539482e-06, "loss": 0.3426, "step": 5462 }, { "epoch": 1.3003510442077706, "grad_norm": 0.37778518714576936, "learning_rate": 2.882202655445946e-06, "loss": 0.3454, "step": 5463 }, { "epoch": 1.3005890402808353, "grad_norm": 0.34689911133155854, "learning_rate": 2.8804566665950207e-06, "loss": 0.2948, "step": 5464 }, { "epoch": 1.3008270363539003, "grad_norm": 0.3881888255918, "learning_rate": 2.8787109927606704e-06, "loss": 0.3178, "step": 5465 }, { "epoch": 1.301065032426965, "grad_norm": 0.3689773814104203, "learning_rate": 2.8769656342023445e-06, "loss": 0.3571, "step": 5466 }, { "epoch": 1.3013030285000298, "grad_norm": 0.37850711600818543, "learning_rate": 2.8752205911794463e-06, "loss": 0.3288, "step": 5467 }, { "epoch": 1.3015410245730945, "grad_norm": 0.3848381260854274, "learning_rate": 2.8734758639513327e-06, "loss": 0.2831, "step": 5468 }, { "epoch": 1.3017790206461592, "grad_norm": 0.3517844887890128, "learning_rate": 2.8717314527773134e-06, "loss": 0.3123, "step": 5469 }, { "epoch": 1.3020170167192242, "grad_norm": 0.380584564601741, "learning_rate": 2.8699873579166517e-06, "loss": 0.3761, "step": 5470 }, { "epoch": 1.302255012792289, "grad_norm": 0.3577811945107307, "learning_rate": 2.8682435796285617e-06, "loss": 0.285, "step": 5471 }, { "epoch": 1.3024930088653537, "grad_norm": 0.3819440878979895, "learning_rate": 2.8665001181722134e-06, "loss": 0.2917, "step": 5472 }, { "epoch": 1.3027310049384186, "grad_norm": 0.38014911424877107, "learning_rate": 2.8647569738067262e-06, "loss": 0.3719, "step": 5473 }, { "epoch": 1.3029690010114834, "grad_norm": 0.38123121571081403, "learning_rate": 2.8630141467911777e-06, "loss": 0.3411, "step": 5474 }, { "epoch": 1.3032069970845481, "grad_norm": 0.3930220532783937, "learning_rate": 2.8612716373845927e-06, "loss": 0.2891, "step": 5475 }, { "epoch": 1.3034449931576129, "grad_norm": 0.4003883784524193, "learning_rate": 2.859529445845953e-06, "loss": 0.3191, "step": 5476 }, { "epoch": 1.3036829892306776, "grad_norm": 0.39728799061286724, "learning_rate": 2.8577875724341897e-06, "loss": 0.3656, "step": 5477 }, { "epoch": 1.3039209853037426, "grad_norm": 0.36276882877846156, "learning_rate": 2.8560460174081896e-06, "loss": 0.3009, "step": 5478 }, { "epoch": 1.3041589813768073, "grad_norm": 0.34834226783278643, "learning_rate": 2.8543047810267876e-06, "loss": 0.2964, "step": 5479 }, { "epoch": 1.304396977449872, "grad_norm": 0.37989807168764456, "learning_rate": 2.852563863548779e-06, "loss": 0.3486, "step": 5480 }, { "epoch": 1.304634973522937, "grad_norm": 0.36552520765488317, "learning_rate": 2.850823265232906e-06, "loss": 0.3672, "step": 5481 }, { "epoch": 1.3048729695960017, "grad_norm": 0.36751382218934403, "learning_rate": 2.849082986337863e-06, "loss": 0.251, "step": 5482 }, { "epoch": 1.3051109656690665, "grad_norm": 0.40093768891686205, "learning_rate": 2.8473430271222994e-06, "loss": 0.3348, "step": 5483 }, { "epoch": 1.3053489617421312, "grad_norm": 0.3780458249964564, "learning_rate": 2.845603387844817e-06, "loss": 0.3953, "step": 5484 }, { "epoch": 1.305586957815196, "grad_norm": 0.3807478074730611, "learning_rate": 2.8438640687639675e-06, "loss": 0.3076, "step": 5485 }, { "epoch": 1.305824953888261, "grad_norm": 0.3703942594376492, "learning_rate": 2.842125070138258e-06, "loss": 0.2822, "step": 5486 }, { "epoch": 1.3060629499613257, "grad_norm": 0.3600318688567636, "learning_rate": 2.8403863922261444e-06, "loss": 0.3293, "step": 5487 }, { "epoch": 1.3063009460343904, "grad_norm": 0.3609650481015529, "learning_rate": 2.8386480352860414e-06, "loss": 0.3507, "step": 5488 }, { "epoch": 1.3065389421074554, "grad_norm": 0.36174141337750676, "learning_rate": 2.8369099995763088e-06, "loss": 0.2888, "step": 5489 }, { "epoch": 1.30677693818052, "grad_norm": 0.3641547780000098, "learning_rate": 2.835172285355263e-06, "loss": 0.2959, "step": 5490 }, { "epoch": 1.3070149342535848, "grad_norm": 0.35270388989031864, "learning_rate": 2.833434892881171e-06, "loss": 0.3901, "step": 5491 }, { "epoch": 1.3072529303266496, "grad_norm": 0.41231868386069426, "learning_rate": 2.831697822412252e-06, "loss": 0.3232, "step": 5492 }, { "epoch": 1.3074909263997143, "grad_norm": 0.4084581682764854, "learning_rate": 2.8299610742066778e-06, "loss": 0.2764, "step": 5493 }, { "epoch": 1.3077289224727793, "grad_norm": 0.391038761585091, "learning_rate": 2.8282246485225722e-06, "loss": 0.3169, "step": 5494 }, { "epoch": 1.307966918545844, "grad_norm": 0.392419286404586, "learning_rate": 2.826488545618011e-06, "loss": 0.41, "step": 5495 }, { "epoch": 1.3082049146189088, "grad_norm": 0.36111528650542957, "learning_rate": 2.824752765751022e-06, "loss": 0.2922, "step": 5496 }, { "epoch": 1.3084429106919737, "grad_norm": 0.40433967227880035, "learning_rate": 2.8230173091795853e-06, "loss": 0.312, "step": 5497 }, { "epoch": 1.3086809067650385, "grad_norm": 0.4150889724294196, "learning_rate": 2.8212821761616314e-06, "loss": 0.3416, "step": 5498 }, { "epoch": 1.3089189028381032, "grad_norm": 0.3784389771780397, "learning_rate": 2.819547366955046e-06, "loss": 0.3283, "step": 5499 }, { "epoch": 1.309156898911168, "grad_norm": 0.39020777536186346, "learning_rate": 2.817812881817663e-06, "loss": 0.3039, "step": 5500 }, { "epoch": 1.3093948949842327, "grad_norm": 0.38412700599020266, "learning_rate": 2.8160787210072695e-06, "loss": 0.335, "step": 5501 }, { "epoch": 1.3096328910572976, "grad_norm": 0.37696789257027463, "learning_rate": 2.814344884781607e-06, "loss": 0.3842, "step": 5502 }, { "epoch": 1.3098708871303624, "grad_norm": 0.3592980808212725, "learning_rate": 2.812611373398365e-06, "loss": 0.2864, "step": 5503 }, { "epoch": 1.310108883203427, "grad_norm": 0.3932282988991156, "learning_rate": 2.8108781871151866e-06, "loss": 0.2911, "step": 5504 }, { "epoch": 1.310346879276492, "grad_norm": 0.366771028303434, "learning_rate": 2.8091453261896657e-06, "loss": 0.3637, "step": 5505 }, { "epoch": 1.3105848753495568, "grad_norm": 0.3612811617097267, "learning_rate": 2.8074127908793464e-06, "loss": 0.3796, "step": 5506 }, { "epoch": 1.3108228714226215, "grad_norm": 0.3522436714920477, "learning_rate": 2.8056805814417305e-06, "loss": 0.2732, "step": 5507 }, { "epoch": 1.3110608674956863, "grad_norm": 0.40918249461660394, "learning_rate": 2.8039486981342647e-06, "loss": 0.3357, "step": 5508 }, { "epoch": 1.311298863568751, "grad_norm": 0.4104488620009276, "learning_rate": 2.8022171412143504e-06, "loss": 0.3646, "step": 5509 }, { "epoch": 1.311536859641816, "grad_norm": 0.3643401976041939, "learning_rate": 2.800485910939339e-06, "loss": 0.3133, "step": 5510 }, { "epoch": 1.3117748557148807, "grad_norm": 0.3894095595689637, "learning_rate": 2.7987550075665356e-06, "loss": 0.2957, "step": 5511 }, { "epoch": 1.3120128517879455, "grad_norm": 0.38028414239770225, "learning_rate": 2.7970244313531935e-06, "loss": 0.3483, "step": 5512 }, { "epoch": 1.3122508478610104, "grad_norm": 0.383503362557972, "learning_rate": 2.7952941825565193e-06, "loss": 0.3826, "step": 5513 }, { "epoch": 1.3124888439340752, "grad_norm": 0.3753460629109302, "learning_rate": 2.793564261433672e-06, "loss": 0.2777, "step": 5514 }, { "epoch": 1.31272684000714, "grad_norm": 0.38982091819158365, "learning_rate": 2.7918346682417585e-06, "loss": 0.3217, "step": 5515 }, { "epoch": 1.3129648360802046, "grad_norm": 0.39383363481041805, "learning_rate": 2.7901054032378426e-06, "loss": 0.3899, "step": 5516 }, { "epoch": 1.3132028321532694, "grad_norm": 0.43566741874589754, "learning_rate": 2.7883764666789336e-06, "loss": 0.2971, "step": 5517 }, { "epoch": 1.3134408282263343, "grad_norm": 0.38349493003039725, "learning_rate": 2.7866478588219945e-06, "loss": 0.2766, "step": 5518 }, { "epoch": 1.313678824299399, "grad_norm": 0.3767513973073563, "learning_rate": 2.784919579923939e-06, "loss": 0.3182, "step": 5519 }, { "epoch": 1.3139168203724638, "grad_norm": 0.37130947209320847, "learning_rate": 2.783191630241633e-06, "loss": 0.3982, "step": 5520 }, { "epoch": 1.3141548164455288, "grad_norm": 0.36376573221358194, "learning_rate": 2.7814640100318917e-06, "loss": 0.2772, "step": 5521 }, { "epoch": 1.3143928125185935, "grad_norm": 0.504440580572112, "learning_rate": 2.7797367195514825e-06, "loss": 0.3145, "step": 5522 }, { "epoch": 1.3146308085916583, "grad_norm": 0.3834097822594875, "learning_rate": 2.778009759057123e-06, "loss": 0.3448, "step": 5523 }, { "epoch": 1.314868804664723, "grad_norm": 0.3670665934349199, "learning_rate": 2.7762831288054836e-06, "loss": 0.3409, "step": 5524 }, { "epoch": 1.3151068007377877, "grad_norm": 0.37382482389619465, "learning_rate": 2.7745568290531827e-06, "loss": 0.2827, "step": 5525 }, { "epoch": 1.3153447968108527, "grad_norm": 0.3685314429800074, "learning_rate": 2.772830860056792e-06, "loss": 0.3201, "step": 5526 }, { "epoch": 1.3155827928839174, "grad_norm": 0.36043420452079505, "learning_rate": 2.771105222072833e-06, "loss": 0.426, "step": 5527 }, { "epoch": 1.3158207889569822, "grad_norm": 0.36717830372844196, "learning_rate": 2.769379915357776e-06, "loss": 0.2985, "step": 5528 }, { "epoch": 1.3160587850300471, "grad_norm": 0.37554243694911554, "learning_rate": 2.7676549401680486e-06, "loss": 0.2885, "step": 5529 }, { "epoch": 1.3162967811031119, "grad_norm": 0.3741544659657053, "learning_rate": 2.7659302967600226e-06, "loss": 0.3648, "step": 5530 }, { "epoch": 1.3165347771761766, "grad_norm": 0.38217417099471307, "learning_rate": 2.764205985390023e-06, "loss": 0.3696, "step": 5531 }, { "epoch": 1.3167727732492414, "grad_norm": 0.3587235824649817, "learning_rate": 2.762482006314324e-06, "loss": 0.2703, "step": 5532 }, { "epoch": 1.317010769322306, "grad_norm": 0.37109675762063177, "learning_rate": 2.760758359789151e-06, "loss": 0.3151, "step": 5533 }, { "epoch": 1.317248765395371, "grad_norm": 0.4298879106634171, "learning_rate": 2.7590350460706845e-06, "loss": 0.3901, "step": 5534 }, { "epoch": 1.3174867614684358, "grad_norm": 0.3555539814561159, "learning_rate": 2.757312065415048e-06, "loss": 0.2981, "step": 5535 }, { "epoch": 1.3177247575415005, "grad_norm": 0.3717826964329484, "learning_rate": 2.7555894180783203e-06, "loss": 0.3002, "step": 5536 }, { "epoch": 1.3179627536145655, "grad_norm": 0.41601130773589784, "learning_rate": 2.7538671043165295e-06, "loss": 0.3482, "step": 5537 }, { "epoch": 1.3182007496876302, "grad_norm": 0.3901057217046057, "learning_rate": 2.7521451243856533e-06, "loss": 0.3883, "step": 5538 }, { "epoch": 1.318438745760695, "grad_norm": 0.4228120609797883, "learning_rate": 2.7504234785416206e-06, "loss": 0.3078, "step": 5539 }, { "epoch": 1.3186767418337597, "grad_norm": 0.3842260920241478, "learning_rate": 2.7487021670403115e-06, "loss": 0.315, "step": 5540 }, { "epoch": 1.3189147379068245, "grad_norm": 0.41311984790557893, "learning_rate": 2.746981190137554e-06, "loss": 0.3725, "step": 5541 }, { "epoch": 1.3191527339798894, "grad_norm": 0.3577865374099851, "learning_rate": 2.7452605480891276e-06, "loss": 0.3359, "step": 5542 }, { "epoch": 1.3193907300529542, "grad_norm": 0.3845593269549925, "learning_rate": 2.743540241150765e-06, "loss": 0.2928, "step": 5543 }, { "epoch": 1.319628726126019, "grad_norm": 0.3988246990305431, "learning_rate": 2.7418202695781443e-06, "loss": 0.3563, "step": 5544 }, { "epoch": 1.3198667221990839, "grad_norm": 0.3926174352866639, "learning_rate": 2.7401006336268966e-06, "loss": 0.3319, "step": 5545 }, { "epoch": 1.3201047182721486, "grad_norm": 0.3877729503505814, "learning_rate": 2.738381333552601e-06, "loss": 0.2998, "step": 5546 }, { "epoch": 1.3203427143452133, "grad_norm": 0.40291838283798237, "learning_rate": 2.73666236961079e-06, "loss": 0.3096, "step": 5547 }, { "epoch": 1.320580710418278, "grad_norm": 0.36068628191130936, "learning_rate": 2.734943742056943e-06, "loss": 0.3678, "step": 5548 }, { "epoch": 1.3208187064913428, "grad_norm": 0.3709054992297036, "learning_rate": 2.7332254511464906e-06, "loss": 0.3333, "step": 5549 }, { "epoch": 1.3210567025644078, "grad_norm": 0.3673136445373127, "learning_rate": 2.7315074971348133e-06, "loss": 0.303, "step": 5550 }, { "epoch": 1.3212946986374725, "grad_norm": 0.3626370946230582, "learning_rate": 2.729789880277242e-06, "loss": 0.3252, "step": 5551 }, { "epoch": 1.3215326947105372, "grad_norm": 0.3761315640459379, "learning_rate": 2.728072600829057e-06, "loss": 0.4076, "step": 5552 }, { "epoch": 1.3217706907836022, "grad_norm": 0.38806642072912284, "learning_rate": 2.7263556590454874e-06, "loss": 0.3095, "step": 5553 }, { "epoch": 1.322008686856667, "grad_norm": 0.3827363695116688, "learning_rate": 2.724639055181715e-06, "loss": 0.3012, "step": 5554 }, { "epoch": 1.3222466829297317, "grad_norm": 0.406266926754827, "learning_rate": 2.7229227894928666e-06, "loss": 0.3563, "step": 5555 }, { "epoch": 1.3224846790027964, "grad_norm": 0.3601509616480053, "learning_rate": 2.721206862234026e-06, "loss": 0.3359, "step": 5556 }, { "epoch": 1.3227226750758612, "grad_norm": 0.39622194453014564, "learning_rate": 2.71949127366022e-06, "loss": 0.3009, "step": 5557 }, { "epoch": 1.322960671148926, "grad_norm": 0.40428113767002444, "learning_rate": 2.7177760240264273e-06, "loss": 0.3141, "step": 5558 }, { "epoch": 1.3231986672219909, "grad_norm": 0.39692065016659434, "learning_rate": 2.7160611135875774e-06, "loss": 0.3779, "step": 5559 }, { "epoch": 1.3234366632950556, "grad_norm": 0.3546448081276393, "learning_rate": 2.714346542598546e-06, "loss": 0.3072, "step": 5560 }, { "epoch": 1.3236746593681203, "grad_norm": 0.40729641787879445, "learning_rate": 2.712632311314165e-06, "loss": 0.2691, "step": 5561 }, { "epoch": 1.3239126554411853, "grad_norm": 0.374238994477962, "learning_rate": 2.7109184199892093e-06, "loss": 0.3453, "step": 5562 }, { "epoch": 1.32415065151425, "grad_norm": 0.3684026547826237, "learning_rate": 2.7092048688784046e-06, "loss": 0.3535, "step": 5563 }, { "epoch": 1.3243886475873148, "grad_norm": 0.3957287753476546, "learning_rate": 2.7074916582364284e-06, "loss": 0.2692, "step": 5564 }, { "epoch": 1.3246266436603795, "grad_norm": 0.3807784927555232, "learning_rate": 2.705778788317906e-06, "loss": 0.3471, "step": 5565 }, { "epoch": 1.3248646397334443, "grad_norm": 0.36315758859429115, "learning_rate": 2.7040662593774114e-06, "loss": 0.384, "step": 5566 }, { "epoch": 1.3251026358065092, "grad_norm": 0.3570108097887906, "learning_rate": 2.70235407166947e-06, "loss": 0.3291, "step": 5567 }, { "epoch": 1.325340631879574, "grad_norm": 0.38719520188943835, "learning_rate": 2.700642225448554e-06, "loss": 0.3024, "step": 5568 }, { "epoch": 1.3255786279526387, "grad_norm": 0.3896705832913739, "learning_rate": 2.698930720969087e-06, "loss": 0.3169, "step": 5569 }, { "epoch": 1.3258166240257037, "grad_norm": 0.3888507607782142, "learning_rate": 2.697219558485439e-06, "loss": 0.3841, "step": 5570 }, { "epoch": 1.3260546200987684, "grad_norm": 0.3670049338221428, "learning_rate": 2.695508738251934e-06, "loss": 0.2712, "step": 5571 }, { "epoch": 1.3262926161718331, "grad_norm": 0.39842737745653384, "learning_rate": 2.693798260522841e-06, "loss": 0.2985, "step": 5572 }, { "epoch": 1.3265306122448979, "grad_norm": 0.37727527288531104, "learning_rate": 2.692088125552379e-06, "loss": 0.3754, "step": 5573 }, { "epoch": 1.3267686083179626, "grad_norm": 0.43871454005089827, "learning_rate": 2.690378333594717e-06, "loss": 0.3219, "step": 5574 }, { "epoch": 1.3270066043910276, "grad_norm": 0.39318467207824387, "learning_rate": 2.6886688849039717e-06, "loss": 0.2756, "step": 5575 }, { "epoch": 1.3272446004640923, "grad_norm": 0.3994623481607067, "learning_rate": 2.6869597797342096e-06, "loss": 0.3249, "step": 5576 }, { "epoch": 1.327482596537157, "grad_norm": 0.44155749097845365, "learning_rate": 2.685251018339446e-06, "loss": 0.3763, "step": 5577 }, { "epoch": 1.327720592610222, "grad_norm": 0.3665656601981612, "learning_rate": 2.6835426009736455e-06, "loss": 0.3239, "step": 5578 }, { "epoch": 1.3279585886832868, "grad_norm": 0.4022642181119508, "learning_rate": 2.681834527890721e-06, "loss": 0.2761, "step": 5579 }, { "epoch": 1.3281965847563515, "grad_norm": 0.3788048689911649, "learning_rate": 2.6801267993445335e-06, "loss": 0.3626, "step": 5580 }, { "epoch": 1.3284345808294162, "grad_norm": 0.399964703157908, "learning_rate": 2.6784194155888953e-06, "loss": 0.364, "step": 5581 }, { "epoch": 1.328672576902481, "grad_norm": 0.46248131169930756, "learning_rate": 2.6767123768775627e-06, "loss": 0.3001, "step": 5582 }, { "epoch": 1.328910572975546, "grad_norm": 0.3790430115317474, "learning_rate": 2.675005683464248e-06, "loss": 0.3368, "step": 5583 }, { "epoch": 1.3291485690486107, "grad_norm": 0.42296015749472615, "learning_rate": 2.6732993356026073e-06, "loss": 0.3964, "step": 5584 }, { "epoch": 1.3293865651216754, "grad_norm": 0.37767108037602987, "learning_rate": 2.671593333546244e-06, "loss": 0.3186, "step": 5585 }, { "epoch": 1.3296245611947404, "grad_norm": 0.3556314352660897, "learning_rate": 2.669887677548712e-06, "loss": 0.2928, "step": 5586 }, { "epoch": 1.3298625572678051, "grad_norm": 0.3646511968328883, "learning_rate": 2.6681823678635177e-06, "loss": 0.3345, "step": 5587 }, { "epoch": 1.3301005533408699, "grad_norm": 0.38411250506226, "learning_rate": 2.6664774047441087e-06, "loss": 0.3821, "step": 5588 }, { "epoch": 1.3303385494139346, "grad_norm": 0.3780727074870984, "learning_rate": 2.6647727884438866e-06, "loss": 0.287, "step": 5589 }, { "epoch": 1.3305765454869993, "grad_norm": 0.3595186487413398, "learning_rate": 2.6630685192161995e-06, "loss": 0.3187, "step": 5590 }, { "epoch": 1.3308145415600643, "grad_norm": 0.352373082985822, "learning_rate": 2.6613645973143427e-06, "loss": 0.3637, "step": 5591 }, { "epoch": 1.331052537633129, "grad_norm": 0.3611789272199898, "learning_rate": 2.659661022991562e-06, "loss": 0.3405, "step": 5592 }, { "epoch": 1.3312905337061938, "grad_norm": 0.4065306276115284, "learning_rate": 2.65795779650105e-06, "loss": 0.293, "step": 5593 }, { "epoch": 1.3315285297792587, "grad_norm": 0.3987698770035065, "learning_rate": 2.656254918095949e-06, "loss": 0.3428, "step": 5594 }, { "epoch": 1.3317665258523235, "grad_norm": 0.3943243833309216, "learning_rate": 2.654552388029349e-06, "loss": 0.3822, "step": 5595 }, { "epoch": 1.3320045219253882, "grad_norm": 0.36509611859681235, "learning_rate": 2.652850206554287e-06, "loss": 0.2925, "step": 5596 }, { "epoch": 1.332242517998453, "grad_norm": 0.38035780331790836, "learning_rate": 2.6511483739237508e-06, "loss": 0.3156, "step": 5597 }, { "epoch": 1.3324805140715177, "grad_norm": 0.37332659121072254, "learning_rate": 2.649446890390671e-06, "loss": 0.3465, "step": 5598 }, { "epoch": 1.3327185101445826, "grad_norm": 0.3665673689467114, "learning_rate": 2.647745756207937e-06, "loss": 0.3658, "step": 5599 }, { "epoch": 1.3329565062176474, "grad_norm": 0.3820284830332031, "learning_rate": 2.646044971628374e-06, "loss": 0.2949, "step": 5600 }, { "epoch": 1.3331945022907121, "grad_norm": 0.37188294924637955, "learning_rate": 2.6443445369047625e-06, "loss": 0.3224, "step": 5601 }, { "epoch": 1.333432498363777, "grad_norm": 0.3735185472412557, "learning_rate": 2.6426444522898286e-06, "loss": 0.3703, "step": 5602 }, { "epoch": 1.3336704944368418, "grad_norm": 0.35518226222360766, "learning_rate": 2.640944718036248e-06, "loss": 0.3015, "step": 5603 }, { "epoch": 1.3339084905099066, "grad_norm": 0.389450698522127, "learning_rate": 2.6392453343966422e-06, "loss": 0.2587, "step": 5604 }, { "epoch": 1.3341464865829713, "grad_norm": 0.3971444862417287, "learning_rate": 2.6375463016235826e-06, "loss": 0.3954, "step": 5605 }, { "epoch": 1.334384482656036, "grad_norm": 0.3502692387454883, "learning_rate": 2.6358476199695858e-06, "loss": 0.3411, "step": 5606 }, { "epoch": 1.334622478729101, "grad_norm": 0.36914607170334995, "learning_rate": 2.6341492896871198e-06, "loss": 0.2936, "step": 5607 }, { "epoch": 1.3348604748021657, "grad_norm": 0.4088428743135435, "learning_rate": 2.632451311028598e-06, "loss": 0.3102, "step": 5608 }, { "epoch": 1.3350984708752305, "grad_norm": 0.37074985008059036, "learning_rate": 2.630753684246378e-06, "loss": 0.3687, "step": 5609 }, { "epoch": 1.3353364669482954, "grad_norm": 0.35775847771800834, "learning_rate": 2.6290564095927762e-06, "loss": 0.3196, "step": 5610 }, { "epoch": 1.3355744630213602, "grad_norm": 0.3834131260856632, "learning_rate": 2.627359487320046e-06, "loss": 0.2913, "step": 5611 }, { "epoch": 1.335812459094425, "grad_norm": 0.37855664847349857, "learning_rate": 2.6256629176803925e-06, "loss": 0.3398, "step": 5612 }, { "epoch": 1.3360504551674897, "grad_norm": 0.3915920080407146, "learning_rate": 2.623966700925965e-06, "loss": 0.3756, "step": 5613 }, { "epoch": 1.3362884512405544, "grad_norm": 0.3936719541417686, "learning_rate": 2.622270837308869e-06, "loss": 0.2686, "step": 5614 }, { "epoch": 1.3365264473136194, "grad_norm": 0.37541566356978545, "learning_rate": 2.620575327081148e-06, "loss": 0.3409, "step": 5615 }, { "epoch": 1.336764443386684, "grad_norm": 0.39242038775201626, "learning_rate": 2.6188801704947976e-06, "loss": 0.4255, "step": 5616 }, { "epoch": 1.3370024394597488, "grad_norm": 0.3738300495004616, "learning_rate": 2.61718536780176e-06, "loss": 0.3243, "step": 5617 }, { "epoch": 1.3372404355328138, "grad_norm": 0.37526710784159806, "learning_rate": 2.6154909192539248e-06, "loss": 0.2954, "step": 5618 }, { "epoch": 1.3374784316058785, "grad_norm": 0.3831635215027392, "learning_rate": 2.613796825103129e-06, "loss": 0.3692, "step": 5619 }, { "epoch": 1.3377164276789433, "grad_norm": 0.3928912025057844, "learning_rate": 2.6121030856011562e-06, "loss": 0.4076, "step": 5620 }, { "epoch": 1.337954423752008, "grad_norm": 0.3747902265725176, "learning_rate": 2.61040970099974e-06, "loss": 0.3122, "step": 5621 }, { "epoch": 1.3381924198250728, "grad_norm": 0.3852536780569142, "learning_rate": 2.6087166715505563e-06, "loss": 0.3281, "step": 5622 }, { "epoch": 1.3384304158981377, "grad_norm": 0.4203309500020824, "learning_rate": 2.6070239975052334e-06, "loss": 0.3748, "step": 5623 }, { "epoch": 1.3386684119712025, "grad_norm": 0.37552479294518815, "learning_rate": 2.605331679115344e-06, "loss": 0.3544, "step": 5624 }, { "epoch": 1.3389064080442672, "grad_norm": 0.3796552948357275, "learning_rate": 2.6036397166324062e-06, "loss": 0.257, "step": 5625 }, { "epoch": 1.3391444041173322, "grad_norm": 0.36585880934173215, "learning_rate": 2.6019481103078912e-06, "loss": 0.3224, "step": 5626 }, { "epoch": 1.339382400190397, "grad_norm": 0.3784318209352287, "learning_rate": 2.6002568603932127e-06, "loss": 0.4231, "step": 5627 }, { "epoch": 1.3396203962634616, "grad_norm": 0.3581778615226102, "learning_rate": 2.598565967139731e-06, "loss": 0.269, "step": 5628 }, { "epoch": 1.3398583923365264, "grad_norm": 0.3613572419065405, "learning_rate": 2.5968754307987556e-06, "loss": 0.3059, "step": 5629 }, { "epoch": 1.340096388409591, "grad_norm": 0.41685880452742957, "learning_rate": 2.5951852516215415e-06, "loss": 0.3547, "step": 5630 }, { "epoch": 1.340334384482656, "grad_norm": 0.3476538727617446, "learning_rate": 2.593495429859291e-06, "loss": 0.3522, "step": 5631 }, { "epoch": 1.3405723805557208, "grad_norm": 0.38372719904096886, "learning_rate": 2.5918059657631532e-06, "loss": 0.2839, "step": 5632 }, { "epoch": 1.3408103766287855, "grad_norm": 0.4168082237276136, "learning_rate": 2.5901168595842256e-06, "loss": 0.3166, "step": 5633 }, { "epoch": 1.3410483727018505, "grad_norm": 0.3990911280449388, "learning_rate": 2.58842811157355e-06, "loss": 0.3948, "step": 5634 }, { "epoch": 1.3412863687749152, "grad_norm": 0.3619981264622829, "learning_rate": 2.5867397219821166e-06, "loss": 0.3115, "step": 5635 }, { "epoch": 1.34152436484798, "grad_norm": 0.37183438540217123, "learning_rate": 2.5850516910608596e-06, "loss": 0.299, "step": 5636 }, { "epoch": 1.3417623609210447, "grad_norm": 0.3821320851640882, "learning_rate": 2.5833640190606663e-06, "loss": 0.3677, "step": 5637 }, { "epoch": 1.3420003569941095, "grad_norm": 0.4340280479794182, "learning_rate": 2.5816767062323646e-06, "loss": 0.3653, "step": 5638 }, { "epoch": 1.3422383530671744, "grad_norm": 0.3935957665861045, "learning_rate": 2.5799897528267304e-06, "loss": 0.3125, "step": 5639 }, { "epoch": 1.3424763491402392, "grad_norm": 0.3551306900169191, "learning_rate": 2.5783031590944853e-06, "loss": 0.3321, "step": 5640 }, { "epoch": 1.342714345213304, "grad_norm": 0.39027530241074593, "learning_rate": 2.5766169252863026e-06, "loss": 0.3719, "step": 5641 }, { "epoch": 1.3429523412863689, "grad_norm": 0.3669775255066402, "learning_rate": 2.574931051652796e-06, "loss": 0.3106, "step": 5642 }, { "epoch": 1.3431903373594336, "grad_norm": 0.36467746278063334, "learning_rate": 2.573245538444529e-06, "loss": 0.2812, "step": 5643 }, { "epoch": 1.3434283334324983, "grad_norm": 0.44665190837644525, "learning_rate": 2.5715603859120095e-06, "loss": 0.3282, "step": 5644 }, { "epoch": 1.343666329505563, "grad_norm": 0.36150659846601496, "learning_rate": 2.569875594305694e-06, "loss": 0.3898, "step": 5645 }, { "epoch": 1.3439043255786278, "grad_norm": 0.36749221425362993, "learning_rate": 2.5681911638759837e-06, "loss": 0.2971, "step": 5646 }, { "epoch": 1.3441423216516928, "grad_norm": 0.37962067008941414, "learning_rate": 2.5665070948732258e-06, "loss": 0.2935, "step": 5647 }, { "epoch": 1.3443803177247575, "grad_norm": 0.4350753894556899, "learning_rate": 2.564823387547716e-06, "loss": 0.3688, "step": 5648 }, { "epoch": 1.3446183137978223, "grad_norm": 0.36978176336633634, "learning_rate": 2.5631400421496934e-06, "loss": 0.3408, "step": 5649 }, { "epoch": 1.3448563098708872, "grad_norm": 0.3672540601703578, "learning_rate": 2.5614570589293457e-06, "loss": 0.3011, "step": 5650 }, { "epoch": 1.345094305943952, "grad_norm": 0.3767220367127715, "learning_rate": 2.5597744381368063e-06, "loss": 0.3395, "step": 5651 }, { "epoch": 1.3453323020170167, "grad_norm": 0.38145663607283764, "learning_rate": 2.558092180022153e-06, "loss": 0.3896, "step": 5652 }, { "epoch": 1.3455702980900814, "grad_norm": 0.36950737323434774, "learning_rate": 2.5564102848354098e-06, "loss": 0.3138, "step": 5653 }, { "epoch": 1.3458082941631462, "grad_norm": 0.3764771683321259, "learning_rate": 2.5547287528265517e-06, "loss": 0.282, "step": 5654 }, { "epoch": 1.3460462902362111, "grad_norm": 0.4283336061272312, "learning_rate": 2.553047584245495e-06, "loss": 0.372, "step": 5655 }, { "epoch": 1.3462842863092759, "grad_norm": 0.3676206486750502, "learning_rate": 2.5513667793421002e-06, "loss": 0.3684, "step": 5656 }, { "epoch": 1.3465222823823406, "grad_norm": 0.40021565693988703, "learning_rate": 2.5496863383661797e-06, "loss": 0.2643, "step": 5657 }, { "epoch": 1.3467602784554056, "grad_norm": 0.3777923710780302, "learning_rate": 2.548006261567487e-06, "loss": 0.3046, "step": 5658 }, { "epoch": 1.3469982745284703, "grad_norm": 0.38725995517731304, "learning_rate": 2.5463265491957224e-06, "loss": 0.3831, "step": 5659 }, { "epoch": 1.347236270601535, "grad_norm": 0.3576244257854606, "learning_rate": 2.544647201500534e-06, "loss": 0.3197, "step": 5660 }, { "epoch": 1.3474742666745998, "grad_norm": 0.36464513110800595, "learning_rate": 2.542968218731514e-06, "loss": 0.2718, "step": 5661 }, { "epoch": 1.3477122627476645, "grad_norm": 0.3904440981846269, "learning_rate": 2.541289601138201e-06, "loss": 0.3488, "step": 5662 }, { "epoch": 1.3479502588207295, "grad_norm": 0.3894407299674647, "learning_rate": 2.539611348970077e-06, "loss": 0.3785, "step": 5663 }, { "epoch": 1.3481882548937942, "grad_norm": 0.36018047764775457, "learning_rate": 2.537933462476575e-06, "loss": 0.294, "step": 5664 }, { "epoch": 1.348426250966859, "grad_norm": 0.45032023637446106, "learning_rate": 2.5362559419070693e-06, "loss": 0.3058, "step": 5665 }, { "epoch": 1.348664247039924, "grad_norm": 0.3520211254562473, "learning_rate": 2.534578787510881e-06, "loss": 0.3629, "step": 5666 }, { "epoch": 1.3489022431129887, "grad_norm": 0.35778323923398264, "learning_rate": 2.532901999537274e-06, "loss": 0.3121, "step": 5667 }, { "epoch": 1.3491402391860534, "grad_norm": 0.40768614895424304, "learning_rate": 2.531225578235465e-06, "loss": 0.2942, "step": 5668 }, { "epoch": 1.3493782352591182, "grad_norm": 0.40587873605164554, "learning_rate": 2.52954952385461e-06, "loss": 0.3128, "step": 5669 }, { "epoch": 1.349616231332183, "grad_norm": 0.3665695548104563, "learning_rate": 2.527873836643811e-06, "loss": 0.3682, "step": 5670 }, { "epoch": 1.3498542274052479, "grad_norm": 0.3757527838138373, "learning_rate": 2.5261985168521174e-06, "loss": 0.2842, "step": 5671 }, { "epoch": 1.3500922234783126, "grad_norm": 0.38913613595462837, "learning_rate": 2.5245235647285238e-06, "loss": 0.3045, "step": 5672 }, { "epoch": 1.3503302195513773, "grad_norm": 0.39284781780088013, "learning_rate": 2.5228489805219684e-06, "loss": 0.3733, "step": 5673 }, { "epoch": 1.3505682156244423, "grad_norm": 0.39264836647519236, "learning_rate": 2.5211747644813367e-06, "loss": 0.323, "step": 5674 }, { "epoch": 1.350806211697507, "grad_norm": 0.38369466339855723, "learning_rate": 2.5195009168554572e-06, "loss": 0.2919, "step": 5675 }, { "epoch": 1.3510442077705718, "grad_norm": 0.37067590315834653, "learning_rate": 2.517827437893107e-06, "loss": 0.2875, "step": 5676 }, { "epoch": 1.3512822038436365, "grad_norm": 0.38302813218504755, "learning_rate": 2.5161543278430055e-06, "loss": 0.3684, "step": 5677 }, { "epoch": 1.3515201999167012, "grad_norm": 0.3605795300765205, "learning_rate": 2.5144815869538177e-06, "loss": 0.2855, "step": 5678 }, { "epoch": 1.3517581959897662, "grad_norm": 0.36372350908656326, "learning_rate": 2.512809215474155e-06, "loss": 0.2996, "step": 5679 }, { "epoch": 1.351996192062831, "grad_norm": 0.36974222068701523, "learning_rate": 2.5111372136525713e-06, "loss": 0.3311, "step": 5680 }, { "epoch": 1.3522341881358957, "grad_norm": 0.36626523624003543, "learning_rate": 2.509465581737571e-06, "loss": 0.3287, "step": 5681 }, { "epoch": 1.3524721842089606, "grad_norm": 0.3940460629565776, "learning_rate": 2.5077943199775978e-06, "loss": 0.29, "step": 5682 }, { "epoch": 1.3527101802820254, "grad_norm": 0.38718589993001984, "learning_rate": 2.5061234286210436e-06, "loss": 0.2943, "step": 5683 }, { "epoch": 1.3529481763550901, "grad_norm": 0.4250135569344329, "learning_rate": 2.5044529079162426e-06, "loss": 0.3941, "step": 5684 }, { "epoch": 1.3531861724281549, "grad_norm": 0.395297953846377, "learning_rate": 2.502782758111477e-06, "loss": 0.2984, "step": 5685 }, { "epoch": 1.3534241685012196, "grad_norm": 0.3758290085462567, "learning_rate": 2.5011129794549717e-06, "loss": 0.258, "step": 5686 }, { "epoch": 1.3536621645742846, "grad_norm": 0.3900626080187409, "learning_rate": 2.4994435721948966e-06, "loss": 0.3284, "step": 5687 }, { "epoch": 1.3539001606473493, "grad_norm": 0.38881872385050364, "learning_rate": 2.4977745365793676e-06, "loss": 0.3925, "step": 5688 }, { "epoch": 1.354138156720414, "grad_norm": 0.37508295198986963, "learning_rate": 2.4961058728564446e-06, "loss": 0.2848, "step": 5689 }, { "epoch": 1.354376152793479, "grad_norm": 0.4969361288451411, "learning_rate": 2.4944375812741304e-06, "loss": 0.3224, "step": 5690 }, { "epoch": 1.3546141488665437, "grad_norm": 0.4191683283997192, "learning_rate": 2.4927696620803783e-06, "loss": 0.3844, "step": 5691 }, { "epoch": 1.3548521449396085, "grad_norm": 0.3632365871635604, "learning_rate": 2.4911021155230804e-06, "loss": 0.3277, "step": 5692 }, { "epoch": 1.3550901410126732, "grad_norm": 0.36797859774918557, "learning_rate": 2.4894349418500745e-06, "loss": 0.2917, "step": 5693 }, { "epoch": 1.355328137085738, "grad_norm": 0.36534359001275923, "learning_rate": 2.487768141309144e-06, "loss": 0.3252, "step": 5694 }, { "epoch": 1.355566133158803, "grad_norm": 0.41390949501152025, "learning_rate": 2.486101714148018e-06, "loss": 0.3776, "step": 5695 }, { "epoch": 1.3558041292318677, "grad_norm": 0.3596335904706074, "learning_rate": 2.4844356606143687e-06, "loss": 0.3014, "step": 5696 }, { "epoch": 1.3560421253049324, "grad_norm": 0.3738157482380636, "learning_rate": 2.482769980955812e-06, "loss": 0.3108, "step": 5697 }, { "epoch": 1.3562801213779974, "grad_norm": 0.3712839660896256, "learning_rate": 2.4811046754199098e-06, "loss": 0.3732, "step": 5698 }, { "epoch": 1.356518117451062, "grad_norm": 0.3879484832908067, "learning_rate": 2.479439744254167e-06, "loss": 0.3083, "step": 5699 }, { "epoch": 1.3567561135241268, "grad_norm": 0.4129860781327451, "learning_rate": 2.4777751877060343e-06, "loss": 0.2727, "step": 5700 }, { "epoch": 1.3569941095971916, "grad_norm": 0.3695216250582914, "learning_rate": 2.476111006022905e-06, "loss": 0.3, "step": 5701 }, { "epoch": 1.3572321056702563, "grad_norm": 0.38079989838460043, "learning_rate": 2.4744471994521184e-06, "loss": 0.387, "step": 5702 }, { "epoch": 1.3574701017433213, "grad_norm": 0.3851371719901647, "learning_rate": 2.4727837682409574e-06, "loss": 0.2918, "step": 5703 }, { "epoch": 1.357708097816386, "grad_norm": 0.4030470108804071, "learning_rate": 2.4711207126366483e-06, "loss": 0.2774, "step": 5704 }, { "epoch": 1.3579460938894508, "grad_norm": 0.4043361359872621, "learning_rate": 2.4694580328863633e-06, "loss": 0.3715, "step": 5705 }, { "epoch": 1.3581840899625157, "grad_norm": 0.36653113787008357, "learning_rate": 2.4677957292372166e-06, "loss": 0.3627, "step": 5706 }, { "epoch": 1.3584220860355805, "grad_norm": 0.3850700049312921, "learning_rate": 2.4661338019362684e-06, "loss": 0.2887, "step": 5707 }, { "epoch": 1.3586600821086452, "grad_norm": 0.38782380526495214, "learning_rate": 2.46447225123052e-06, "loss": 0.35, "step": 5708 }, { "epoch": 1.35889807818171, "grad_norm": 0.3875168580573579, "learning_rate": 2.4628110773669235e-06, "loss": 0.3909, "step": 5709 }, { "epoch": 1.3591360742547747, "grad_norm": 0.3706961560048635, "learning_rate": 2.4611502805923677e-06, "loss": 0.3001, "step": 5710 }, { "epoch": 1.3593740703278396, "grad_norm": 0.41022795283685426, "learning_rate": 2.459489861153688e-06, "loss": 0.2858, "step": 5711 }, { "epoch": 1.3596120664009044, "grad_norm": 0.3507488121743938, "learning_rate": 2.4578298192976646e-06, "loss": 0.3481, "step": 5712 }, { "epoch": 1.3598500624739691, "grad_norm": 0.3882156595432714, "learning_rate": 2.4561701552710198e-06, "loss": 0.3727, "step": 5713 }, { "epoch": 1.360088058547034, "grad_norm": 0.41147742183986585, "learning_rate": 2.454510869320422e-06, "loss": 0.2848, "step": 5714 }, { "epoch": 1.3603260546200988, "grad_norm": 0.3932347054914899, "learning_rate": 2.4528519616924807e-06, "loss": 0.319, "step": 5715 }, { "epoch": 1.3605640506931636, "grad_norm": 0.3962467933758855, "learning_rate": 2.4511934326337516e-06, "loss": 0.3792, "step": 5716 }, { "epoch": 1.3608020467662283, "grad_norm": 0.38954161787628644, "learning_rate": 2.449535282390731e-06, "loss": 0.3375, "step": 5717 }, { "epoch": 1.361040042839293, "grad_norm": 0.3926513205788529, "learning_rate": 2.4478775112098644e-06, "loss": 0.271, "step": 5718 }, { "epoch": 1.361278038912358, "grad_norm": 0.4009062810034733, "learning_rate": 2.446220119337536e-06, "loss": 0.3599, "step": 5719 }, { "epoch": 1.3615160349854227, "grad_norm": 0.367410786702826, "learning_rate": 2.444563107020076e-06, "loss": 0.3562, "step": 5720 }, { "epoch": 1.3617540310584875, "grad_norm": 0.37374300698130064, "learning_rate": 2.4429064745037562e-06, "loss": 0.2799, "step": 5721 }, { "epoch": 1.3619920271315524, "grad_norm": 0.39466179262858975, "learning_rate": 2.441250222034792e-06, "loss": 0.3144, "step": 5722 }, { "epoch": 1.3622300232046172, "grad_norm": 0.4045572620870956, "learning_rate": 2.4395943498593476e-06, "loss": 0.3638, "step": 5723 }, { "epoch": 1.362468019277682, "grad_norm": 0.35753443357854653, "learning_rate": 2.4379388582235236e-06, "loss": 0.3324, "step": 5724 }, { "epoch": 1.3627060153507466, "grad_norm": 0.3827497837460359, "learning_rate": 2.436283747373368e-06, "loss": 0.2728, "step": 5725 }, { "epoch": 1.3629440114238114, "grad_norm": 0.369331720445923, "learning_rate": 2.43462901755487e-06, "loss": 0.3226, "step": 5726 }, { "epoch": 1.3631820074968763, "grad_norm": 0.3868589649921183, "learning_rate": 2.4329746690139656e-06, "loss": 0.4188, "step": 5727 }, { "epoch": 1.363420003569941, "grad_norm": 0.5212121670082667, "learning_rate": 2.4313207019965295e-06, "loss": 0.3194, "step": 5728 }, { "epoch": 1.3636579996430058, "grad_norm": 0.380202951444516, "learning_rate": 2.429667116748383e-06, "loss": 0.2932, "step": 5729 }, { "epoch": 1.3638959957160708, "grad_norm": 0.3965822543317094, "learning_rate": 2.4280139135152906e-06, "loss": 0.3701, "step": 5730 }, { "epoch": 1.3641339917891355, "grad_norm": 0.37353099551658364, "learning_rate": 2.4263610925429588e-06, "loss": 0.3374, "step": 5731 }, { "epoch": 1.3643719878622003, "grad_norm": 0.3661423191951257, "learning_rate": 2.4247086540770365e-06, "loss": 0.2938, "step": 5732 }, { "epoch": 1.364609983935265, "grad_norm": 0.3647519555796, "learning_rate": 2.4230565983631184e-06, "loss": 0.3204, "step": 5733 }, { "epoch": 1.3648479800083297, "grad_norm": 0.43532671681837853, "learning_rate": 2.42140492564674e-06, "loss": 0.3777, "step": 5734 }, { "epoch": 1.3650859760813947, "grad_norm": 0.38492642963237705, "learning_rate": 2.4197536361733792e-06, "loss": 0.3075, "step": 5735 }, { "epoch": 1.3653239721544594, "grad_norm": 0.3814772631917648, "learning_rate": 2.418102730188462e-06, "loss": 0.2729, "step": 5736 }, { "epoch": 1.3655619682275242, "grad_norm": 0.39209652486984, "learning_rate": 2.4164522079373525e-06, "loss": 0.344, "step": 5737 }, { "epoch": 1.3657999643005891, "grad_norm": 0.3664720899560753, "learning_rate": 2.4148020696653583e-06, "loss": 0.3635, "step": 5738 }, { "epoch": 1.3660379603736539, "grad_norm": 0.3534142149903722, "learning_rate": 2.413152315617732e-06, "loss": 0.2905, "step": 5739 }, { "epoch": 1.3662759564467186, "grad_norm": 0.3734213259184978, "learning_rate": 2.4115029460396666e-06, "loss": 0.2853, "step": 5740 }, { "epoch": 1.3665139525197834, "grad_norm": 0.45252181922728846, "learning_rate": 2.4098539611763e-06, "loss": 0.3878, "step": 5741 }, { "epoch": 1.366751948592848, "grad_norm": 0.4147075627953894, "learning_rate": 2.408205361272712e-06, "loss": 0.3316, "step": 5742 }, { "epoch": 1.366989944665913, "grad_norm": 0.355835782980996, "learning_rate": 2.4065571465739247e-06, "loss": 0.2902, "step": 5743 }, { "epoch": 1.3672279407389778, "grad_norm": 0.40862200982116076, "learning_rate": 2.4049093173249026e-06, "loss": 0.3258, "step": 5744 }, { "epoch": 1.3674659368120425, "grad_norm": 0.3792126875199552, "learning_rate": 2.403261873770557e-06, "loss": 0.3966, "step": 5745 }, { "epoch": 1.3677039328851075, "grad_norm": 0.39370562835081924, "learning_rate": 2.401614816155737e-06, "loss": 0.2929, "step": 5746 }, { "epoch": 1.3679419289581722, "grad_norm": 0.3985119119940615, "learning_rate": 2.399968144725237e-06, "loss": 0.3155, "step": 5747 }, { "epoch": 1.368179925031237, "grad_norm": 0.4030281878578675, "learning_rate": 2.3983218597237924e-06, "loss": 0.3834, "step": 5748 }, { "epoch": 1.3684179211043017, "grad_norm": 0.3690031365226529, "learning_rate": 2.3966759613960796e-06, "loss": 0.3083, "step": 5749 }, { "epoch": 1.3686559171773665, "grad_norm": 0.38052926649872104, "learning_rate": 2.3950304499867237e-06, "loss": 0.2933, "step": 5750 }, { "epoch": 1.3688939132504314, "grad_norm": 0.3970644105901736, "learning_rate": 2.393385325740287e-06, "loss": 0.3469, "step": 5751 }, { "epoch": 1.3691319093234962, "grad_norm": 0.38735698530577334, "learning_rate": 2.391740588901276e-06, "loss": 0.4109, "step": 5752 }, { "epoch": 1.369369905396561, "grad_norm": 0.37818783783560916, "learning_rate": 2.3900962397141375e-06, "loss": 0.2838, "step": 5753 }, { "epoch": 1.3696079014696259, "grad_norm": 0.3963292077588658, "learning_rate": 2.3884522784232645e-06, "loss": 0.2984, "step": 5754 }, { "epoch": 1.3698458975426906, "grad_norm": 0.3783349832246415, "learning_rate": 2.3868087052729893e-06, "loss": 0.3633, "step": 5755 }, { "epoch": 1.3700838936157553, "grad_norm": 0.3663177261232768, "learning_rate": 2.385165520507588e-06, "loss": 0.357, "step": 5756 }, { "epoch": 1.37032188968882, "grad_norm": 0.40376407001395825, "learning_rate": 2.383522724371278e-06, "loss": 0.3001, "step": 5757 }, { "epoch": 1.3705598857618848, "grad_norm": 0.3888540867491295, "learning_rate": 2.38188031710822e-06, "loss": 0.3109, "step": 5758 }, { "epoch": 1.3707978818349498, "grad_norm": 0.3823516378443356, "learning_rate": 2.3802382989625156e-06, "loss": 0.374, "step": 5759 }, { "epoch": 1.3710358779080145, "grad_norm": 0.38773823298396787, "learning_rate": 2.3785966701782098e-06, "loss": 0.3037, "step": 5760 }, { "epoch": 1.3712738739810792, "grad_norm": 0.4285193328439413, "learning_rate": 2.3769554309992894e-06, "loss": 0.2735, "step": 5761 }, { "epoch": 1.3715118700541442, "grad_norm": 0.36656643054155086, "learning_rate": 2.3753145816696827e-06, "loss": 0.3558, "step": 5762 }, { "epoch": 1.371749866127209, "grad_norm": 0.3731902128515065, "learning_rate": 2.3736741224332592e-06, "loss": 0.3605, "step": 5763 }, { "epoch": 1.3719878622002737, "grad_norm": 0.3743570952391234, "learning_rate": 2.372034053533835e-06, "loss": 0.279, "step": 5764 }, { "epoch": 1.3722258582733384, "grad_norm": 0.36798469228190406, "learning_rate": 2.370394375215163e-06, "loss": 0.3228, "step": 5765 }, { "epoch": 1.3724638543464032, "grad_norm": 0.40727564860454385, "learning_rate": 2.36875508772094e-06, "loss": 0.3691, "step": 5766 }, { "epoch": 1.3727018504194681, "grad_norm": 0.379396729169711, "learning_rate": 2.367116191294806e-06, "loss": 0.3249, "step": 5767 }, { "epoch": 1.3729398464925329, "grad_norm": 0.4026927710670588, "learning_rate": 2.365477686180339e-06, "loss": 0.2866, "step": 5768 }, { "epoch": 1.3731778425655976, "grad_norm": 0.37305525839630027, "learning_rate": 2.3638395726210633e-06, "loss": 0.3318, "step": 5769 }, { "epoch": 1.3734158386386626, "grad_norm": 0.39186058761949977, "learning_rate": 2.362201850860443e-06, "loss": 0.3883, "step": 5770 }, { "epoch": 1.3736538347117273, "grad_norm": 0.3880251678929161, "learning_rate": 2.3605645211418817e-06, "loss": 0.3171, "step": 5771 }, { "epoch": 1.373891830784792, "grad_norm": 0.38767893993417474, "learning_rate": 2.35892758370873e-06, "loss": 0.3205, "step": 5772 }, { "epoch": 1.3741298268578568, "grad_norm": 0.40297474576829545, "learning_rate": 2.357291038804277e-06, "loss": 0.3814, "step": 5773 }, { "epoch": 1.3743678229309215, "grad_norm": 0.3831088465518714, "learning_rate": 2.3556548866717532e-06, "loss": 0.3225, "step": 5774 }, { "epoch": 1.3746058190039865, "grad_norm": 0.4134284378393506, "learning_rate": 2.3540191275543313e-06, "loss": 0.2801, "step": 5775 }, { "epoch": 1.3748438150770512, "grad_norm": 0.42059665325132306, "learning_rate": 2.352383761695125e-06, "loss": 0.3214, "step": 5776 }, { "epoch": 1.375081811150116, "grad_norm": 0.39143587678806474, "learning_rate": 2.350748789337189e-06, "loss": 0.3738, "step": 5777 }, { "epoch": 1.375319807223181, "grad_norm": 0.349803339437346, "learning_rate": 2.349114210723524e-06, "loss": 0.2689, "step": 5778 }, { "epoch": 1.3755578032962457, "grad_norm": 0.3951646071549478, "learning_rate": 2.3474800260970663e-06, "loss": 0.3007, "step": 5779 }, { "epoch": 1.3757957993693104, "grad_norm": 0.3898501872812015, "learning_rate": 2.345846235700698e-06, "loss": 0.3639, "step": 5780 }, { "epoch": 1.3760337954423751, "grad_norm": 0.38912512748823874, "learning_rate": 2.3442128397772396e-06, "loss": 0.3618, "step": 5781 }, { "epoch": 1.3762717915154399, "grad_norm": 0.38556905651285667, "learning_rate": 2.3425798385694536e-06, "loss": 0.26, "step": 5782 }, { "epoch": 1.3765097875885048, "grad_norm": 0.4281982976966381, "learning_rate": 2.3409472323200456e-06, "loss": 0.3449, "step": 5783 }, { "epoch": 1.3767477836615696, "grad_norm": 0.39046675152439464, "learning_rate": 2.3393150212716604e-06, "loss": 0.3875, "step": 5784 }, { "epoch": 1.3769857797346343, "grad_norm": 0.3544336457158023, "learning_rate": 2.337683205666885e-06, "loss": 0.3233, "step": 5785 }, { "epoch": 1.3772237758076993, "grad_norm": 0.36474388790592926, "learning_rate": 2.336051785748248e-06, "loss": 0.285, "step": 5786 }, { "epoch": 1.377461771880764, "grad_norm": 0.3585335598025468, "learning_rate": 2.334420761758219e-06, "loss": 0.3508, "step": 5787 }, { "epoch": 1.3776997679538288, "grad_norm": 0.395098339023238, "learning_rate": 2.332790133939207e-06, "loss": 0.4056, "step": 5788 }, { "epoch": 1.3779377640268935, "grad_norm": 0.3916655159612242, "learning_rate": 2.3311599025335654e-06, "loss": 0.3117, "step": 5789 }, { "epoch": 1.3781757600999582, "grad_norm": 0.39352579920052977, "learning_rate": 2.3295300677835857e-06, "loss": 0.3329, "step": 5790 }, { "epoch": 1.3784137561730232, "grad_norm": 0.3949421237210347, "learning_rate": 2.327900629931501e-06, "loss": 0.4006, "step": 5791 }, { "epoch": 1.378651752246088, "grad_norm": 0.3930410571926244, "learning_rate": 2.3262715892194885e-06, "loss": 0.3191, "step": 5792 }, { "epoch": 1.3788897483191527, "grad_norm": 0.3767521685835201, "learning_rate": 2.3246429458896637e-06, "loss": 0.2756, "step": 5793 }, { "epoch": 1.3791277443922176, "grad_norm": 0.4252278529678899, "learning_rate": 2.3230147001840814e-06, "loss": 0.3496, "step": 5794 }, { "epoch": 1.3793657404652824, "grad_norm": 0.40177327803867513, "learning_rate": 2.3213868523447404e-06, "loss": 0.3888, "step": 5795 }, { "epoch": 1.3796037365383471, "grad_norm": 0.3991978634595426, "learning_rate": 2.3197594026135785e-06, "loss": 0.2992, "step": 5796 }, { "epoch": 1.3798417326114119, "grad_norm": 0.3820122275069646, "learning_rate": 2.3181323512324754e-06, "loss": 0.3062, "step": 5797 }, { "epoch": 1.3800797286844766, "grad_norm": 0.394013127202058, "learning_rate": 2.3165056984432493e-06, "loss": 0.3725, "step": 5798 }, { "epoch": 1.3803177247575416, "grad_norm": 0.3865299916518418, "learning_rate": 2.314879444487665e-06, "loss": 0.308, "step": 5799 }, { "epoch": 1.3805557208306063, "grad_norm": 0.3606035448903917, "learning_rate": 2.3132535896074213e-06, "loss": 0.2737, "step": 5800 }, { "epoch": 1.380793716903671, "grad_norm": 0.39435874636986457, "learning_rate": 2.3116281340441616e-06, "loss": 0.3243, "step": 5801 }, { "epoch": 1.381031712976736, "grad_norm": 0.3944881659871505, "learning_rate": 2.310003078039468e-06, "loss": 0.3845, "step": 5802 }, { "epoch": 1.3812697090498007, "grad_norm": 0.3739664004902333, "learning_rate": 2.3083784218348646e-06, "loss": 0.3068, "step": 5803 }, { "epoch": 1.3815077051228655, "grad_norm": 0.3784435268416916, "learning_rate": 2.3067541656718133e-06, "loss": 0.2978, "step": 5804 }, { "epoch": 1.3817457011959302, "grad_norm": 0.35412487368158974, "learning_rate": 2.305130309791723e-06, "loss": 0.3379, "step": 5805 }, { "epoch": 1.381983697268995, "grad_norm": 0.408945279084268, "learning_rate": 2.303506854435936e-06, "loss": 0.3552, "step": 5806 }, { "epoch": 1.38222169334206, "grad_norm": 0.3753005006752039, "learning_rate": 2.3018837998457384e-06, "loss": 0.2929, "step": 5807 }, { "epoch": 1.3824596894151246, "grad_norm": 0.36459640658547277, "learning_rate": 2.3002611462623563e-06, "loss": 0.3338, "step": 5808 }, { "epoch": 1.3826976854881894, "grad_norm": 0.3665121988763295, "learning_rate": 2.2986388939269567e-06, "loss": 0.3665, "step": 5809 }, { "epoch": 1.3829356815612543, "grad_norm": 0.3517995162869186, "learning_rate": 2.2970170430806455e-06, "loss": 0.2952, "step": 5810 }, { "epoch": 1.383173677634319, "grad_norm": 0.39818503253324855, "learning_rate": 2.29539559396447e-06, "loss": 0.2816, "step": 5811 }, { "epoch": 1.3834116737073838, "grad_norm": 0.3883965107678621, "learning_rate": 2.2937745468194186e-06, "loss": 0.3767, "step": 5812 }, { "epoch": 1.3836496697804486, "grad_norm": 0.34808683822000047, "learning_rate": 2.2921539018864177e-06, "loss": 0.3601, "step": 5813 }, { "epoch": 1.3838876658535133, "grad_norm": 0.3785593295383857, "learning_rate": 2.290533659406336e-06, "loss": 0.3008, "step": 5814 }, { "epoch": 1.3841256619265783, "grad_norm": 0.38995875252493734, "learning_rate": 2.2889138196199816e-06, "loss": 0.3139, "step": 5815 }, { "epoch": 1.384363657999643, "grad_norm": 0.37852442117564816, "learning_rate": 2.287294382768103e-06, "loss": 0.3529, "step": 5816 }, { "epoch": 1.3846016540727077, "grad_norm": 0.37788122292949944, "learning_rate": 2.285675349091388e-06, "loss": 0.3376, "step": 5817 }, { "epoch": 1.3848396501457727, "grad_norm": 0.42130823797471484, "learning_rate": 2.284056718830463e-06, "loss": 0.2815, "step": 5818 }, { "epoch": 1.3850776462188374, "grad_norm": 0.4103925080704719, "learning_rate": 2.2824384922259005e-06, "loss": 0.3086, "step": 5819 }, { "epoch": 1.3853156422919022, "grad_norm": 0.42671753513437, "learning_rate": 2.280820669518208e-06, "loss": 0.4045, "step": 5820 }, { "epoch": 1.385553638364967, "grad_norm": 0.39262412594080603, "learning_rate": 2.2792032509478335e-06, "loss": 0.2985, "step": 5821 }, { "epoch": 1.3857916344380317, "grad_norm": 0.3889466132697115, "learning_rate": 2.2775862367551642e-06, "loss": 0.3125, "step": 5822 }, { "epoch": 1.3860296305110966, "grad_norm": 0.3873197747905789, "learning_rate": 2.2759696271805298e-06, "loss": 0.3508, "step": 5823 }, { "epoch": 1.3862676265841614, "grad_norm": 0.38959234217638355, "learning_rate": 2.274353422464198e-06, "loss": 0.327, "step": 5824 }, { "epoch": 1.386505622657226, "grad_norm": 0.49119856662683686, "learning_rate": 2.2727376228463754e-06, "loss": 0.2518, "step": 5825 }, { "epoch": 1.386743618730291, "grad_norm": 0.3592727494766306, "learning_rate": 2.271122228567213e-06, "loss": 0.3103, "step": 5826 }, { "epoch": 1.3869816148033558, "grad_norm": 0.40339073061591146, "learning_rate": 2.2695072398667965e-06, "loss": 0.4171, "step": 5827 }, { "epoch": 1.3872196108764205, "grad_norm": 0.35219893549707026, "learning_rate": 2.2678926569851535e-06, "loss": 0.3219, "step": 5828 }, { "epoch": 1.3874576069494853, "grad_norm": 0.3964161658508986, "learning_rate": 2.26627848016225e-06, "loss": 0.2777, "step": 5829 }, { "epoch": 1.38769560302255, "grad_norm": 0.38942352677332387, "learning_rate": 2.2646647096379944e-06, "loss": 0.3602, "step": 5830 }, { "epoch": 1.387933599095615, "grad_norm": 0.42284500435613087, "learning_rate": 2.2630513456522315e-06, "loss": 0.3794, "step": 5831 }, { "epoch": 1.3881715951686797, "grad_norm": 0.3975269422746937, "learning_rate": 2.261438388444745e-06, "loss": 0.2931, "step": 5832 }, { "epoch": 1.3884095912417445, "grad_norm": 0.3911429557797404, "learning_rate": 2.259825838255265e-06, "loss": 0.3317, "step": 5833 }, { "epoch": 1.3886475873148094, "grad_norm": 0.40487348869777584, "learning_rate": 2.2582136953234544e-06, "loss": 0.3936, "step": 5834 }, { "epoch": 1.3888855833878742, "grad_norm": 0.3675010804576965, "learning_rate": 2.2566019598889168e-06, "loss": 0.301, "step": 5835 }, { "epoch": 1.389123579460939, "grad_norm": 0.3755478510541879, "learning_rate": 2.254990632191197e-06, "loss": 0.2646, "step": 5836 }, { "epoch": 1.3893615755340036, "grad_norm": 0.3717516657625129, "learning_rate": 2.2533797124697763e-06, "loss": 0.3402, "step": 5837 }, { "epoch": 1.3895995716070684, "grad_norm": 0.37873677832015795, "learning_rate": 2.2517692009640796e-06, "loss": 0.3532, "step": 5838 }, { "epoch": 1.3898375676801333, "grad_norm": 0.3812765678125732, "learning_rate": 2.250159097913467e-06, "loss": 0.2933, "step": 5839 }, { "epoch": 1.390075563753198, "grad_norm": 0.3833578003169235, "learning_rate": 2.2485494035572404e-06, "loss": 0.2927, "step": 5840 }, { "epoch": 1.3903135598262628, "grad_norm": 0.37497476870990365, "learning_rate": 2.24694011813464e-06, "loss": 0.3708, "step": 5841 }, { "epoch": 1.3905515558993278, "grad_norm": 0.35418256159670797, "learning_rate": 2.2453312418848454e-06, "loss": 0.318, "step": 5842 }, { "epoch": 1.3907895519723925, "grad_norm": 0.3892901098428216, "learning_rate": 2.243722775046976e-06, "loss": 0.3176, "step": 5843 }, { "epoch": 1.3910275480454573, "grad_norm": 0.37876686309642, "learning_rate": 2.2421147178600883e-06, "loss": 0.2907, "step": 5844 }, { "epoch": 1.391265544118522, "grad_norm": 0.3847753143788955, "learning_rate": 2.2405070705631808e-06, "loss": 0.3906, "step": 5845 }, { "epoch": 1.3915035401915867, "grad_norm": 0.3638169205713526, "learning_rate": 2.238899833395188e-06, "loss": 0.2749, "step": 5846 }, { "epoch": 1.3917415362646517, "grad_norm": 0.40407484545756533, "learning_rate": 2.2372930065949873e-06, "loss": 0.3018, "step": 5847 }, { "epoch": 1.3919795323377164, "grad_norm": 0.37766478316846785, "learning_rate": 2.235686590401392e-06, "loss": 0.3687, "step": 5848 }, { "epoch": 1.3922175284107812, "grad_norm": 0.34719174148467286, "learning_rate": 2.234080585053155e-06, "loss": 0.3441, "step": 5849 }, { "epoch": 1.3924555244838461, "grad_norm": 0.37713121646339276, "learning_rate": 2.232474990788969e-06, "loss": 0.2669, "step": 5850 }, { "epoch": 1.3926935205569109, "grad_norm": 0.40478233582795153, "learning_rate": 2.2308698078474645e-06, "loss": 0.3362, "step": 5851 }, { "epoch": 1.3929315166299756, "grad_norm": 0.4118362847556917, "learning_rate": 2.2292650364672096e-06, "loss": 0.3946, "step": 5852 }, { "epoch": 1.3931695127030403, "grad_norm": 0.3558802597442529, "learning_rate": 2.2276606768867172e-06, "loss": 0.2805, "step": 5853 }, { "epoch": 1.393407508776105, "grad_norm": 0.4691025727643871, "learning_rate": 2.226056729344432e-06, "loss": 0.2816, "step": 5854 }, { "epoch": 1.39364550484917, "grad_norm": 0.4030556981141061, "learning_rate": 2.2244531940787413e-06, "loss": 0.3574, "step": 5855 }, { "epoch": 1.3938835009222348, "grad_norm": 0.3809802135059961, "learning_rate": 2.22285007132797e-06, "loss": 0.3576, "step": 5856 }, { "epoch": 1.3941214969952995, "grad_norm": 0.35544990461765996, "learning_rate": 2.2212473613303807e-06, "loss": 0.2642, "step": 5857 }, { "epoch": 1.3943594930683645, "grad_norm": 0.3692152888228544, "learning_rate": 2.2196450643241768e-06, "loss": 0.3212, "step": 5858 }, { "epoch": 1.3945974891414292, "grad_norm": 0.4034006295018707, "learning_rate": 2.218043180547499e-06, "loss": 0.3548, "step": 5859 }, { "epoch": 1.394835485214494, "grad_norm": 0.3750782883132117, "learning_rate": 2.216441710238425e-06, "loss": 0.2913, "step": 5860 }, { "epoch": 1.3950734812875587, "grad_norm": 0.4040307357444328, "learning_rate": 2.214840653634977e-06, "loss": 0.2816, "step": 5861 }, { "epoch": 1.3953114773606234, "grad_norm": 0.3673705781593482, "learning_rate": 2.213240010975109e-06, "loss": 0.3495, "step": 5862 }, { "epoch": 1.3955494734336884, "grad_norm": 0.3704834222648198, "learning_rate": 2.211639782496717e-06, "loss": 0.3757, "step": 5863 }, { "epoch": 1.3957874695067531, "grad_norm": 0.37384507283033885, "learning_rate": 2.2100399684376333e-06, "loss": 0.2976, "step": 5864 }, { "epoch": 1.3960254655798179, "grad_norm": 0.38977052889591346, "learning_rate": 2.208440569035631e-06, "loss": 0.3131, "step": 5865 }, { "epoch": 1.3962634616528828, "grad_norm": 0.3803082135668662, "learning_rate": 2.2068415845284197e-06, "loss": 0.3629, "step": 5866 }, { "epoch": 1.3965014577259476, "grad_norm": 0.3428155698284938, "learning_rate": 2.2052430151536488e-06, "loss": 0.306, "step": 5867 }, { "epoch": 1.3967394537990123, "grad_norm": 0.4200376862637538, "learning_rate": 2.203644861148904e-06, "loss": 0.3033, "step": 5868 }, { "epoch": 1.396977449872077, "grad_norm": 0.41707618321368245, "learning_rate": 2.202047122751712e-06, "loss": 0.3557, "step": 5869 }, { "epoch": 1.3972154459451418, "grad_norm": 0.39243821595567624, "learning_rate": 2.2004498001995355e-06, "loss": 0.3608, "step": 5870 }, { "epoch": 1.3974534420182068, "grad_norm": 0.3533215441822008, "learning_rate": 2.1988528937297764e-06, "loss": 0.2832, "step": 5871 }, { "epoch": 1.3976914380912715, "grad_norm": 0.37292525643353835, "learning_rate": 2.1972564035797738e-06, "loss": 0.313, "step": 5872 }, { "epoch": 1.3979294341643362, "grad_norm": 0.41160282830855566, "learning_rate": 2.1956603299868052e-06, "loss": 0.3649, "step": 5873 }, { "epoch": 1.3981674302374012, "grad_norm": 0.3789548998028757, "learning_rate": 2.1940646731880887e-06, "loss": 0.3277, "step": 5874 }, { "epoch": 1.398405426310466, "grad_norm": 0.40108158442899056, "learning_rate": 2.1924694334207773e-06, "loss": 0.2789, "step": 5875 }, { "epoch": 1.3986434223835307, "grad_norm": 0.38479323451240566, "learning_rate": 2.1908746109219633e-06, "loss": 0.3232, "step": 5876 }, { "epoch": 1.3988814184565954, "grad_norm": 0.39277475179443505, "learning_rate": 2.189280205928676e-06, "loss": 0.4036, "step": 5877 }, { "epoch": 1.3991194145296602, "grad_norm": 0.3629626558888327, "learning_rate": 2.1876862186778847e-06, "loss": 0.292, "step": 5878 }, { "epoch": 1.3993574106027251, "grad_norm": 0.39386803443255086, "learning_rate": 2.186092649406492e-06, "loss": 0.2813, "step": 5879 }, { "epoch": 1.3995954066757899, "grad_norm": 0.4045307535442236, "learning_rate": 2.184499498351347e-06, "loss": 0.3579, "step": 5880 }, { "epoch": 1.3998334027488546, "grad_norm": 0.38562044486044106, "learning_rate": 2.182906765749228e-06, "loss": 0.3259, "step": 5881 }, { "epoch": 1.4000713988219196, "grad_norm": 0.3722054840630449, "learning_rate": 2.1813144518368556e-06, "loss": 0.2682, "step": 5882 }, { "epoch": 1.4003093948949843, "grad_norm": 0.4853537819335595, "learning_rate": 2.1797225568508863e-06, "loss": 0.3586, "step": 5883 }, { "epoch": 1.400547390968049, "grad_norm": 0.3917377497499103, "learning_rate": 2.1781310810279156e-06, "loss": 0.3889, "step": 5884 }, { "epoch": 1.4007853870411138, "grad_norm": 0.3820969906527712, "learning_rate": 2.1765400246044755e-06, "loss": 0.286, "step": 5885 }, { "epoch": 1.4010233831141785, "grad_norm": 0.3901672408734566, "learning_rate": 2.1749493878170368e-06, "loss": 0.268, "step": 5886 }, { "epoch": 1.4012613791872435, "grad_norm": 0.5012229103908235, "learning_rate": 2.173359170902006e-06, "loss": 0.3502, "step": 5887 }, { "epoch": 1.4014993752603082, "grad_norm": 0.4163979879875083, "learning_rate": 2.171769374095732e-06, "loss": 0.3584, "step": 5888 }, { "epoch": 1.401737371333373, "grad_norm": 0.3825230668134233, "learning_rate": 2.1701799976344956e-06, "loss": 0.2952, "step": 5889 }, { "epoch": 1.401975367406438, "grad_norm": 0.3692912522268186, "learning_rate": 2.168591041754518e-06, "loss": 0.3206, "step": 5890 }, { "epoch": 1.4022133634795027, "grad_norm": 0.4020957675464499, "learning_rate": 2.1670025066919575e-06, "loss": 0.3645, "step": 5891 }, { "epoch": 1.4024513595525674, "grad_norm": 0.3652682950372471, "learning_rate": 2.1654143926829095e-06, "loss": 0.3088, "step": 5892 }, { "epoch": 1.4026893556256321, "grad_norm": 0.39357600720054553, "learning_rate": 2.163826699963407e-06, "loss": 0.3002, "step": 5893 }, { "epoch": 1.4029273516986969, "grad_norm": 0.39176835973179347, "learning_rate": 2.1622394287694203e-06, "loss": 0.3524, "step": 5894 }, { "epoch": 1.4031653477717618, "grad_norm": 0.4030630624539821, "learning_rate": 2.1606525793368578e-06, "loss": 0.4, "step": 5895 }, { "epoch": 1.4034033438448266, "grad_norm": 0.3581093728731858, "learning_rate": 2.159066151901563e-06, "loss": 0.3042, "step": 5896 }, { "epoch": 1.4036413399178913, "grad_norm": 0.3958734156159978, "learning_rate": 2.1574801466993204e-06, "loss": 0.2925, "step": 5897 }, { "epoch": 1.4038793359909563, "grad_norm": 0.44017456212239137, "learning_rate": 2.155894563965848e-06, "loss": 0.3305, "step": 5898 }, { "epoch": 1.404117332064021, "grad_norm": 0.36546554037443685, "learning_rate": 2.1543094039368034e-06, "loss": 0.3465, "step": 5899 }, { "epoch": 1.4043553281370857, "grad_norm": 0.4261135558150708, "learning_rate": 2.15272466684778e-06, "loss": 0.2733, "step": 5900 }, { "epoch": 1.4045933242101505, "grad_norm": 0.42031143455545217, "learning_rate": 2.151140352934308e-06, "loss": 0.3398, "step": 5901 }, { "epoch": 1.4048313202832152, "grad_norm": 0.3751387002028948, "learning_rate": 2.149556462431859e-06, "loss": 0.3863, "step": 5902 }, { "epoch": 1.4050693163562802, "grad_norm": 0.3438466812731875, "learning_rate": 2.1479729955758354e-06, "loss": 0.3185, "step": 5903 }, { "epoch": 1.405307312429345, "grad_norm": 0.3693116279880859, "learning_rate": 2.146389952601581e-06, "loss": 0.2928, "step": 5904 }, { "epoch": 1.4055453085024097, "grad_norm": 0.39195086077989794, "learning_rate": 2.1448073337443743e-06, "loss": 0.3798, "step": 5905 }, { "epoch": 1.4057833045754746, "grad_norm": 0.379901101442375, "learning_rate": 2.1432251392394303e-06, "loss": 0.3315, "step": 5906 }, { "epoch": 1.4060213006485394, "grad_norm": 0.3827424931752997, "learning_rate": 2.141643369321905e-06, "loss": 0.2723, "step": 5907 }, { "epoch": 1.406259296721604, "grad_norm": 0.3816989292493151, "learning_rate": 2.1400620242268883e-06, "loss": 0.3374, "step": 5908 }, { "epoch": 1.4064972927946688, "grad_norm": 0.38230399719450614, "learning_rate": 2.1384811041894055e-06, "loss": 0.387, "step": 5909 }, { "epoch": 1.4067352888677336, "grad_norm": 0.3725751003550838, "learning_rate": 2.1369006094444215e-06, "loss": 0.2997, "step": 5910 }, { "epoch": 1.4069732849407985, "grad_norm": 0.385430704556038, "learning_rate": 2.1353205402268368e-06, "loss": 0.3085, "step": 5911 }, { "epoch": 1.4072112810138633, "grad_norm": 0.376139109287831, "learning_rate": 2.1337408967714883e-06, "loss": 0.3395, "step": 5912 }, { "epoch": 1.407449277086928, "grad_norm": 0.3715517206789897, "learning_rate": 2.1321616793131507e-06, "loss": 0.3763, "step": 5913 }, { "epoch": 1.407687273159993, "grad_norm": 0.39429770665412, "learning_rate": 2.130582888086534e-06, "loss": 0.2859, "step": 5914 }, { "epoch": 1.4079252692330577, "grad_norm": 0.3733749256636019, "learning_rate": 2.129004523326284e-06, "loss": 0.2936, "step": 5915 }, { "epoch": 1.4081632653061225, "grad_norm": 0.8974813867363939, "learning_rate": 2.1274265852669894e-06, "loss": 0.3692, "step": 5916 }, { "epoch": 1.4084012613791872, "grad_norm": 0.3681033154441353, "learning_rate": 2.125849074143168e-06, "loss": 0.3132, "step": 5917 }, { "epoch": 1.408639257452252, "grad_norm": 0.3871816903644566, "learning_rate": 2.124271990189277e-06, "loss": 0.2816, "step": 5918 }, { "epoch": 1.408877253525317, "grad_norm": 0.3733497998587013, "learning_rate": 2.1226953336397105e-06, "loss": 0.3519, "step": 5919 }, { "epoch": 1.4091152495983816, "grad_norm": 0.3581335746536029, "learning_rate": 2.1211191047287988e-06, "loss": 0.3848, "step": 5920 }, { "epoch": 1.4093532456714464, "grad_norm": 0.35647096310686627, "learning_rate": 2.119543303690808e-06, "loss": 0.2912, "step": 5921 }, { "epoch": 1.4095912417445113, "grad_norm": 0.3783590852785142, "learning_rate": 2.117967930759941e-06, "loss": 0.3103, "step": 5922 }, { "epoch": 1.409829237817576, "grad_norm": 0.39200620885506376, "learning_rate": 2.1163929861703383e-06, "loss": 0.3547, "step": 5923 }, { "epoch": 1.4100672338906408, "grad_norm": 0.3607566196572999, "learning_rate": 2.1148184701560742e-06, "loss": 0.3402, "step": 5924 }, { "epoch": 1.4103052299637056, "grad_norm": 0.36895223669749566, "learning_rate": 2.113244382951162e-06, "loss": 0.275, "step": 5925 }, { "epoch": 1.4105432260367703, "grad_norm": 0.3459412598350246, "learning_rate": 2.1116707247895484e-06, "loss": 0.3434, "step": 5926 }, { "epoch": 1.4107812221098353, "grad_norm": 0.3767275620938957, "learning_rate": 2.1100974959051198e-06, "loss": 0.38, "step": 5927 }, { "epoch": 1.4110192181829, "grad_norm": 0.39657367634889606, "learning_rate": 2.1085246965316936e-06, "loss": 0.3327, "step": 5928 }, { "epoch": 1.4112572142559647, "grad_norm": 0.39804610232153587, "learning_rate": 2.106952326903031e-06, "loss": 0.2669, "step": 5929 }, { "epoch": 1.4114952103290297, "grad_norm": 0.42512925166801646, "learning_rate": 2.105380387252824e-06, "loss": 0.3799, "step": 5930 }, { "epoch": 1.4117332064020944, "grad_norm": 0.367576966472368, "learning_rate": 2.1038088778147004e-06, "loss": 0.3648, "step": 5931 }, { "epoch": 1.4119712024751592, "grad_norm": 0.4071023704661706, "learning_rate": 2.1022377988222255e-06, "loss": 0.2493, "step": 5932 }, { "epoch": 1.412209198548224, "grad_norm": 0.3517027129740413, "learning_rate": 2.100667150508899e-06, "loss": 0.3263, "step": 5933 }, { "epoch": 1.4124471946212886, "grad_norm": 0.3630304666957127, "learning_rate": 2.099096933108163e-06, "loss": 0.3633, "step": 5934 }, { "epoch": 1.4126851906943536, "grad_norm": 0.36827148716226255, "learning_rate": 2.0975271468533864e-06, "loss": 0.3073, "step": 5935 }, { "epoch": 1.4129231867674183, "grad_norm": 0.39577401190423284, "learning_rate": 2.0959577919778803e-06, "loss": 0.2861, "step": 5936 }, { "epoch": 1.413161182840483, "grad_norm": 0.40173519198012886, "learning_rate": 2.0943888687148883e-06, "loss": 0.3431, "step": 5937 }, { "epoch": 1.413399178913548, "grad_norm": 0.3992484721549741, "learning_rate": 2.0928203772975917e-06, "loss": 0.3809, "step": 5938 }, { "epoch": 1.4136371749866128, "grad_norm": 0.39599927216720754, "learning_rate": 2.0912523179591076e-06, "loss": 0.3008, "step": 5939 }, { "epoch": 1.4138751710596775, "grad_norm": 0.4242280603241625, "learning_rate": 2.0896846909324874e-06, "loss": 0.3229, "step": 5940 }, { "epoch": 1.4141131671327423, "grad_norm": 0.3664045587336667, "learning_rate": 2.0881174964507205e-06, "loss": 0.4031, "step": 5941 }, { "epoch": 1.414351163205807, "grad_norm": 0.36607982694161056, "learning_rate": 2.086550734746728e-06, "loss": 0.3328, "step": 5942 }, { "epoch": 1.414589159278872, "grad_norm": 0.3924516864004525, "learning_rate": 2.0849844060533736e-06, "loss": 0.2976, "step": 5943 }, { "epoch": 1.4148271553519367, "grad_norm": 0.4057578371930518, "learning_rate": 2.0834185106034503e-06, "loss": 0.3015, "step": 5944 }, { "epoch": 1.4150651514250014, "grad_norm": 0.3805032627968153, "learning_rate": 2.081853048629689e-06, "loss": 0.3876, "step": 5945 }, { "epoch": 1.4153031474980664, "grad_norm": 0.3698394601043139, "learning_rate": 2.0802880203647565e-06, "loss": 0.2778, "step": 5946 }, { "epoch": 1.4155411435711311, "grad_norm": 0.3828459136639372, "learning_rate": 2.078723426041254e-06, "loss": 0.3119, "step": 5947 }, { "epoch": 1.4157791396441959, "grad_norm": 0.38957935834327895, "learning_rate": 2.0771592658917196e-06, "loss": 0.3794, "step": 5948 }, { "epoch": 1.4160171357172606, "grad_norm": 0.3851845261585521, "learning_rate": 2.0755955401486255e-06, "loss": 0.3443, "step": 5949 }, { "epoch": 1.4162551317903254, "grad_norm": 0.3546131940340063, "learning_rate": 2.0740322490443802e-06, "loss": 0.3024, "step": 5950 }, { "epoch": 1.4164931278633903, "grad_norm": 0.4166580686412631, "learning_rate": 2.072469392811329e-06, "loss": 0.3346, "step": 5951 }, { "epoch": 1.416731123936455, "grad_norm": 0.40477461909600554, "learning_rate": 2.070906971681748e-06, "loss": 0.3986, "step": 5952 }, { "epoch": 1.4169691200095198, "grad_norm": 0.3863431875261796, "learning_rate": 2.0693449858878543e-06, "loss": 0.2625, "step": 5953 }, { "epoch": 1.4172071160825848, "grad_norm": 0.42378362283530085, "learning_rate": 2.0677834356617967e-06, "loss": 0.2718, "step": 5954 }, { "epoch": 1.4174451121556495, "grad_norm": 0.37534140974542646, "learning_rate": 2.066222321235659e-06, "loss": 0.3537, "step": 5955 }, { "epoch": 1.4176831082287142, "grad_norm": 0.3421669011162348, "learning_rate": 2.064661642841462e-06, "loss": 0.3523, "step": 5956 }, { "epoch": 1.417921104301779, "grad_norm": 0.37943922157081356, "learning_rate": 2.0631014007111627e-06, "loss": 0.2818, "step": 5957 }, { "epoch": 1.4181591003748437, "grad_norm": 0.37246301648402214, "learning_rate": 2.0615415950766504e-06, "loss": 0.3343, "step": 5958 }, { "epoch": 1.4183970964479087, "grad_norm": 0.38400127878264756, "learning_rate": 2.0599822261697516e-06, "loss": 0.3702, "step": 5959 }, { "epoch": 1.4186350925209734, "grad_norm": 0.39219554424930037, "learning_rate": 2.0584232942222247e-06, "loss": 0.2835, "step": 5960 }, { "epoch": 1.4188730885940382, "grad_norm": 0.39117355067041304, "learning_rate": 2.056864799465769e-06, "loss": 0.2966, "step": 5961 }, { "epoch": 1.4191110846671031, "grad_norm": 0.39451400558685296, "learning_rate": 2.055306742132014e-06, "loss": 0.337, "step": 5962 }, { "epoch": 1.4193490807401679, "grad_norm": 0.36635780928223005, "learning_rate": 2.053749122452525e-06, "loss": 0.362, "step": 5963 }, { "epoch": 1.4195870768132326, "grad_norm": 0.3873092005359913, "learning_rate": 2.052191940658803e-06, "loss": 0.3079, "step": 5964 }, { "epoch": 1.4198250728862973, "grad_norm": 0.3836880319475697, "learning_rate": 2.050635196982284e-06, "loss": 0.3435, "step": 5965 }, { "epoch": 1.420063068959362, "grad_norm": 0.44296005657411364, "learning_rate": 2.049078891654339e-06, "loss": 0.37, "step": 5966 }, { "epoch": 1.420301065032427, "grad_norm": 0.35016323703149027, "learning_rate": 2.0475230249062727e-06, "loss": 0.315, "step": 5967 }, { "epoch": 1.4205390611054918, "grad_norm": 0.3950620054962618, "learning_rate": 2.0459675969693256e-06, "loss": 0.3173, "step": 5968 }, { "epoch": 1.4207770571785565, "grad_norm": 0.3952176517038452, "learning_rate": 2.044412608074672e-06, "loss": 0.3234, "step": 5969 }, { "epoch": 1.4210150532516215, "grad_norm": 0.38307975634819236, "learning_rate": 2.042858058453422e-06, "loss": 0.405, "step": 5970 }, { "epoch": 1.4212530493246862, "grad_norm": 0.3364899399185918, "learning_rate": 2.041303948336622e-06, "loss": 0.3013, "step": 5971 }, { "epoch": 1.421491045397751, "grad_norm": 0.38894109353307277, "learning_rate": 2.0397502779552498e-06, "loss": 0.2979, "step": 5972 }, { "epoch": 1.4217290414708157, "grad_norm": 0.3510083872107239, "learning_rate": 2.0381970475402196e-06, "loss": 0.3435, "step": 5973 }, { "epoch": 1.4219670375438804, "grad_norm": 0.3947818997277785, "learning_rate": 2.0366442573223795e-06, "loss": 0.3896, "step": 5974 }, { "epoch": 1.4222050336169454, "grad_norm": 0.39395505014453025, "learning_rate": 2.0350919075325124e-06, "loss": 0.2891, "step": 5975 }, { "epoch": 1.4224430296900101, "grad_norm": 0.3978783535558053, "learning_rate": 2.0335399984013366e-06, "loss": 0.3043, "step": 5976 }, { "epoch": 1.4226810257630749, "grad_norm": 0.3682896039882474, "learning_rate": 2.0319885301595034e-06, "loss": 0.4002, "step": 5977 }, { "epoch": 1.4229190218361398, "grad_norm": 0.37550375575321043, "learning_rate": 2.0304375030375996e-06, "loss": 0.2694, "step": 5978 }, { "epoch": 1.4231570179092046, "grad_norm": 0.38790715040650325, "learning_rate": 2.0288869172661463e-06, "loss": 0.3062, "step": 5979 }, { "epoch": 1.4233950139822693, "grad_norm": 0.39968408903014346, "learning_rate": 2.0273367730755993e-06, "loss": 0.3375, "step": 5980 }, { "epoch": 1.423633010055334, "grad_norm": 0.3878895341440517, "learning_rate": 2.025787070696348e-06, "loss": 0.3548, "step": 5981 }, { "epoch": 1.4238710061283988, "grad_norm": 0.39123351222051045, "learning_rate": 2.0242378103587157e-06, "loss": 0.2858, "step": 5982 }, { "epoch": 1.4241090022014637, "grad_norm": 0.37665471025750874, "learning_rate": 2.0226889922929603e-06, "loss": 0.3093, "step": 5983 }, { "epoch": 1.4243469982745285, "grad_norm": 0.39559868934355824, "learning_rate": 2.0211406167292775e-06, "loss": 0.3772, "step": 5984 }, { "epoch": 1.4245849943475932, "grad_norm": 0.3851654558218671, "learning_rate": 2.0195926838977926e-06, "loss": 0.2878, "step": 5985 }, { "epoch": 1.4248229904206582, "grad_norm": 0.37919297006292657, "learning_rate": 2.018045194028567e-06, "loss": 0.2562, "step": 5986 }, { "epoch": 1.425060986493723, "grad_norm": 0.40761397021940626, "learning_rate": 2.0164981473515926e-06, "loss": 0.3191, "step": 5987 }, { "epoch": 1.4252989825667877, "grad_norm": 0.4197443032408188, "learning_rate": 2.014951544096804e-06, "loss": 0.3657, "step": 5988 }, { "epoch": 1.4255369786398524, "grad_norm": 0.37473340700791113, "learning_rate": 2.013405384494063e-06, "loss": 0.2811, "step": 5989 }, { "epoch": 1.4257749747129171, "grad_norm": 0.40715204280059647, "learning_rate": 2.0118596687731666e-06, "loss": 0.3151, "step": 5990 }, { "epoch": 1.426012970785982, "grad_norm": 0.3737319670986343, "learning_rate": 2.0103143971638463e-06, "loss": 0.3727, "step": 5991 }, { "epoch": 1.4262509668590468, "grad_norm": 0.3698219377663196, "learning_rate": 2.0087695698957676e-06, "loss": 0.2946, "step": 5992 }, { "epoch": 1.4264889629321116, "grad_norm": 0.37300313864826995, "learning_rate": 2.0072251871985306e-06, "loss": 0.2735, "step": 5993 }, { "epoch": 1.4267269590051765, "grad_norm": 0.3944808909836751, "learning_rate": 2.0056812493016684e-06, "loss": 0.3603, "step": 5994 }, { "epoch": 1.4269649550782413, "grad_norm": 0.3741150627911366, "learning_rate": 2.0041377564346484e-06, "loss": 0.3969, "step": 5995 }, { "epoch": 1.427202951151306, "grad_norm": 0.36618783349147516, "learning_rate": 2.0025947088268714e-06, "loss": 0.3102, "step": 5996 }, { "epoch": 1.4274409472243708, "grad_norm": 0.4085121504761507, "learning_rate": 2.001052106707672e-06, "loss": 0.3215, "step": 5997 }, { "epoch": 1.4276789432974355, "grad_norm": 0.42374164985590546, "learning_rate": 1.9995099503063214e-06, "loss": 0.361, "step": 5998 }, { "epoch": 1.4279169393705005, "grad_norm": 0.36596013003548744, "learning_rate": 1.9979682398520205e-06, "loss": 0.3197, "step": 5999 }, { "epoch": 1.4281549354435652, "grad_norm": 0.38401848047343784, "learning_rate": 1.9964269755739057e-06, "loss": 0.2791, "step": 6000 }, { "epoch": 1.42839293151663, "grad_norm": 0.3988267552673252, "learning_rate": 1.9948861577010475e-06, "loss": 0.313, "step": 6001 }, { "epoch": 1.428630927589695, "grad_norm": 0.38693890781588514, "learning_rate": 1.993345786462449e-06, "loss": 0.391, "step": 6002 }, { "epoch": 1.4288689236627596, "grad_norm": 0.36685062755023384, "learning_rate": 1.991805862087048e-06, "loss": 0.2948, "step": 6003 }, { "epoch": 1.4291069197358244, "grad_norm": 0.5848360061934482, "learning_rate": 1.9902663848037147e-06, "loss": 0.278, "step": 6004 }, { "epoch": 1.4293449158088891, "grad_norm": 0.405283771817613, "learning_rate": 1.988727354841254e-06, "loss": 0.3247, "step": 6005 }, { "epoch": 1.4295829118819539, "grad_norm": 0.38737497620202277, "learning_rate": 1.987188772428403e-06, "loss": 0.3388, "step": 6006 }, { "epoch": 1.4298209079550188, "grad_norm": 0.3778534408788696, "learning_rate": 1.985650637793835e-06, "loss": 0.2796, "step": 6007 }, { "epoch": 1.4300589040280836, "grad_norm": 0.3917808141958219, "learning_rate": 1.9841129511661526e-06, "loss": 0.3428, "step": 6008 }, { "epoch": 1.4302969001011483, "grad_norm": 0.39975809188737405, "learning_rate": 1.9825757127738957e-06, "loss": 0.396, "step": 6009 }, { "epoch": 1.4305348961742133, "grad_norm": 0.37502506922397805, "learning_rate": 1.9810389228455334e-06, "loss": 0.2975, "step": 6010 }, { "epoch": 1.430772892247278, "grad_norm": 0.38726845931918774, "learning_rate": 1.9795025816094747e-06, "loss": 0.273, "step": 6011 }, { "epoch": 1.4310108883203427, "grad_norm": 0.3632876751872661, "learning_rate": 1.9779666892940557e-06, "loss": 0.3556, "step": 6012 }, { "epoch": 1.4312488843934075, "grad_norm": 0.38274179138386594, "learning_rate": 1.9764312461275482e-06, "loss": 0.3783, "step": 6013 }, { "epoch": 1.4314868804664722, "grad_norm": 0.3696767725810876, "learning_rate": 1.974896252338155e-06, "loss": 0.277, "step": 6014 }, { "epoch": 1.4317248765395372, "grad_norm": 0.35996212200903566, "learning_rate": 1.973361708154018e-06, "loss": 0.3241, "step": 6015 }, { "epoch": 1.431962872612602, "grad_norm": 0.3746809277181633, "learning_rate": 1.9718276138032066e-06, "loss": 0.4065, "step": 6016 }, { "epoch": 1.4322008686856667, "grad_norm": 0.3662515570773327, "learning_rate": 1.970293969513725e-06, "loss": 0.3321, "step": 6017 }, { "epoch": 1.4324388647587316, "grad_norm": 0.3893301791415084, "learning_rate": 1.9687607755135114e-06, "loss": 0.269, "step": 6018 }, { "epoch": 1.4326768608317964, "grad_norm": 0.38494260892368, "learning_rate": 1.9672280320304356e-06, "loss": 0.3051, "step": 6019 }, { "epoch": 1.432914856904861, "grad_norm": 0.38863096717866963, "learning_rate": 1.965695739292301e-06, "loss": 0.4005, "step": 6020 }, { "epoch": 1.4331528529779258, "grad_norm": 0.3906075944195601, "learning_rate": 1.964163897526845e-06, "loss": 0.3081, "step": 6021 }, { "epoch": 1.4333908490509906, "grad_norm": 0.4134133532934077, "learning_rate": 1.9626325069617365e-06, "loss": 0.3055, "step": 6022 }, { "epoch": 1.4336288451240555, "grad_norm": 0.4136513446069149, "learning_rate": 1.9611015678245786e-06, "loss": 0.3594, "step": 6023 }, { "epoch": 1.4338668411971203, "grad_norm": 0.3878063222204458, "learning_rate": 1.9595710803429064e-06, "loss": 0.3438, "step": 6024 }, { "epoch": 1.434104837270185, "grad_norm": 0.3940209179695724, "learning_rate": 1.958041044744186e-06, "loss": 0.2871, "step": 6025 }, { "epoch": 1.43434283334325, "grad_norm": 0.3824890901587333, "learning_rate": 1.9565114612558232e-06, "loss": 0.34, "step": 6026 }, { "epoch": 1.4345808294163147, "grad_norm": 0.38272339908598974, "learning_rate": 1.95498233010515e-06, "loss": 0.3798, "step": 6027 }, { "epoch": 1.4348188254893794, "grad_norm": 0.3610519966627758, "learning_rate": 1.9534536515194312e-06, "loss": 0.3026, "step": 6028 }, { "epoch": 1.4350568215624442, "grad_norm": 0.3922292942167091, "learning_rate": 1.9519254257258684e-06, "loss": 0.2929, "step": 6029 }, { "epoch": 1.435294817635509, "grad_norm": 0.4055572921879517, "learning_rate": 1.950397652951593e-06, "loss": 0.3556, "step": 6030 }, { "epoch": 1.4355328137085739, "grad_norm": 0.41972072883290057, "learning_rate": 1.94887033342367e-06, "loss": 0.347, "step": 6031 }, { "epoch": 1.4357708097816386, "grad_norm": 0.3783613808548642, "learning_rate": 1.9473434673690974e-06, "loss": 0.2675, "step": 6032 }, { "epoch": 1.4360088058547034, "grad_norm": 0.374012703303672, "learning_rate": 1.945817055014804e-06, "loss": 0.3121, "step": 6033 }, { "epoch": 1.4362468019277683, "grad_norm": 0.37927442883741, "learning_rate": 1.9442910965876533e-06, "loss": 0.3758, "step": 6034 }, { "epoch": 1.436484798000833, "grad_norm": 0.38190378180904344, "learning_rate": 1.94276559231444e-06, "loss": 0.292, "step": 6035 }, { "epoch": 1.4367227940738978, "grad_norm": 0.37836022258000657, "learning_rate": 1.9412405424218915e-06, "loss": 0.2698, "step": 6036 }, { "epoch": 1.4369607901469625, "grad_norm": 0.38901824469541524, "learning_rate": 1.9397159471366677e-06, "loss": 0.3496, "step": 6037 }, { "epoch": 1.4371987862200273, "grad_norm": 0.37161186422002496, "learning_rate": 1.9381918066853632e-06, "loss": 0.3693, "step": 6038 }, { "epoch": 1.4374367822930922, "grad_norm": 0.38662177675097864, "learning_rate": 1.9366681212945014e-06, "loss": 0.2883, "step": 6039 }, { "epoch": 1.437674778366157, "grad_norm": 0.3647394695355797, "learning_rate": 1.9351448911905407e-06, "loss": 0.2887, "step": 6040 }, { "epoch": 1.4379127744392217, "grad_norm": 0.364210263276713, "learning_rate": 1.933622116599868e-06, "loss": 0.3901, "step": 6041 }, { "epoch": 1.4381507705122867, "grad_norm": 0.4151429490488636, "learning_rate": 1.9320997977488086e-06, "loss": 0.3212, "step": 6042 }, { "epoch": 1.4383887665853514, "grad_norm": 0.4095987714243739, "learning_rate": 1.930577934863616e-06, "loss": 0.2889, "step": 6043 }, { "epoch": 1.4386267626584162, "grad_norm": 0.36732368664961057, "learning_rate": 1.929056528170476e-06, "loss": 0.324, "step": 6044 }, { "epoch": 1.438864758731481, "grad_norm": 0.36769386073709703, "learning_rate": 1.9275355778955073e-06, "loss": 0.3886, "step": 6045 }, { "epoch": 1.4391027548045456, "grad_norm": 0.3914105235529593, "learning_rate": 1.926015084264761e-06, "loss": 0.27, "step": 6046 }, { "epoch": 1.4393407508776106, "grad_norm": 0.35201976589044254, "learning_rate": 1.9244950475042195e-06, "loss": 0.3152, "step": 6047 }, { "epoch": 1.4395787469506753, "grad_norm": 0.37403165542953687, "learning_rate": 1.922975467839799e-06, "loss": 0.3402, "step": 6048 }, { "epoch": 1.43981674302374, "grad_norm": 0.3704566985118877, "learning_rate": 1.9214563454973452e-06, "loss": 0.3232, "step": 6049 }, { "epoch": 1.440054739096805, "grad_norm": 0.3591762003021762, "learning_rate": 1.9199376807026383e-06, "loss": 0.3001, "step": 6050 }, { "epoch": 1.4402927351698698, "grad_norm": 0.3710069533511562, "learning_rate": 1.918419473681389e-06, "loss": 0.3465, "step": 6051 }, { "epoch": 1.4405307312429345, "grad_norm": 0.40999024447887217, "learning_rate": 1.9169017246592404e-06, "loss": 0.4, "step": 6052 }, { "epoch": 1.4407687273159993, "grad_norm": 0.3683061673886724, "learning_rate": 1.915384433861766e-06, "loss": 0.3081, "step": 6053 }, { "epoch": 1.441006723389064, "grad_norm": 0.3822641283219383, "learning_rate": 1.9138676015144765e-06, "loss": 0.2872, "step": 6054 }, { "epoch": 1.441244719462129, "grad_norm": 0.46115476067766986, "learning_rate": 1.912351227842808e-06, "loss": 0.3579, "step": 6055 }, { "epoch": 1.4414827155351937, "grad_norm": 0.3663936685499609, "learning_rate": 1.9108353130721326e-06, "loss": 0.3425, "step": 6056 }, { "epoch": 1.4417207116082584, "grad_norm": 0.3549319540090481, "learning_rate": 1.9093198574277516e-06, "loss": 0.2929, "step": 6057 }, { "epoch": 1.4419587076813234, "grad_norm": 0.42037911174742915, "learning_rate": 1.9078048611348992e-06, "loss": 0.3327, "step": 6058 }, { "epoch": 1.4421967037543881, "grad_norm": 0.3914790764090726, "learning_rate": 1.9062903244187419e-06, "loss": 0.3751, "step": 6059 }, { "epoch": 1.4424346998274529, "grad_norm": 0.39191069864519656, "learning_rate": 1.9047762475043775e-06, "loss": 0.2849, "step": 6060 }, { "epoch": 1.4426726959005176, "grad_norm": 0.5035295970708908, "learning_rate": 1.9032626306168344e-06, "loss": 0.2822, "step": 6061 }, { "epoch": 1.4429106919735823, "grad_norm": 0.3617034512387851, "learning_rate": 1.9017494739810737e-06, "loss": 0.3608, "step": 6062 }, { "epoch": 1.4431486880466473, "grad_norm": 0.3801209303019163, "learning_rate": 1.9002367778219889e-06, "loss": 0.3888, "step": 6063 }, { "epoch": 1.443386684119712, "grad_norm": 0.38415720552229266, "learning_rate": 1.8987245423644012e-06, "loss": 0.3067, "step": 6064 }, { "epoch": 1.4436246801927768, "grad_norm": 0.35618939435471303, "learning_rate": 1.8972127678330703e-06, "loss": 0.3021, "step": 6065 }, { "epoch": 1.4438626762658417, "grad_norm": 0.39214870024334303, "learning_rate": 1.8957014544526808e-06, "loss": 0.3576, "step": 6066 }, { "epoch": 1.4441006723389065, "grad_norm": 0.36937785124750994, "learning_rate": 1.8941906024478524e-06, "loss": 0.315, "step": 6067 }, { "epoch": 1.4443386684119712, "grad_norm": 0.37155671677042273, "learning_rate": 1.8926802120431325e-06, "loss": 0.3286, "step": 6068 }, { "epoch": 1.444576664485036, "grad_norm": 0.4021046642008438, "learning_rate": 1.8911702834630063e-06, "loss": 0.3203, "step": 6069 }, { "epoch": 1.4448146605581007, "grad_norm": 0.4017487540968996, "learning_rate": 1.8896608169318847e-06, "loss": 0.375, "step": 6070 }, { "epoch": 1.4450526566311657, "grad_norm": 0.36752083749151027, "learning_rate": 1.8881518126741121e-06, "loss": 0.2926, "step": 6071 }, { "epoch": 1.4452906527042304, "grad_norm": 0.36577529364212397, "learning_rate": 1.886643270913963e-06, "loss": 0.2998, "step": 6072 }, { "epoch": 1.4455286487772951, "grad_norm": 0.38891808139085265, "learning_rate": 1.885135191875645e-06, "loss": 0.3527, "step": 6073 }, { "epoch": 1.44576664485036, "grad_norm": 0.3858655286851012, "learning_rate": 1.8836275757832957e-06, "loss": 0.3269, "step": 6074 }, { "epoch": 1.4460046409234248, "grad_norm": 0.4342531875991616, "learning_rate": 1.8821204228609835e-06, "loss": 0.2787, "step": 6075 }, { "epoch": 1.4462426369964896, "grad_norm": 0.38696089076302526, "learning_rate": 1.880613733332709e-06, "loss": 0.3401, "step": 6076 }, { "epoch": 1.4464806330695543, "grad_norm": 0.3949331561518244, "learning_rate": 1.879107507422404e-06, "loss": 0.3802, "step": 6077 }, { "epoch": 1.446718629142619, "grad_norm": 0.405406854410338, "learning_rate": 1.8776017453539307e-06, "loss": 0.3093, "step": 6078 }, { "epoch": 1.446956625215684, "grad_norm": 0.37618600362179, "learning_rate": 1.8760964473510823e-06, "loss": 0.2896, "step": 6079 }, { "epoch": 1.4471946212887488, "grad_norm": 0.4006013598116971, "learning_rate": 1.8745916136375814e-06, "loss": 0.3919, "step": 6080 }, { "epoch": 1.4474326173618135, "grad_norm": 0.38408656575398153, "learning_rate": 1.8730872444370874e-06, "loss": 0.366, "step": 6081 }, { "epoch": 1.4476706134348785, "grad_norm": 0.3948058469025689, "learning_rate": 1.8715833399731854e-06, "loss": 0.2908, "step": 6082 }, { "epoch": 1.4479086095079432, "grad_norm": 0.3849530328785906, "learning_rate": 1.870079900469392e-06, "loss": 0.2974, "step": 6083 }, { "epoch": 1.448146605581008, "grad_norm": 0.4378511368770222, "learning_rate": 1.868576926149156e-06, "loss": 0.3846, "step": 6084 }, { "epoch": 1.4483846016540727, "grad_norm": 0.3862944035305655, "learning_rate": 1.8670744172358563e-06, "loss": 0.3343, "step": 6085 }, { "epoch": 1.4486225977271374, "grad_norm": 0.4279048030239408, "learning_rate": 1.865572373952803e-06, "loss": 0.3044, "step": 6086 }, { "epoch": 1.4488605938002024, "grad_norm": 0.45420594269270115, "learning_rate": 1.8640707965232375e-06, "loss": 0.337, "step": 6087 }, { "epoch": 1.4490985898732671, "grad_norm": 0.38069002954177594, "learning_rate": 1.86256968517033e-06, "loss": 0.3705, "step": 6088 }, { "epoch": 1.4493365859463319, "grad_norm": 0.41715068301428293, "learning_rate": 1.8610690401171845e-06, "loss": 0.2696, "step": 6089 }, { "epoch": 1.4495745820193968, "grad_norm": 0.41094165450892295, "learning_rate": 1.8595688615868324e-06, "loss": 0.3237, "step": 6090 }, { "epoch": 1.4498125780924616, "grad_norm": 0.40909914025067795, "learning_rate": 1.8580691498022364e-06, "loss": 0.3643, "step": 6091 }, { "epoch": 1.4500505741655263, "grad_norm": 0.39406224435785253, "learning_rate": 1.8565699049862939e-06, "loss": 0.3093, "step": 6092 }, { "epoch": 1.450288570238591, "grad_norm": 0.4265470858706377, "learning_rate": 1.8550711273618283e-06, "loss": 0.2583, "step": 6093 }, { "epoch": 1.4505265663116558, "grad_norm": 0.4557324973621654, "learning_rate": 1.8535728171515949e-06, "loss": 0.3407, "step": 6094 }, { "epoch": 1.4507645623847207, "grad_norm": 0.39977705869048036, "learning_rate": 1.8520749745782784e-06, "loss": 0.3649, "step": 6095 }, { "epoch": 1.4510025584577855, "grad_norm": 0.37280847284038304, "learning_rate": 1.8505775998644982e-06, "loss": 0.3052, "step": 6096 }, { "epoch": 1.4512405545308502, "grad_norm": 0.39792431548064744, "learning_rate": 1.8490806932327993e-06, "loss": 0.3123, "step": 6097 }, { "epoch": 1.4514785506039152, "grad_norm": 0.42229729780036634, "learning_rate": 1.8475842549056594e-06, "loss": 0.3582, "step": 6098 }, { "epoch": 1.45171654667698, "grad_norm": 0.3864779005126171, "learning_rate": 1.8460882851054867e-06, "loss": 0.3315, "step": 6099 }, { "epoch": 1.4519545427500447, "grad_norm": 0.3953598919130047, "learning_rate": 1.8445927840546186e-06, "loss": 0.2967, "step": 6100 }, { "epoch": 1.4521925388231094, "grad_norm": 0.38578775492268436, "learning_rate": 1.8430977519753235e-06, "loss": 0.3217, "step": 6101 }, { "epoch": 1.4524305348961741, "grad_norm": 0.44987910181185625, "learning_rate": 1.8416031890898006e-06, "loss": 0.3781, "step": 6102 }, { "epoch": 1.4526685309692389, "grad_norm": 0.3912127016231833, "learning_rate": 1.840109095620179e-06, "loss": 0.2981, "step": 6103 }, { "epoch": 1.4529065270423038, "grad_norm": 0.4302943166340117, "learning_rate": 1.838615471788518e-06, "loss": 0.2725, "step": 6104 }, { "epoch": 1.4531445231153686, "grad_norm": 0.43080639650484565, "learning_rate": 1.8371223178168063e-06, "loss": 0.3699, "step": 6105 }, { "epoch": 1.4533825191884333, "grad_norm": 0.4082319577973362, "learning_rate": 1.8356296339269635e-06, "loss": 0.3588, "step": 6106 }, { "epoch": 1.4536205152614983, "grad_norm": 0.36934438218455085, "learning_rate": 1.8341374203408407e-06, "loss": 0.2699, "step": 6107 }, { "epoch": 1.453858511334563, "grad_norm": 0.3802597728212302, "learning_rate": 1.8326456772802148e-06, "loss": 0.3429, "step": 6108 }, { "epoch": 1.4540965074076277, "grad_norm": 0.3935734960848936, "learning_rate": 1.8311544049668001e-06, "loss": 0.4122, "step": 6109 }, { "epoch": 1.4543345034806925, "grad_norm": 0.4192793931557721, "learning_rate": 1.8296636036222338e-06, "loss": 0.2778, "step": 6110 }, { "epoch": 1.4545724995537572, "grad_norm": 0.383099877153263, "learning_rate": 1.8281732734680863e-06, "loss": 0.2909, "step": 6111 }, { "epoch": 1.4548104956268222, "grad_norm": 0.5516280401082402, "learning_rate": 1.8266834147258577e-06, "loss": 0.3406, "step": 6112 }, { "epoch": 1.455048491699887, "grad_norm": 0.3873783424168311, "learning_rate": 1.8251940276169777e-06, "loss": 0.3955, "step": 6113 }, { "epoch": 1.4552864877729517, "grad_norm": 0.38380201175264, "learning_rate": 1.8237051123628057e-06, "loss": 0.2968, "step": 6114 }, { "epoch": 1.4555244838460166, "grad_norm": 0.39275514580382104, "learning_rate": 1.8222166691846321e-06, "loss": 0.2943, "step": 6115 }, { "epoch": 1.4557624799190814, "grad_norm": 0.40228420689226446, "learning_rate": 1.8207286983036765e-06, "loss": 0.3527, "step": 6116 }, { "epoch": 1.456000475992146, "grad_norm": 0.37618752702614194, "learning_rate": 1.819241199941087e-06, "loss": 0.3262, "step": 6117 }, { "epoch": 1.4562384720652108, "grad_norm": 0.37547398305167284, "learning_rate": 1.8177541743179423e-06, "loss": 0.2852, "step": 6118 }, { "epoch": 1.4564764681382756, "grad_norm": 0.41032362127204985, "learning_rate": 1.8162676216552533e-06, "loss": 0.3307, "step": 6119 }, { "epoch": 1.4567144642113405, "grad_norm": 0.4347197449287449, "learning_rate": 1.8147815421739578e-06, "loss": 0.3799, "step": 6120 }, { "epoch": 1.4569524602844053, "grad_norm": 0.3563768966230964, "learning_rate": 1.8132959360949237e-06, "loss": 0.2712, "step": 6121 }, { "epoch": 1.45719045635747, "grad_norm": 0.38101919344310853, "learning_rate": 1.811810803638947e-06, "loss": 0.279, "step": 6122 }, { "epoch": 1.457428452430535, "grad_norm": 0.418475439520036, "learning_rate": 1.810326145026759e-06, "loss": 0.3644, "step": 6123 }, { "epoch": 1.4576664485035997, "grad_norm": 0.34723819719324317, "learning_rate": 1.8088419604790135e-06, "loss": 0.313, "step": 6124 }, { "epoch": 1.4579044445766645, "grad_norm": 0.40905964751759843, "learning_rate": 1.807358250216299e-06, "loss": 0.2901, "step": 6125 }, { "epoch": 1.4581424406497292, "grad_norm": 0.39190988886703376, "learning_rate": 1.8058750144591308e-06, "loss": 0.3643, "step": 6126 }, { "epoch": 1.458380436722794, "grad_norm": 0.39412625081899383, "learning_rate": 1.804392253427954e-06, "loss": 0.4072, "step": 6127 }, { "epoch": 1.458618432795859, "grad_norm": 0.38625582558289245, "learning_rate": 1.8029099673431438e-06, "loss": 0.3247, "step": 6128 }, { "epoch": 1.4588564288689236, "grad_norm": 0.37374977195081105, "learning_rate": 1.8014281564250046e-06, "loss": 0.2899, "step": 6129 }, { "epoch": 1.4590944249419884, "grad_norm": 0.4022256905018728, "learning_rate": 1.7999468208937698e-06, "loss": 0.3349, "step": 6130 }, { "epoch": 1.4593324210150533, "grad_norm": 0.3782717385013846, "learning_rate": 1.7984659609696037e-06, "loss": 0.3659, "step": 6131 }, { "epoch": 1.459570417088118, "grad_norm": 0.42371774399832857, "learning_rate": 1.7969855768725973e-06, "loss": 0.2867, "step": 6132 }, { "epoch": 1.4598084131611828, "grad_norm": 0.4828644226623123, "learning_rate": 1.7955056688227735e-06, "loss": 0.3071, "step": 6133 }, { "epoch": 1.4600464092342476, "grad_norm": 0.3711693938332572, "learning_rate": 1.7940262370400823e-06, "loss": 0.3749, "step": 6134 }, { "epoch": 1.4602844053073123, "grad_norm": 0.38061515908607924, "learning_rate": 1.792547281744403e-06, "loss": 0.2909, "step": 6135 }, { "epoch": 1.4605224013803773, "grad_norm": 0.39151517351766074, "learning_rate": 1.7910688031555473e-06, "loss": 0.2842, "step": 6136 }, { "epoch": 1.460760397453442, "grad_norm": 0.41546568095239644, "learning_rate": 1.7895908014932529e-06, "loss": 0.3413, "step": 6137 }, { "epoch": 1.4609983935265067, "grad_norm": 0.40057642081942424, "learning_rate": 1.788113276977187e-06, "loss": 0.3942, "step": 6138 }, { "epoch": 1.4612363895995717, "grad_norm": 0.4025786155182079, "learning_rate": 1.7866362298269468e-06, "loss": 0.287, "step": 6139 }, { "epoch": 1.4614743856726364, "grad_norm": 0.36206986714191153, "learning_rate": 1.7851596602620568e-06, "loss": 0.3139, "step": 6140 }, { "epoch": 1.4617123817457012, "grad_norm": 0.4078012441791886, "learning_rate": 1.7836835685019732e-06, "loss": 0.37, "step": 6141 }, { "epoch": 1.461950377818766, "grad_norm": 0.36946996756792294, "learning_rate": 1.7822079547660792e-06, "loss": 0.3086, "step": 6142 }, { "epoch": 1.4621883738918307, "grad_norm": 0.3817427648802179, "learning_rate": 1.7807328192736872e-06, "loss": 0.2633, "step": 6143 }, { "epoch": 1.4624263699648956, "grad_norm": 0.38265305398727073, "learning_rate": 1.7792581622440392e-06, "loss": 0.3307, "step": 6144 }, { "epoch": 1.4626643660379604, "grad_norm": 0.5748853333539239, "learning_rate": 1.777783983896304e-06, "loss": 0.3755, "step": 6145 }, { "epoch": 1.462902362111025, "grad_norm": 0.39571210226781933, "learning_rate": 1.7763102844495838e-06, "loss": 0.3066, "step": 6146 }, { "epoch": 1.46314035818409, "grad_norm": 0.39702192397444136, "learning_rate": 1.7748370641229063e-06, "loss": 0.2969, "step": 6147 }, { "epoch": 1.4633783542571548, "grad_norm": 0.3736958018420523, "learning_rate": 1.773364323135227e-06, "loss": 0.3624, "step": 6148 }, { "epoch": 1.4636163503302195, "grad_norm": 0.3853561768534024, "learning_rate": 1.7718920617054313e-06, "loss": 0.359, "step": 6149 }, { "epoch": 1.4638543464032843, "grad_norm": 0.3882989341472717, "learning_rate": 1.7704202800523362e-06, "loss": 0.267, "step": 6150 }, { "epoch": 1.464092342476349, "grad_norm": 0.40235971375885304, "learning_rate": 1.768948978394684e-06, "loss": 0.3269, "step": 6151 }, { "epoch": 1.464330338549414, "grad_norm": 0.41400386307178666, "learning_rate": 1.7674781569511451e-06, "loss": 0.3767, "step": 6152 }, { "epoch": 1.4645683346224787, "grad_norm": 0.4015051161422788, "learning_rate": 1.766007815940321e-06, "loss": 0.2967, "step": 6153 }, { "epoch": 1.4648063306955434, "grad_norm": 0.39178112671184995, "learning_rate": 1.7645379555807408e-06, "loss": 0.276, "step": 6154 }, { "epoch": 1.4650443267686084, "grad_norm": 0.4115792252224875, "learning_rate": 1.7630685760908623e-06, "loss": 0.3404, "step": 6155 }, { "epoch": 1.4652823228416731, "grad_norm": 0.38556483981697237, "learning_rate": 1.7615996776890704e-06, "loss": 0.3528, "step": 6156 }, { "epoch": 1.4655203189147379, "grad_norm": 0.37944105872349776, "learning_rate": 1.760131260593681e-06, "loss": 0.2946, "step": 6157 }, { "epoch": 1.4657583149878026, "grad_norm": 0.3902900142096629, "learning_rate": 1.7586633250229368e-06, "loss": 0.3307, "step": 6158 }, { "epoch": 1.4659963110608674, "grad_norm": 0.3814146994358725, "learning_rate": 1.7571958711950088e-06, "loss": 0.3736, "step": 6159 }, { "epoch": 1.4662343071339323, "grad_norm": 0.37047847418626345, "learning_rate": 1.7557288993279981e-06, "loss": 0.3168, "step": 6160 }, { "epoch": 1.466472303206997, "grad_norm": 0.38122549662040817, "learning_rate": 1.754262409639932e-06, "loss": 0.2881, "step": 6161 }, { "epoch": 1.4667102992800618, "grad_norm": 0.3933736686295024, "learning_rate": 1.7527964023487676e-06, "loss": 0.3072, "step": 6162 }, { "epoch": 1.4669482953531268, "grad_norm": 0.4116114307272517, "learning_rate": 1.751330877672388e-06, "loss": 0.3761, "step": 6163 }, { "epoch": 1.4671862914261915, "grad_norm": 0.36687857623003073, "learning_rate": 1.7498658358286098e-06, "loss": 0.285, "step": 6164 }, { "epoch": 1.4674242874992562, "grad_norm": 0.35502119768687834, "learning_rate": 1.7484012770351732e-06, "loss": 0.327, "step": 6165 }, { "epoch": 1.467662283572321, "grad_norm": 0.38055825453365044, "learning_rate": 1.7469372015097469e-06, "loss": 0.4053, "step": 6166 }, { "epoch": 1.4679002796453857, "grad_norm": 0.37110944722737155, "learning_rate": 1.7454736094699298e-06, "loss": 0.2759, "step": 6167 }, { "epoch": 1.4681382757184507, "grad_norm": 0.4369470705323925, "learning_rate": 1.7440105011332476e-06, "loss": 0.2671, "step": 6168 }, { "epoch": 1.4683762717915154, "grad_norm": 0.5041602732328805, "learning_rate": 1.7425478767171539e-06, "loss": 0.3163, "step": 6169 }, { "epoch": 1.4686142678645802, "grad_norm": 0.38125323959910473, "learning_rate": 1.741085736439031e-06, "loss": 0.3758, "step": 6170 }, { "epoch": 1.4688522639376451, "grad_norm": 0.4598903890600633, "learning_rate": 1.7396240805161896e-06, "loss": 0.3144, "step": 6171 }, { "epoch": 1.4690902600107099, "grad_norm": 0.38337527005033567, "learning_rate": 1.7381629091658664e-06, "loss": 0.288, "step": 6172 }, { "epoch": 1.4693282560837746, "grad_norm": 0.37602121389981175, "learning_rate": 1.7367022226052299e-06, "loss": 0.3685, "step": 6173 }, { "epoch": 1.4695662521568393, "grad_norm": 0.3515901765862798, "learning_rate": 1.7352420210513732e-06, "loss": 0.3525, "step": 6174 }, { "epoch": 1.469804248229904, "grad_norm": 0.37679635447891247, "learning_rate": 1.7337823047213186e-06, "loss": 0.269, "step": 6175 }, { "epoch": 1.470042244302969, "grad_norm": 0.421007003539755, "learning_rate": 1.7323230738320162e-06, "loss": 0.3184, "step": 6176 }, { "epoch": 1.4702802403760338, "grad_norm": 0.3953495895644738, "learning_rate": 1.7308643286003412e-06, "loss": 0.4181, "step": 6177 }, { "epoch": 1.4705182364490985, "grad_norm": 0.3904183739455049, "learning_rate": 1.7294060692431035e-06, "loss": 0.3224, "step": 6178 }, { "epoch": 1.4707562325221635, "grad_norm": 0.3958574641622456, "learning_rate": 1.7279482959770345e-06, "loss": 0.281, "step": 6179 }, { "epoch": 1.4709942285952282, "grad_norm": 0.3657347135001612, "learning_rate": 1.7264910090187952e-06, "loss": 0.3256, "step": 6180 }, { "epoch": 1.471232224668293, "grad_norm": 0.36459178634963607, "learning_rate": 1.7250342085849747e-06, "loss": 0.3699, "step": 6181 }, { "epoch": 1.4714702207413577, "grad_norm": 0.35909114336445935, "learning_rate": 1.72357789489209e-06, "loss": 0.2993, "step": 6182 }, { "epoch": 1.4717082168144224, "grad_norm": 0.35376404383488347, "learning_rate": 1.7221220681565842e-06, "loss": 0.3323, "step": 6183 }, { "epoch": 1.4719462128874874, "grad_norm": 0.39465896136944995, "learning_rate": 1.7206667285948303e-06, "loss": 0.3739, "step": 6184 }, { "epoch": 1.4721842089605521, "grad_norm": 0.365373583943615, "learning_rate": 1.7192118764231276e-06, "loss": 0.2824, "step": 6185 }, { "epoch": 1.4724222050336169, "grad_norm": 0.3970412374830905, "learning_rate": 1.7177575118577022e-06, "loss": 0.2851, "step": 6186 }, { "epoch": 1.4726602011066818, "grad_norm": 0.38867388441314077, "learning_rate": 1.7163036351147094e-06, "loss": 0.368, "step": 6187 }, { "epoch": 1.4728981971797466, "grad_norm": 0.3726608425142561, "learning_rate": 1.7148502464102312e-06, "loss": 0.3686, "step": 6188 }, { "epoch": 1.4731361932528113, "grad_norm": 0.3895210104408985, "learning_rate": 1.7133973459602776e-06, "loss": 0.2775, "step": 6189 }, { "epoch": 1.473374189325876, "grad_norm": 0.4279705014335874, "learning_rate": 1.7119449339807825e-06, "loss": 0.3314, "step": 6190 }, { "epoch": 1.4736121853989408, "grad_norm": 0.376456866460005, "learning_rate": 1.710493010687615e-06, "loss": 0.3556, "step": 6191 }, { "epoch": 1.4738501814720057, "grad_norm": 0.3605199356919443, "learning_rate": 1.7090415762965646e-06, "loss": 0.3045, "step": 6192 }, { "epoch": 1.4740881775450705, "grad_norm": 0.44055911897196376, "learning_rate": 1.7075906310233503e-06, "loss": 0.292, "step": 6193 }, { "epoch": 1.4743261736181352, "grad_norm": 0.3901396740042231, "learning_rate": 1.7061401750836182e-06, "loss": 0.3442, "step": 6194 }, { "epoch": 1.4745641696912002, "grad_norm": 0.3987599004644487, "learning_rate": 1.7046902086929428e-06, "loss": 0.4148, "step": 6195 }, { "epoch": 1.474802165764265, "grad_norm": 0.37091763674215245, "learning_rate": 1.7032407320668243e-06, "loss": 0.2732, "step": 6196 }, { "epoch": 1.4750401618373297, "grad_norm": 0.347095443573286, "learning_rate": 1.7017917454206905e-06, "loss": 0.2876, "step": 6197 }, { "epoch": 1.4752781579103944, "grad_norm": 0.3759182793149618, "learning_rate": 1.7003432489698974e-06, "loss": 0.3734, "step": 6198 }, { "epoch": 1.4755161539834591, "grad_norm": 0.4063498607269111, "learning_rate": 1.698895242929725e-06, "loss": 0.3454, "step": 6199 }, { "epoch": 1.475754150056524, "grad_norm": 0.3813713594888871, "learning_rate": 1.6974477275153872e-06, "loss": 0.2854, "step": 6200 }, { "epoch": 1.4759921461295888, "grad_norm": 0.35289582689259275, "learning_rate": 1.696000702942018e-06, "loss": 0.3151, "step": 6201 }, { "epoch": 1.4762301422026536, "grad_norm": 0.38603206935555157, "learning_rate": 1.6945541694246809e-06, "loss": 0.3949, "step": 6202 }, { "epoch": 1.4764681382757185, "grad_norm": 0.35892990933497454, "learning_rate": 1.6931081271783679e-06, "loss": 0.2862, "step": 6203 }, { "epoch": 1.4767061343487833, "grad_norm": 0.3831559472250615, "learning_rate": 1.6916625764179934e-06, "loss": 0.2722, "step": 6204 }, { "epoch": 1.476944130421848, "grad_norm": 0.40027720342137013, "learning_rate": 1.6902175173584062e-06, "loss": 0.3442, "step": 6205 }, { "epoch": 1.4771821264949128, "grad_norm": 0.383928566968621, "learning_rate": 1.6887729502143762e-06, "loss": 0.3227, "step": 6206 }, { "epoch": 1.4774201225679775, "grad_norm": 0.4035104381552861, "learning_rate": 1.6873288752006013e-06, "loss": 0.2994, "step": 6207 }, { "epoch": 1.4776581186410425, "grad_norm": 0.37119371517468214, "learning_rate": 1.685885292531707e-06, "loss": 0.333, "step": 6208 }, { "epoch": 1.4778961147141072, "grad_norm": 0.3897446214278183, "learning_rate": 1.6844422024222462e-06, "loss": 0.3858, "step": 6209 }, { "epoch": 1.478134110787172, "grad_norm": 0.36090692256055146, "learning_rate": 1.6829996050866965e-06, "loss": 0.2898, "step": 6210 }, { "epoch": 1.478372106860237, "grad_norm": 0.3903554712089631, "learning_rate": 1.6815575007394641e-06, "loss": 0.2851, "step": 6211 }, { "epoch": 1.4786101029333016, "grad_norm": 0.38668090625918494, "learning_rate": 1.6801158895948816e-06, "loss": 0.3617, "step": 6212 }, { "epoch": 1.4788480990063664, "grad_norm": 0.3934627308345188, "learning_rate": 1.6786747718672076e-06, "loss": 0.3588, "step": 6213 }, { "epoch": 1.4790860950794311, "grad_norm": 0.43569448317897996, "learning_rate": 1.6772341477706284e-06, "loss": 0.2777, "step": 6214 }, { "epoch": 1.4793240911524959, "grad_norm": 0.4089467347728445, "learning_rate": 1.675794017519256e-06, "loss": 0.3008, "step": 6215 }, { "epoch": 1.4795620872255608, "grad_norm": 0.3882878487464565, "learning_rate": 1.6743543813271296e-06, "loss": 0.3329, "step": 6216 }, { "epoch": 1.4798000832986256, "grad_norm": 0.37303839591336113, "learning_rate": 1.6729152394082144e-06, "loss": 0.302, "step": 6217 }, { "epoch": 1.4800380793716903, "grad_norm": 0.38100751920843007, "learning_rate": 1.6714765919764015e-06, "loss": 0.2819, "step": 6218 }, { "epoch": 1.4802760754447553, "grad_norm": 0.3845807803382066, "learning_rate": 1.6700384392455122e-06, "loss": 0.3177, "step": 6219 }, { "epoch": 1.48051407151782, "grad_norm": 0.34937828068409094, "learning_rate": 1.6686007814292898e-06, "loss": 0.371, "step": 6220 }, { "epoch": 1.4807520675908847, "grad_norm": 0.3465525601796606, "learning_rate": 1.6671636187414065e-06, "loss": 0.2809, "step": 6221 }, { "epoch": 1.4809900636639495, "grad_norm": 0.37505512040887995, "learning_rate": 1.66572695139546e-06, "loss": 0.3043, "step": 6222 }, { "epoch": 1.4812280597370142, "grad_norm": 0.3849866579968192, "learning_rate": 1.664290779604974e-06, "loss": 0.3555, "step": 6223 }, { "epoch": 1.4814660558100792, "grad_norm": 0.38249173168240747, "learning_rate": 1.6628551035833995e-06, "loss": 0.3256, "step": 6224 }, { "epoch": 1.481704051883144, "grad_norm": 0.4133019140360971, "learning_rate": 1.6614199235441141e-06, "loss": 0.2854, "step": 6225 }, { "epoch": 1.4819420479562087, "grad_norm": 0.3804351372256457, "learning_rate": 1.6599852397004184e-06, "loss": 0.3361, "step": 6226 }, { "epoch": 1.4821800440292736, "grad_norm": 0.38603923989851835, "learning_rate": 1.6585510522655463e-06, "loss": 0.4, "step": 6227 }, { "epoch": 1.4824180401023384, "grad_norm": 0.3518414478466677, "learning_rate": 1.657117361452651e-06, "loss": 0.2999, "step": 6228 }, { "epoch": 1.482656036175403, "grad_norm": 0.4067824455417079, "learning_rate": 1.6556841674748148e-06, "loss": 0.2751, "step": 6229 }, { "epoch": 1.4828940322484678, "grad_norm": 0.3888377616545728, "learning_rate": 1.6542514705450453e-06, "loss": 0.3871, "step": 6230 }, { "epoch": 1.4831320283215326, "grad_norm": 0.37104379881621813, "learning_rate": 1.6528192708762775e-06, "loss": 0.3358, "step": 6231 }, { "epoch": 1.4833700243945975, "grad_norm": 0.37321076058841374, "learning_rate": 1.6513875686813696e-06, "loss": 0.2893, "step": 6232 }, { "epoch": 1.4836080204676623, "grad_norm": 0.40249229900762473, "learning_rate": 1.6499563641731115e-06, "loss": 0.3353, "step": 6233 }, { "epoch": 1.483846016540727, "grad_norm": 0.39795621117297714, "learning_rate": 1.6485256575642133e-06, "loss": 0.4186, "step": 6234 }, { "epoch": 1.484084012613792, "grad_norm": 0.3719720131348448, "learning_rate": 1.6470954490673141e-06, "loss": 0.3079, "step": 6235 }, { "epoch": 1.4843220086868567, "grad_norm": 0.4065939918654111, "learning_rate": 1.6456657388949782e-06, "loss": 0.2705, "step": 6236 }, { "epoch": 1.4845600047599214, "grad_norm": 0.38543792010700656, "learning_rate": 1.6442365272596955e-06, "loss": 0.3343, "step": 6237 }, { "epoch": 1.4847980008329862, "grad_norm": 0.364965468199743, "learning_rate": 1.6428078143738828e-06, "loss": 0.3702, "step": 6238 }, { "epoch": 1.485035996906051, "grad_norm": 0.39025088197310315, "learning_rate": 1.6413796004498816e-06, "loss": 0.2871, "step": 6239 }, { "epoch": 1.4852739929791159, "grad_norm": 0.38934407019438205, "learning_rate": 1.6399518856999597e-06, "loss": 0.3311, "step": 6240 }, { "epoch": 1.4855119890521806, "grad_norm": 0.4045158386339914, "learning_rate": 1.6385246703363117e-06, "loss": 0.3784, "step": 6241 }, { "epoch": 1.4857499851252454, "grad_norm": 0.36882815626153603, "learning_rate": 1.6370979545710564e-06, "loss": 0.3222, "step": 6242 }, { "epoch": 1.4859879811983103, "grad_norm": 0.38031964694752574, "learning_rate": 1.6356717386162392e-06, "loss": 0.297, "step": 6243 }, { "epoch": 1.486225977271375, "grad_norm": 0.3535105134819767, "learning_rate": 1.634246022683831e-06, "loss": 0.3139, "step": 6244 }, { "epoch": 1.4864639733444398, "grad_norm": 0.45655045384221404, "learning_rate": 1.6328208069857288e-06, "loss": 0.4003, "step": 6245 }, { "epoch": 1.4867019694175045, "grad_norm": 0.358001681330835, "learning_rate": 1.631396091733753e-06, "loss": 0.2827, "step": 6246 }, { "epoch": 1.4869399654905693, "grad_norm": 0.39972769951910697, "learning_rate": 1.6299718771396544e-06, "loss": 0.32, "step": 6247 }, { "epoch": 1.4871779615636342, "grad_norm": 0.37405419443502563, "learning_rate": 1.6285481634151057e-06, "loss": 0.3557, "step": 6248 }, { "epoch": 1.487415957636699, "grad_norm": 0.36391998904722717, "learning_rate": 1.6271249507717058e-06, "loss": 0.3482, "step": 6249 }, { "epoch": 1.4876539537097637, "grad_norm": 0.36597440907805356, "learning_rate": 1.6257022394209787e-06, "loss": 0.2612, "step": 6250 }, { "epoch": 1.4878919497828287, "grad_norm": 0.37262756229567356, "learning_rate": 1.6242800295743755e-06, "loss": 0.3512, "step": 6251 }, { "epoch": 1.4881299458558934, "grad_norm": 0.393961403942417, "learning_rate": 1.6228583214432708e-06, "loss": 0.3568, "step": 6252 }, { "epoch": 1.4883679419289582, "grad_norm": 0.3800387502035991, "learning_rate": 1.6214371152389646e-06, "loss": 0.2948, "step": 6253 }, { "epoch": 1.488605938002023, "grad_norm": 0.4078216630673394, "learning_rate": 1.6200164111726857e-06, "loss": 0.2829, "step": 6254 }, { "epoch": 1.4888439340750876, "grad_norm": 0.4060042960242336, "learning_rate": 1.6185962094555857e-06, "loss": 0.335, "step": 6255 }, { "epoch": 1.4890819301481526, "grad_norm": 0.37150604082715344, "learning_rate": 1.6171765102987401e-06, "loss": 0.3452, "step": 6256 }, { "epoch": 1.4893199262212173, "grad_norm": 0.4109956321194422, "learning_rate": 1.6157573139131527e-06, "loss": 0.2697, "step": 6257 }, { "epoch": 1.489557922294282, "grad_norm": 0.3579171265280181, "learning_rate": 1.6143386205097506e-06, "loss": 0.3006, "step": 6258 }, { "epoch": 1.489795918367347, "grad_norm": 0.4053809117913336, "learning_rate": 1.6129204302993845e-06, "loss": 0.3799, "step": 6259 }, { "epoch": 1.4900339144404118, "grad_norm": 0.3824402887929443, "learning_rate": 1.611502743492837e-06, "loss": 0.3014, "step": 6260 }, { "epoch": 1.4902719105134765, "grad_norm": 0.37518205183540226, "learning_rate": 1.6100855603008087e-06, "loss": 0.3104, "step": 6261 }, { "epoch": 1.4905099065865413, "grad_norm": 0.4420001474069447, "learning_rate": 1.6086688809339291e-06, "loss": 0.3232, "step": 6262 }, { "epoch": 1.490747902659606, "grad_norm": 0.3790341363906722, "learning_rate": 1.6072527056027509e-06, "loss": 0.4002, "step": 6263 }, { "epoch": 1.490985898732671, "grad_norm": 0.3871942676533004, "learning_rate": 1.6058370345177531e-06, "loss": 0.2823, "step": 6264 }, { "epoch": 1.4912238948057357, "grad_norm": 0.39208286818203664, "learning_rate": 1.6044218678893398e-06, "loss": 0.3084, "step": 6265 }, { "epoch": 1.4914618908788004, "grad_norm": 0.4084685412861594, "learning_rate": 1.6030072059278396e-06, "loss": 0.382, "step": 6266 }, { "epoch": 1.4916998869518654, "grad_norm": 0.399630334749065, "learning_rate": 1.6015930488435055e-06, "loss": 0.3122, "step": 6267 }, { "epoch": 1.4919378830249301, "grad_norm": 0.3744742890214211, "learning_rate": 1.6001793968465173e-06, "loss": 0.2994, "step": 6268 }, { "epoch": 1.4921758790979949, "grad_norm": 0.4074445031836467, "learning_rate": 1.5987662501469787e-06, "loss": 0.3392, "step": 6269 }, { "epoch": 1.4924138751710596, "grad_norm": 0.3701010877824389, "learning_rate": 1.5973536089549174e-06, "loss": 0.3836, "step": 6270 }, { "epoch": 1.4926518712441244, "grad_norm": 0.34247020644927706, "learning_rate": 1.595941473480287e-06, "loss": 0.2838, "step": 6271 }, { "epoch": 1.4928898673171893, "grad_norm": 0.4149376790144497, "learning_rate": 1.594529843932966e-06, "loss": 0.2893, "step": 6272 }, { "epoch": 1.493127863390254, "grad_norm": 0.37778290292253097, "learning_rate": 1.593118720522756e-06, "loss": 0.3544, "step": 6273 }, { "epoch": 1.4933658594633188, "grad_norm": 0.38515242574627634, "learning_rate": 1.591708103459388e-06, "loss": 0.3389, "step": 6274 }, { "epoch": 1.4936038555363838, "grad_norm": 0.392490049369985, "learning_rate": 1.590297992952513e-06, "loss": 0.2979, "step": 6275 }, { "epoch": 1.4938418516094485, "grad_norm": 0.4109028404933468, "learning_rate": 1.588888389211708e-06, "loss": 0.3001, "step": 6276 }, { "epoch": 1.4940798476825132, "grad_norm": 0.37566084684050377, "learning_rate": 1.587479292446475e-06, "loss": 0.3806, "step": 6277 }, { "epoch": 1.494317843755578, "grad_norm": 0.3770457435748563, "learning_rate": 1.5860707028662415e-06, "loss": 0.2925, "step": 6278 }, { "epoch": 1.4945558398286427, "grad_norm": 0.40003813495891827, "learning_rate": 1.5846626206803572e-06, "loss": 0.317, "step": 6279 }, { "epoch": 1.4947938359017077, "grad_norm": 0.38923435537963624, "learning_rate": 1.5832550460980978e-06, "loss": 0.3441, "step": 6280 }, { "epoch": 1.4950318319747724, "grad_norm": 0.3630845762326767, "learning_rate": 1.5818479793286663e-06, "loss": 0.3697, "step": 6281 }, { "epoch": 1.4952698280478371, "grad_norm": 0.38811524683296966, "learning_rate": 1.5804414205811864e-06, "loss": 0.2647, "step": 6282 }, { "epoch": 1.495507824120902, "grad_norm": 0.36922187210672575, "learning_rate": 1.5790353700647066e-06, "loss": 0.3284, "step": 6283 }, { "epoch": 1.4957458201939668, "grad_norm": 0.400379309159919, "learning_rate": 1.5776298279882018e-06, "loss": 0.3806, "step": 6284 }, { "epoch": 1.4959838162670316, "grad_norm": 0.38009298047601064, "learning_rate": 1.5762247945605696e-06, "loss": 0.2978, "step": 6285 }, { "epoch": 1.4962218123400963, "grad_norm": 0.3884850826229188, "learning_rate": 1.5748202699906335e-06, "loss": 0.2626, "step": 6286 }, { "epoch": 1.496459808413161, "grad_norm": 0.43122694564171327, "learning_rate": 1.5734162544871379e-06, "loss": 0.349, "step": 6287 }, { "epoch": 1.496697804486226, "grad_norm": 0.37117494480049085, "learning_rate": 1.5720127482587582e-06, "loss": 0.3717, "step": 6288 }, { "epoch": 1.4969358005592908, "grad_norm": 0.6961660701599369, "learning_rate": 1.5706097515140888e-06, "loss": 0.2725, "step": 6289 }, { "epoch": 1.4971737966323555, "grad_norm": 0.39992955711015266, "learning_rate": 1.5692072644616497e-06, "loss": 0.3149, "step": 6290 }, { "epoch": 1.4974117927054205, "grad_norm": 0.3844509561461545, "learning_rate": 1.5678052873098843e-06, "loss": 0.3416, "step": 6291 }, { "epoch": 1.4976497887784852, "grad_norm": 0.4114588280934326, "learning_rate": 1.5664038202671616e-06, "loss": 0.2867, "step": 6292 }, { "epoch": 1.49788778485155, "grad_norm": 0.3838890205158945, "learning_rate": 1.565002863541774e-06, "loss": 0.2827, "step": 6293 }, { "epoch": 1.4981257809246147, "grad_norm": 0.4048312301464947, "learning_rate": 1.5636024173419389e-06, "loss": 0.3504, "step": 6294 }, { "epoch": 1.4983637769976794, "grad_norm": 0.3751861323780018, "learning_rate": 1.562202481875797e-06, "loss": 0.362, "step": 6295 }, { "epoch": 1.4986017730707444, "grad_norm": 0.37261348585719195, "learning_rate": 1.5608030573514131e-06, "loss": 0.2872, "step": 6296 }, { "epoch": 1.4988397691438091, "grad_norm": 0.6348899760877189, "learning_rate": 1.5594041439767772e-06, "loss": 0.2771, "step": 6297 }, { "epoch": 1.4990777652168739, "grad_norm": 0.38641372515323824, "learning_rate": 1.5580057419598011e-06, "loss": 0.352, "step": 6298 }, { "epoch": 1.4993157612899388, "grad_norm": 0.39465392512377706, "learning_rate": 1.5566078515083227e-06, "loss": 0.3409, "step": 6299 }, { "epoch": 1.4995537573630036, "grad_norm": 0.3627033946670975, "learning_rate": 1.5552104728301031e-06, "loss": 0.2711, "step": 6300 }, { "epoch": 1.4997917534360683, "grad_norm": 0.35357386216875875, "learning_rate": 1.5538136061328256e-06, "loss": 0.3098, "step": 6301 }, { "epoch": 1.500029749509133, "grad_norm": 0.401206280542586, "learning_rate": 1.552417251624102e-06, "loss": 0.3832, "step": 6302 }, { "epoch": 1.5002677455821978, "grad_norm": 0.35052024645889873, "learning_rate": 1.5510214095114644e-06, "loss": 0.2957, "step": 6303 }, { "epoch": 1.5005057416552627, "grad_norm": 0.4270002745679526, "learning_rate": 1.5496260800023688e-06, "loss": 0.3093, "step": 6304 }, { "epoch": 1.5007437377283275, "grad_norm": 0.39472985597101196, "learning_rate": 1.5482312633041956e-06, "loss": 0.3537, "step": 6305 }, { "epoch": 1.5009817338013924, "grad_norm": 0.3586093492162438, "learning_rate": 1.54683695962425e-06, "loss": 0.3202, "step": 6306 }, { "epoch": 1.5012197298744572, "grad_norm": 0.4005505431292118, "learning_rate": 1.5454431691697575e-06, "loss": 0.2976, "step": 6307 }, { "epoch": 1.501457725947522, "grad_norm": 0.35924720446659664, "learning_rate": 1.5440498921478726e-06, "loss": 0.2957, "step": 6308 }, { "epoch": 1.5016957220205867, "grad_norm": 0.396240213306404, "learning_rate": 1.5426571287656705e-06, "loss": 0.3937, "step": 6309 }, { "epoch": 1.5019337180936514, "grad_norm": 0.44063755653514214, "learning_rate": 1.5412648792301494e-06, "loss": 0.3058, "step": 6310 }, { "epoch": 1.5021717141667161, "grad_norm": 0.39238660452322954, "learning_rate": 1.5398731437482322e-06, "loss": 0.283, "step": 6311 }, { "epoch": 1.502409710239781, "grad_norm": 0.3882700159407384, "learning_rate": 1.538481922526765e-06, "loss": 0.3765, "step": 6312 }, { "epoch": 1.5026477063128458, "grad_norm": 0.39130343929171285, "learning_rate": 1.5370912157725183e-06, "loss": 0.3601, "step": 6313 }, { "epoch": 1.5028857023859108, "grad_norm": 0.3727457835348889, "learning_rate": 1.5357010236921853e-06, "loss": 0.308, "step": 6314 }, { "epoch": 1.5031236984589755, "grad_norm": 0.38525499619958226, "learning_rate": 1.5343113464923808e-06, "loss": 0.3076, "step": 6315 }, { "epoch": 1.5033616945320403, "grad_norm": 0.3731034146144273, "learning_rate": 1.5329221843796492e-06, "loss": 0.3616, "step": 6316 }, { "epoch": 1.503599690605105, "grad_norm": 0.38290221481731973, "learning_rate": 1.5315335375604523e-06, "loss": 0.2957, "step": 6317 }, { "epoch": 1.5038376866781697, "grad_norm": 0.3851034270150811, "learning_rate": 1.5301454062411769e-06, "loss": 0.3072, "step": 6318 }, { "epoch": 1.5040756827512345, "grad_norm": 0.4023568178915069, "learning_rate": 1.5287577906281347e-06, "loss": 0.3343, "step": 6319 }, { "epoch": 1.5043136788242994, "grad_norm": 0.47865734964101286, "learning_rate": 1.5273706909275593e-06, "loss": 0.3684, "step": 6320 }, { "epoch": 1.5045516748973642, "grad_norm": 0.36513450339658127, "learning_rate": 1.5259841073456078e-06, "loss": 0.3044, "step": 6321 }, { "epoch": 1.5047896709704291, "grad_norm": 0.37595196535771713, "learning_rate": 1.5245980400883609e-06, "loss": 0.3099, "step": 6322 }, { "epoch": 1.5050276670434939, "grad_norm": 0.386737994428304, "learning_rate": 1.5232124893618228e-06, "loss": 0.3566, "step": 6323 }, { "epoch": 1.5052656631165586, "grad_norm": 0.341571020509951, "learning_rate": 1.5218274553719198e-06, "loss": 0.3182, "step": 6324 }, { "epoch": 1.5055036591896234, "grad_norm": 0.3790719754813889, "learning_rate": 1.5204429383245034e-06, "loss": 0.2723, "step": 6325 }, { "epoch": 1.505741655262688, "grad_norm": 0.3997178109826318, "learning_rate": 1.5190589384253458e-06, "loss": 0.2983, "step": 6326 }, { "epoch": 1.5059796513357528, "grad_norm": 0.36641685229169446, "learning_rate": 1.5176754558801448e-06, "loss": 0.4068, "step": 6327 }, { "epoch": 1.5062176474088178, "grad_norm": 0.34991820225784404, "learning_rate": 1.516292490894518e-06, "loss": 0.2743, "step": 6328 }, { "epoch": 1.5064556434818825, "grad_norm": 0.35331131328827264, "learning_rate": 1.514910043674011e-06, "loss": 0.2919, "step": 6329 }, { "epoch": 1.5066936395549475, "grad_norm": 0.37730212152272385, "learning_rate": 1.5135281144240888e-06, "loss": 0.3632, "step": 6330 }, { "epoch": 1.5069316356280122, "grad_norm": 0.3634842374888263, "learning_rate": 1.5121467033501403e-06, "loss": 0.3557, "step": 6331 }, { "epoch": 1.507169631701077, "grad_norm": 0.37734949621666297, "learning_rate": 1.5107658106574764e-06, "loss": 0.2589, "step": 6332 }, { "epoch": 1.5074076277741417, "grad_norm": 0.3625755897347857, "learning_rate": 1.5093854365513327e-06, "loss": 0.3306, "step": 6333 }, { "epoch": 1.5076456238472065, "grad_norm": 0.3709257192300279, "learning_rate": 1.5080055812368654e-06, "loss": 0.3503, "step": 6334 }, { "epoch": 1.5078836199202712, "grad_norm": 0.3697093486108935, "learning_rate": 1.5066262449191576e-06, "loss": 0.2987, "step": 6335 }, { "epoch": 1.5081216159933362, "grad_norm": 0.3975396214243995, "learning_rate": 1.5052474278032125e-06, "loss": 0.2942, "step": 6336 }, { "epoch": 1.508359612066401, "grad_norm": 0.36248809899098255, "learning_rate": 1.5038691300939552e-06, "loss": 0.3452, "step": 6337 }, { "epoch": 1.5085976081394659, "grad_norm": 0.37957957692819194, "learning_rate": 1.5024913519962353e-06, "loss": 0.38, "step": 6338 }, { "epoch": 1.5088356042125306, "grad_norm": 0.36619032508426047, "learning_rate": 1.5011140937148243e-06, "loss": 0.2854, "step": 6339 }, { "epoch": 1.5090736002855953, "grad_norm": 0.36939842494901753, "learning_rate": 1.4997373554544176e-06, "loss": 0.2817, "step": 6340 }, { "epoch": 1.50931159635866, "grad_norm": 0.3996275043396052, "learning_rate": 1.4983611374196323e-06, "loss": 0.3731, "step": 6341 }, { "epoch": 1.5095495924317248, "grad_norm": 0.37179348365865195, "learning_rate": 1.4969854398150069e-06, "loss": 0.3166, "step": 6342 }, { "epoch": 1.5097875885047896, "grad_norm": 0.4072043189526779, "learning_rate": 1.4956102628450065e-06, "loss": 0.2694, "step": 6343 }, { "epoch": 1.5100255845778545, "grad_norm": 0.41059367411920217, "learning_rate": 1.4942356067140162e-06, "loss": 0.327, "step": 6344 }, { "epoch": 1.5102635806509193, "grad_norm": 0.3933303176194039, "learning_rate": 1.4928614716263428e-06, "loss": 0.3873, "step": 6345 }, { "epoch": 1.5105015767239842, "grad_norm": 0.38198672911717163, "learning_rate": 1.4914878577862168e-06, "loss": 0.2796, "step": 6346 }, { "epoch": 1.510739572797049, "grad_norm": 0.5557986276562655, "learning_rate": 1.490114765397792e-06, "loss": 0.3061, "step": 6347 }, { "epoch": 1.5109775688701137, "grad_norm": 0.40633469354717716, "learning_rate": 1.4887421946651436e-06, "loss": 0.3823, "step": 6348 }, { "epoch": 1.5112155649431784, "grad_norm": 0.40308923354303, "learning_rate": 1.487370145792269e-06, "loss": 0.3266, "step": 6349 }, { "epoch": 1.5114535610162432, "grad_norm": 0.38544791649854926, "learning_rate": 1.4859986189830894e-06, "loss": 0.2822, "step": 6350 }, { "epoch": 1.511691557089308, "grad_norm": 0.3987203797284687, "learning_rate": 1.4846276144414468e-06, "loss": 0.3075, "step": 6351 }, { "epoch": 1.5119295531623729, "grad_norm": 0.6071459510037844, "learning_rate": 1.483257132371107e-06, "loss": 0.4008, "step": 6352 }, { "epoch": 1.5121675492354376, "grad_norm": 0.3813619772349361, "learning_rate": 1.4818871729757572e-06, "loss": 0.277, "step": 6353 }, { "epoch": 1.5124055453085026, "grad_norm": 0.39614277089555516, "learning_rate": 1.4805177364590078e-06, "loss": 0.3156, "step": 6354 }, { "epoch": 1.5126435413815673, "grad_norm": 0.360231458449255, "learning_rate": 1.4791488230243907e-06, "loss": 0.3506, "step": 6355 }, { "epoch": 1.512881537454632, "grad_norm": 0.3694141952246499, "learning_rate": 1.4777804328753582e-06, "loss": 0.3909, "step": 6356 }, { "epoch": 1.5131195335276968, "grad_norm": 0.3556081977766239, "learning_rate": 1.47641256621529e-06, "loss": 0.2827, "step": 6357 }, { "epoch": 1.5133575296007615, "grad_norm": 0.38860260107924, "learning_rate": 1.4750452232474843e-06, "loss": 0.3373, "step": 6358 }, { "epoch": 1.5135955256738263, "grad_norm": 0.38185389209509235, "learning_rate": 1.4736784041751617e-06, "loss": 0.4104, "step": 6359 }, { "epoch": 1.5138335217468912, "grad_norm": 0.35687668113180804, "learning_rate": 1.4723121092014654e-06, "loss": 0.2966, "step": 6360 }, { "epoch": 1.514071517819956, "grad_norm": 0.3771926399784858, "learning_rate": 1.4709463385294586e-06, "loss": 0.294, "step": 6361 }, { "epoch": 1.514309513893021, "grad_norm": 0.39400534017538535, "learning_rate": 1.4695810923621323e-06, "loss": 0.3273, "step": 6362 }, { "epoch": 1.5145475099660857, "grad_norm": 0.36830931162825825, "learning_rate": 1.4682163709023934e-06, "loss": 0.389, "step": 6363 }, { "epoch": 1.5147855060391504, "grad_norm": 0.3829512181500474, "learning_rate": 1.4668521743530745e-06, "loss": 0.3118, "step": 6364 }, { "epoch": 1.5150235021122151, "grad_norm": 0.38703933279454056, "learning_rate": 1.465488502916928e-06, "loss": 0.3094, "step": 6365 }, { "epoch": 1.5152614981852799, "grad_norm": 0.3730048469300558, "learning_rate": 1.4641253567966302e-06, "loss": 0.38, "step": 6366 }, { "epoch": 1.5154994942583446, "grad_norm": 0.3774200036872977, "learning_rate": 1.462762736194777e-06, "loss": 0.3355, "step": 6367 }, { "epoch": 1.5157374903314096, "grad_norm": 0.4263601312122989, "learning_rate": 1.4614006413138882e-06, "loss": 0.2627, "step": 6368 }, { "epoch": 1.5159754864044743, "grad_norm": 0.45738040178098543, "learning_rate": 1.460039072356405e-06, "loss": 0.3157, "step": 6369 }, { "epoch": 1.516213482477539, "grad_norm": 0.3827099888179909, "learning_rate": 1.4586780295246888e-06, "loss": 0.3668, "step": 6370 }, { "epoch": 1.516451478550604, "grad_norm": 0.3839304092663823, "learning_rate": 1.457317513021027e-06, "loss": 0.2815, "step": 6371 }, { "epoch": 1.5166894746236688, "grad_norm": 0.3874574711568108, "learning_rate": 1.455957523047624e-06, "loss": 0.292, "step": 6372 }, { "epoch": 1.5169274706967335, "grad_norm": 0.365985707188615, "learning_rate": 1.454598059806609e-06, "loss": 0.368, "step": 6373 }, { "epoch": 1.5171654667697982, "grad_norm": 0.3664109124443364, "learning_rate": 1.4532391235000316e-06, "loss": 0.339, "step": 6374 }, { "epoch": 1.517403462842863, "grad_norm": 0.3873829121823865, "learning_rate": 1.4518807143298625e-06, "loss": 0.2742, "step": 6375 }, { "epoch": 1.5176414589159277, "grad_norm": 0.36086287426960706, "learning_rate": 1.4505228324979954e-06, "loss": 0.3181, "step": 6376 }, { "epoch": 1.5178794549889927, "grad_norm": 0.3800615640914873, "learning_rate": 1.449165478206246e-06, "loss": 0.3744, "step": 6377 }, { "epoch": 1.5181174510620574, "grad_norm": 0.34268205116914047, "learning_rate": 1.447808651656349e-06, "loss": 0.3002, "step": 6378 }, { "epoch": 1.5183554471351224, "grad_norm": 0.4034392600733182, "learning_rate": 1.4464523530499636e-06, "loss": 0.2793, "step": 6379 }, { "epoch": 1.5185934432081871, "grad_norm": 0.3778878775280351, "learning_rate": 1.4450965825886693e-06, "loss": 0.3479, "step": 6380 }, { "epoch": 1.5188314392812519, "grad_norm": 0.3622997863722037, "learning_rate": 1.4437413404739669e-06, "loss": 0.3375, "step": 6381 }, { "epoch": 1.5190694353543166, "grad_norm": 0.3806285094389011, "learning_rate": 1.442386626907279e-06, "loss": 0.2532, "step": 6382 }, { "epoch": 1.5193074314273813, "grad_norm": 0.3702090170524379, "learning_rate": 1.4410324420899475e-06, "loss": 0.3195, "step": 6383 }, { "epoch": 1.519545427500446, "grad_norm": 0.3715020539789907, "learning_rate": 1.4396787862232413e-06, "loss": 0.3655, "step": 6384 }, { "epoch": 1.519783423573511, "grad_norm": 0.3683352719161142, "learning_rate": 1.438325659508346e-06, "loss": 0.2953, "step": 6385 }, { "epoch": 1.5200214196465758, "grad_norm": 0.3796246049843194, "learning_rate": 1.436973062146369e-06, "loss": 0.2857, "step": 6386 }, { "epoch": 1.5202594157196407, "grad_norm": 0.37688288858107755, "learning_rate": 1.4356209943383386e-06, "loss": 0.3469, "step": 6387 }, { "epoch": 1.5204974117927055, "grad_norm": 0.37028336062287065, "learning_rate": 1.4342694562852084e-06, "loss": 0.3181, "step": 6388 }, { "epoch": 1.5207354078657702, "grad_norm": 0.39109691442662825, "learning_rate": 1.432918448187849e-06, "loss": 0.2942, "step": 6389 }, { "epoch": 1.520973403938835, "grad_norm": 0.44854038551840963, "learning_rate": 1.4315679702470537e-06, "loss": 0.3434, "step": 6390 }, { "epoch": 1.5212114000118997, "grad_norm": 0.41884702455758605, "learning_rate": 1.430218022663536e-06, "loss": 0.3763, "step": 6391 }, { "epoch": 1.5214493960849644, "grad_norm": 0.3594762787773168, "learning_rate": 1.428868605637933e-06, "loss": 0.325, "step": 6392 }, { "epoch": 1.5216873921580294, "grad_norm": 0.4335927443090939, "learning_rate": 1.4275197193708007e-06, "loss": 0.2827, "step": 6393 }, { "epoch": 1.5219253882310941, "grad_norm": 0.39709453363546243, "learning_rate": 1.4261713640626168e-06, "loss": 0.3365, "step": 6394 }, { "epoch": 1.522163384304159, "grad_norm": 0.3747147101012874, "learning_rate": 1.4248235399137805e-06, "loss": 0.3943, "step": 6395 }, { "epoch": 1.5224013803772238, "grad_norm": 0.48060494905748885, "learning_rate": 1.4234762471246116e-06, "loss": 0.3053, "step": 6396 }, { "epoch": 1.5226393764502886, "grad_norm": 0.3908845745235385, "learning_rate": 1.4221294858953499e-06, "loss": 0.3409, "step": 6397 }, { "epoch": 1.5228773725233533, "grad_norm": 0.3658107584700326, "learning_rate": 1.4207832564261603e-06, "loss": 0.3652, "step": 6398 }, { "epoch": 1.523115368596418, "grad_norm": 0.3781266672514177, "learning_rate": 1.419437558917125e-06, "loss": 0.334, "step": 6399 }, { "epoch": 1.5233533646694828, "grad_norm": 0.3934768910170049, "learning_rate": 1.4180923935682467e-06, "loss": 0.2721, "step": 6400 }, { "epoch": 1.5235913607425478, "grad_norm": 0.42820638390694693, "learning_rate": 1.4167477605794505e-06, "loss": 0.2985, "step": 6401 }, { "epoch": 1.5238293568156125, "grad_norm": 0.40845089003089985, "learning_rate": 1.4154036601505834e-06, "loss": 0.3593, "step": 6402 }, { "epoch": 1.5240673528886775, "grad_norm": 0.39510953166289925, "learning_rate": 1.4140600924814101e-06, "loss": 0.3068, "step": 6403 }, { "epoch": 1.5243053489617422, "grad_norm": 0.39306339830594605, "learning_rate": 1.4127170577716193e-06, "loss": 0.2853, "step": 6404 }, { "epoch": 1.524543345034807, "grad_norm": 0.3633843389560684, "learning_rate": 1.4113745562208191e-06, "loss": 0.368, "step": 6405 }, { "epoch": 1.5247813411078717, "grad_norm": 0.3681166528899588, "learning_rate": 1.4100325880285381e-06, "loss": 0.3689, "step": 6406 }, { "epoch": 1.5250193371809364, "grad_norm": 0.3597587077669697, "learning_rate": 1.4086911533942254e-06, "loss": 0.2705, "step": 6407 }, { "epoch": 1.5252573332540011, "grad_norm": 0.3678376723681916, "learning_rate": 1.4073502525172528e-06, "loss": 0.302, "step": 6408 }, { "epoch": 1.525495329327066, "grad_norm": 0.3967644755270021, "learning_rate": 1.4060098855969102e-06, "loss": 0.3791, "step": 6409 }, { "epoch": 1.5257333254001308, "grad_norm": 0.3765325293672744, "learning_rate": 1.4046700528324082e-06, "loss": 0.2981, "step": 6410 }, { "epoch": 1.5259713214731958, "grad_norm": 0.36491847328820715, "learning_rate": 1.403330754422882e-06, "loss": 0.2766, "step": 6411 }, { "epoch": 1.5262093175462605, "grad_norm": 0.38299479011732535, "learning_rate": 1.401991990567383e-06, "loss": 0.3489, "step": 6412 }, { "epoch": 1.5264473136193253, "grad_norm": 0.38079523523420805, "learning_rate": 1.4006537614648846e-06, "loss": 0.3457, "step": 6413 }, { "epoch": 1.52668530969239, "grad_norm": 0.3893545607621366, "learning_rate": 1.3993160673142791e-06, "loss": 0.2929, "step": 6414 }, { "epoch": 1.5269233057654548, "grad_norm": 0.3650551166262176, "learning_rate": 1.3979789083143847e-06, "loss": 0.3031, "step": 6415 }, { "epoch": 1.5271613018385195, "grad_norm": 0.370501273331385, "learning_rate": 1.3966422846639338e-06, "loss": 0.3539, "step": 6416 }, { "epoch": 1.5273992979115845, "grad_norm": 0.37724908299456045, "learning_rate": 1.3953061965615822e-06, "loss": 0.3347, "step": 6417 }, { "epoch": 1.5276372939846492, "grad_norm": 0.4060598296506883, "learning_rate": 1.3939706442059054e-06, "loss": 0.253, "step": 6418 }, { "epoch": 1.5278752900577142, "grad_norm": 0.435691084317172, "learning_rate": 1.3926356277954001e-06, "loss": 0.3471, "step": 6419 }, { "epoch": 1.528113286130779, "grad_norm": 0.38356363994640225, "learning_rate": 1.391301147528482e-06, "loss": 0.3906, "step": 6420 }, { "epoch": 1.5283512822038436, "grad_norm": 0.3809047541613839, "learning_rate": 1.389967203603489e-06, "loss": 0.3001, "step": 6421 }, { "epoch": 1.5285892782769084, "grad_norm": 0.40431301095479877, "learning_rate": 1.388633796218677e-06, "loss": 0.2865, "step": 6422 }, { "epoch": 1.5288272743499731, "grad_norm": 0.3637907972443383, "learning_rate": 1.3873009255722236e-06, "loss": 0.3513, "step": 6423 }, { "epoch": 1.5290652704230379, "grad_norm": 0.36132364029306596, "learning_rate": 1.3859685918622269e-06, "loss": 0.3441, "step": 6424 }, { "epoch": 1.5293032664961028, "grad_norm": 0.3913993625800555, "learning_rate": 1.3846367952867025e-06, "loss": 0.2711, "step": 6425 }, { "epoch": 1.5295412625691676, "grad_norm": 0.3707148982517722, "learning_rate": 1.3833055360435916e-06, "loss": 0.308, "step": 6426 }, { "epoch": 1.5297792586422325, "grad_norm": 0.3729561418753125, "learning_rate": 1.3819748143307509e-06, "loss": 0.3893, "step": 6427 }, { "epoch": 1.5300172547152973, "grad_norm": 0.4422339804101922, "learning_rate": 1.3806446303459586e-06, "loss": 0.2895, "step": 6428 }, { "epoch": 1.530255250788362, "grad_norm": 0.3578722524768168, "learning_rate": 1.3793149842869125e-06, "loss": 0.3015, "step": 6429 }, { "epoch": 1.5304932468614267, "grad_norm": 0.39833175086792916, "learning_rate": 1.377985876351231e-06, "loss": 0.3476, "step": 6430 }, { "epoch": 1.5307312429344915, "grad_norm": 0.3670466020585641, "learning_rate": 1.376657306736453e-06, "loss": 0.3009, "step": 6431 }, { "epoch": 1.5309692390075562, "grad_norm": 0.3668993481646055, "learning_rate": 1.375329275640036e-06, "loss": 0.2879, "step": 6432 }, { "epoch": 1.5312072350806212, "grad_norm": 0.3760947174886567, "learning_rate": 1.3740017832593588e-06, "loss": 0.3278, "step": 6433 }, { "epoch": 1.531445231153686, "grad_norm": 0.42040674402359185, "learning_rate": 1.3726748297917196e-06, "loss": 0.4024, "step": 6434 }, { "epoch": 1.5316832272267509, "grad_norm": 0.35338872181053815, "learning_rate": 1.3713484154343366e-06, "loss": 0.3243, "step": 6435 }, { "epoch": 1.5319212232998156, "grad_norm": 0.36677960202123283, "learning_rate": 1.370022540384347e-06, "loss": 0.2743, "step": 6436 }, { "epoch": 1.5321592193728804, "grad_norm": 0.39245535596015746, "learning_rate": 1.368697204838808e-06, "loss": 0.3666, "step": 6437 }, { "epoch": 1.532397215445945, "grad_norm": 0.4062599071898869, "learning_rate": 1.3673724089947005e-06, "loss": 0.3457, "step": 6438 }, { "epoch": 1.5326352115190098, "grad_norm": 0.406451461182181, "learning_rate": 1.366048153048919e-06, "loss": 0.2789, "step": 6439 }, { "epoch": 1.5328732075920746, "grad_norm": 0.41876146445384105, "learning_rate": 1.364724437198282e-06, "loss": 0.2907, "step": 6440 }, { "epoch": 1.5331112036651395, "grad_norm": 0.3781185900759409, "learning_rate": 1.3634012616395249e-06, "loss": 0.3733, "step": 6441 }, { "epoch": 1.5333491997382043, "grad_norm": 0.355167285944409, "learning_rate": 1.3620786265693065e-06, "loss": 0.2928, "step": 6442 }, { "epoch": 1.5335871958112692, "grad_norm": 0.37322147215613916, "learning_rate": 1.360756532184202e-06, "loss": 0.2858, "step": 6443 }, { "epoch": 1.533825191884334, "grad_norm": 0.3630088334381094, "learning_rate": 1.3594349786807075e-06, "loss": 0.3354, "step": 6444 }, { "epoch": 1.5340631879573987, "grad_norm": 0.37666334694230696, "learning_rate": 1.3581139662552384e-06, "loss": 0.3768, "step": 6445 }, { "epoch": 1.5343011840304634, "grad_norm": 0.3646570924309373, "learning_rate": 1.3567934951041295e-06, "loss": 0.3052, "step": 6446 }, { "epoch": 1.5345391801035282, "grad_norm": 0.40326392321455273, "learning_rate": 1.3554735654236362e-06, "loss": 0.3308, "step": 6447 }, { "epoch": 1.534777176176593, "grad_norm": 0.3811017852408969, "learning_rate": 1.354154177409932e-06, "loss": 0.3621, "step": 6448 }, { "epoch": 1.5350151722496579, "grad_norm": 0.3847249344728611, "learning_rate": 1.3528353312591113e-06, "loss": 0.3481, "step": 6449 }, { "epoch": 1.5352531683227226, "grad_norm": 0.40310178526788526, "learning_rate": 1.3515170271671869e-06, "loss": 0.2535, "step": 6450 }, { "epoch": 1.5354911643957876, "grad_norm": 0.4142458710891034, "learning_rate": 1.350199265330091e-06, "loss": 0.3009, "step": 6451 }, { "epoch": 1.5357291604688523, "grad_norm": 0.41762456749772203, "learning_rate": 1.3488820459436746e-06, "loss": 0.3848, "step": 6452 }, { "epoch": 1.535967156541917, "grad_norm": 0.3546073693357267, "learning_rate": 1.3475653692037121e-06, "loss": 0.2921, "step": 6453 }, { "epoch": 1.5362051526149818, "grad_norm": 0.40900895467204246, "learning_rate": 1.3462492353058925e-06, "loss": 0.2866, "step": 6454 }, { "epoch": 1.5364431486880465, "grad_norm": 0.3839566221518419, "learning_rate": 1.3449336444458262e-06, "loss": 0.351, "step": 6455 }, { "epoch": 1.5366811447611113, "grad_norm": 0.39130970196546844, "learning_rate": 1.3436185968190424e-06, "loss": 0.3163, "step": 6456 }, { "epoch": 1.5369191408341762, "grad_norm": 0.3741648167638271, "learning_rate": 1.3423040926209891e-06, "loss": 0.2968, "step": 6457 }, { "epoch": 1.537157136907241, "grad_norm": 0.37882647569434985, "learning_rate": 1.3409901320470353e-06, "loss": 0.3218, "step": 6458 }, { "epoch": 1.537395132980306, "grad_norm": 0.40274124899921193, "learning_rate": 1.3396767152924667e-06, "loss": 0.3784, "step": 6459 }, { "epoch": 1.5376331290533707, "grad_norm": 0.3978813236044458, "learning_rate": 1.3383638425524909e-06, "loss": 0.3023, "step": 6460 }, { "epoch": 1.5378711251264354, "grad_norm": 0.38122434614979217, "learning_rate": 1.3370515140222322e-06, "loss": 0.2824, "step": 6461 }, { "epoch": 1.5381091211995002, "grad_norm": 0.3763755588799363, "learning_rate": 1.3357397298967356e-06, "loss": 0.358, "step": 6462 }, { "epoch": 1.538347117272565, "grad_norm": 0.37502057107131376, "learning_rate": 1.3344284903709647e-06, "loss": 0.3488, "step": 6463 }, { "epoch": 1.5385851133456296, "grad_norm": 0.3851990727799265, "learning_rate": 1.3331177956398007e-06, "loss": 0.2791, "step": 6464 }, { "epoch": 1.5388231094186946, "grad_norm": 0.4611773475919031, "learning_rate": 1.3318076458980472e-06, "loss": 0.3228, "step": 6465 }, { "epoch": 1.5390611054917593, "grad_norm": 0.4023609502927905, "learning_rate": 1.330498041340425e-06, "loss": 0.3756, "step": 6466 }, { "epoch": 1.5392991015648243, "grad_norm": 0.36953198203324916, "learning_rate": 1.3291889821615728e-06, "loss": 0.3158, "step": 6467 }, { "epoch": 1.539537097637889, "grad_norm": 0.39443173542625404, "learning_rate": 1.3278804685560476e-06, "loss": 0.2908, "step": 6468 }, { "epoch": 1.5397750937109538, "grad_norm": 0.3713126115765714, "learning_rate": 1.3265725007183306e-06, "loss": 0.3272, "step": 6469 }, { "epoch": 1.5400130897840185, "grad_norm": 0.38677506574041237, "learning_rate": 1.3252650788428156e-06, "loss": 0.3867, "step": 6470 }, { "epoch": 1.5402510858570833, "grad_norm": 0.3478181826041587, "learning_rate": 1.3239582031238191e-06, "loss": 0.2743, "step": 6471 }, { "epoch": 1.540489081930148, "grad_norm": 0.3406479266488313, "learning_rate": 1.3226518737555744e-06, "loss": 0.3027, "step": 6472 }, { "epoch": 1.540727078003213, "grad_norm": 0.39762042117594537, "learning_rate": 1.321346090932234e-06, "loss": 0.3722, "step": 6473 }, { "epoch": 1.5409650740762777, "grad_norm": 0.37089894103258814, "learning_rate": 1.3200408548478705e-06, "loss": 0.3375, "step": 6474 }, { "epoch": 1.5412030701493427, "grad_norm": 0.3965123393190166, "learning_rate": 1.318736165696473e-06, "loss": 0.2923, "step": 6475 }, { "epoch": 1.5414410662224074, "grad_norm": 0.39001895537688247, "learning_rate": 1.3174320236719524e-06, "loss": 0.3209, "step": 6476 }, { "epoch": 1.5416790622954721, "grad_norm": 0.39041593273975445, "learning_rate": 1.3161284289681347e-06, "loss": 0.4009, "step": 6477 }, { "epoch": 1.5419170583685369, "grad_norm": 0.3757834666815788, "learning_rate": 1.3148253817787671e-06, "loss": 0.3065, "step": 6478 }, { "epoch": 1.5421550544416016, "grad_norm": 0.3850973543387022, "learning_rate": 1.3135228822975148e-06, "loss": 0.2928, "step": 6479 }, { "epoch": 1.5423930505146664, "grad_norm": 0.38795859626153806, "learning_rate": 1.3122209307179602e-06, "loss": 0.3493, "step": 6480 }, { "epoch": 1.5426310465877313, "grad_norm": 0.3781000254192901, "learning_rate": 1.3109195272336073e-06, "loss": 0.3487, "step": 6481 }, { "epoch": 1.542869042660796, "grad_norm": 0.42706004699561867, "learning_rate": 1.3096186720378762e-06, "loss": 0.2718, "step": 6482 }, { "epoch": 1.543107038733861, "grad_norm": 0.4346631528907676, "learning_rate": 1.308318365324106e-06, "loss": 0.3182, "step": 6483 }, { "epoch": 1.5433450348069258, "grad_norm": 0.3671857753557718, "learning_rate": 1.3070186072855551e-06, "loss": 0.3902, "step": 6484 }, { "epoch": 1.5435830308799905, "grad_norm": 0.3756868316351957, "learning_rate": 1.3057193981153986e-06, "loss": 0.3181, "step": 6485 }, { "epoch": 1.5438210269530552, "grad_norm": 0.3921792506523292, "learning_rate": 1.304420738006732e-06, "loss": 0.2774, "step": 6486 }, { "epoch": 1.54405902302612, "grad_norm": 0.3998042980004656, "learning_rate": 1.3031226271525677e-06, "loss": 0.3231, "step": 6487 }, { "epoch": 1.5442970190991847, "grad_norm": 0.40991845946955335, "learning_rate": 1.3018250657458377e-06, "loss": 0.3567, "step": 6488 }, { "epoch": 1.5445350151722497, "grad_norm": 0.3688970315279264, "learning_rate": 1.3005280539793908e-06, "loss": 0.2933, "step": 6489 }, { "epoch": 1.5447730112453144, "grad_norm": 0.37617632189473976, "learning_rate": 1.2992315920459958e-06, "loss": 0.3308, "step": 6490 }, { "epoch": 1.5450110073183794, "grad_norm": 0.3884799402239865, "learning_rate": 1.2979356801383376e-06, "loss": 0.3745, "step": 6491 }, { "epoch": 1.545249003391444, "grad_norm": 0.3545462251747847, "learning_rate": 1.296640318449023e-06, "loss": 0.3218, "step": 6492 }, { "epoch": 1.5454869994645088, "grad_norm": 0.3910225423977597, "learning_rate": 1.295345507170574e-06, "loss": 0.2844, "step": 6493 }, { "epoch": 1.5457249955375736, "grad_norm": 0.39920924863182033, "learning_rate": 1.2940512464954313e-06, "loss": 0.3485, "step": 6494 }, { "epoch": 1.5459629916106383, "grad_norm": 0.3725385615151038, "learning_rate": 1.2927575366159529e-06, "loss": 0.3894, "step": 6495 }, { "epoch": 1.546200987683703, "grad_norm": 0.35822929977999596, "learning_rate": 1.291464377724419e-06, "loss": 0.3083, "step": 6496 }, { "epoch": 1.546438983756768, "grad_norm": 0.394890044687265, "learning_rate": 1.290171770013023e-06, "loss": 0.3128, "step": 6497 }, { "epoch": 1.5466769798298328, "grad_norm": 0.36206101450795275, "learning_rate": 1.2888797136738785e-06, "loss": 0.3694, "step": 6498 }, { "epoch": 1.5469149759028977, "grad_norm": 0.35891273995138384, "learning_rate": 1.2875882088990172e-06, "loss": 0.3243, "step": 6499 }, { "epoch": 1.5471529719759625, "grad_norm": 0.39198989856107563, "learning_rate": 1.2862972558803893e-06, "loss": 0.2723, "step": 6500 }, { "epoch": 1.5473909680490272, "grad_norm": 0.4058267955634325, "learning_rate": 1.285006854809861e-06, "loss": 0.3337, "step": 6501 }, { "epoch": 1.547628964122092, "grad_norm": 0.38126495691581086, "learning_rate": 1.2837170058792192e-06, "loss": 0.374, "step": 6502 }, { "epoch": 1.5478669601951567, "grad_norm": 0.37306157896006814, "learning_rate": 1.2824277092801663e-06, "loss": 0.2942, "step": 6503 }, { "epoch": 1.5481049562682214, "grad_norm": 0.3821763777566332, "learning_rate": 1.2811389652043244e-06, "loss": 0.2898, "step": 6504 }, { "epoch": 1.5483429523412864, "grad_norm": 0.38236792478815307, "learning_rate": 1.2798507738432326e-06, "loss": 0.3272, "step": 6505 }, { "epoch": 1.5485809484143511, "grad_norm": 0.3844001205504733, "learning_rate": 1.2785631353883476e-06, "loss": 0.3261, "step": 6506 }, { "epoch": 1.548818944487416, "grad_norm": 0.3847625058188862, "learning_rate": 1.2772760500310444e-06, "loss": 0.3074, "step": 6507 }, { "epoch": 1.5490569405604808, "grad_norm": 0.4101461881227883, "learning_rate": 1.2759895179626147e-06, "loss": 0.3132, "step": 6508 }, { "epoch": 1.5492949366335456, "grad_norm": 0.3736053815442064, "learning_rate": 1.2747035393742718e-06, "loss": 0.3962, "step": 6509 }, { "epoch": 1.5495329327066103, "grad_norm": 0.36963876676347174, "learning_rate": 1.2734181144571422e-06, "loss": 0.2847, "step": 6510 }, { "epoch": 1.549770928779675, "grad_norm": 0.42277471628828484, "learning_rate": 1.272133243402272e-06, "loss": 0.2858, "step": 6511 }, { "epoch": 1.5500089248527398, "grad_norm": 0.35688249609379674, "learning_rate": 1.2708489264006246e-06, "loss": 0.3236, "step": 6512 }, { "epoch": 1.5502469209258047, "grad_norm": 0.3855474832519827, "learning_rate": 1.2695651636430816e-06, "loss": 0.3387, "step": 6513 }, { "epoch": 1.5504849169988695, "grad_norm": 0.36968276951753154, "learning_rate": 1.2682819553204424e-06, "loss": 0.2947, "step": 6514 }, { "epoch": 1.5507229130719344, "grad_norm": 0.399145007942629, "learning_rate": 1.2669993016234222e-06, "loss": 0.3258, "step": 6515 }, { "epoch": 1.5509609091449992, "grad_norm": 0.39491923210379837, "learning_rate": 1.2657172027426556e-06, "loss": 0.3859, "step": 6516 }, { "epoch": 1.551198905218064, "grad_norm": 0.3614434517449473, "learning_rate": 1.264435658868695e-06, "loss": 0.2889, "step": 6517 }, { "epoch": 1.5514369012911287, "grad_norm": 0.3977718991319386, "learning_rate": 1.2631546701920073e-06, "loss": 0.2863, "step": 6518 }, { "epoch": 1.5516748973641934, "grad_norm": 0.493812725827731, "learning_rate": 1.2618742369029819e-06, "loss": 0.3471, "step": 6519 }, { "epoch": 1.5519128934372581, "grad_norm": 0.396858346322872, "learning_rate": 1.2605943591919218e-06, "loss": 0.3643, "step": 6520 }, { "epoch": 1.552150889510323, "grad_norm": 0.4006368823356171, "learning_rate": 1.2593150372490482e-06, "loss": 0.2958, "step": 6521 }, { "epoch": 1.5523888855833878, "grad_norm": 0.3651666797447085, "learning_rate": 1.258036271264499e-06, "loss": 0.2965, "step": 6522 }, { "epoch": 1.5526268816564528, "grad_norm": 0.3609444928045324, "learning_rate": 1.2567580614283331e-06, "loss": 0.3453, "step": 6523 }, { "epoch": 1.5528648777295175, "grad_norm": 0.35866895527736053, "learning_rate": 1.255480407930522e-06, "loss": 0.3647, "step": 6524 }, { "epoch": 1.5531028738025823, "grad_norm": 0.3792035303974508, "learning_rate": 1.254203310960958e-06, "loss": 0.3127, "step": 6525 }, { "epoch": 1.553340869875647, "grad_norm": 0.38922874090947, "learning_rate": 1.2529267707094483e-06, "loss": 0.3213, "step": 6526 }, { "epoch": 1.5535788659487118, "grad_norm": 0.38138547100943027, "learning_rate": 1.2516507873657184e-06, "loss": 0.3742, "step": 6527 }, { "epoch": 1.5538168620217765, "grad_norm": 0.3818945794940767, "learning_rate": 1.2503753611194113e-06, "loss": 0.2942, "step": 6528 }, { "epoch": 1.5540548580948415, "grad_norm": 0.3825385830982494, "learning_rate": 1.2491004921600868e-06, "loss": 0.2755, "step": 6529 }, { "epoch": 1.5542928541679062, "grad_norm": 0.3998069859296653, "learning_rate": 1.247826180677222e-06, "loss": 0.3644, "step": 6530 }, { "epoch": 1.5545308502409712, "grad_norm": 0.35741830458922036, "learning_rate": 1.2465524268602107e-06, "loss": 0.3762, "step": 6531 }, { "epoch": 1.554768846314036, "grad_norm": 0.3777877889719805, "learning_rate": 1.2452792308983646e-06, "loss": 0.2921, "step": 6532 }, { "epoch": 1.5550068423871006, "grad_norm": 0.3803043899122809, "learning_rate": 1.2440065929809125e-06, "loss": 0.3158, "step": 6533 }, { "epoch": 1.5552448384601654, "grad_norm": 0.36524440591889934, "learning_rate": 1.2427345132969997e-06, "loss": 0.3631, "step": 6534 }, { "epoch": 1.55548283453323, "grad_norm": 0.38177834729615784, "learning_rate": 1.2414629920356862e-06, "loss": 0.2983, "step": 6535 }, { "epoch": 1.5557208306062948, "grad_norm": 0.3903386365033605, "learning_rate": 1.2401920293859558e-06, "loss": 0.2908, "step": 6536 }, { "epoch": 1.5559588266793598, "grad_norm": 0.37546580053400613, "learning_rate": 1.238921625536703e-06, "loss": 0.3295, "step": 6537 }, { "epoch": 1.5561968227524245, "grad_norm": 0.37598661605642897, "learning_rate": 1.2376517806767412e-06, "loss": 0.377, "step": 6538 }, { "epoch": 1.5564348188254895, "grad_norm": 0.38383478247802943, "learning_rate": 1.2363824949948012e-06, "loss": 0.2775, "step": 6539 }, { "epoch": 1.5566728148985542, "grad_norm": 0.3763981357885576, "learning_rate": 1.2351137686795294e-06, "loss": 0.3129, "step": 6540 }, { "epoch": 1.556910810971619, "grad_norm": 0.40852446358280264, "learning_rate": 1.2338456019194906e-06, "loss": 0.3695, "step": 6541 }, { "epoch": 1.5571488070446837, "grad_norm": 0.3695119241962966, "learning_rate": 1.2325779949031653e-06, "loss": 0.3202, "step": 6542 }, { "epoch": 1.5573868031177485, "grad_norm": 0.44568416123855525, "learning_rate": 1.231310947818951e-06, "loss": 0.2751, "step": 6543 }, { "epoch": 1.5576247991908132, "grad_norm": 0.39409919509614555, "learning_rate": 1.2300444608551636e-06, "loss": 0.3013, "step": 6544 }, { "epoch": 1.5578627952638782, "grad_norm": 0.3742902470173008, "learning_rate": 1.228778534200032e-06, "loss": 0.381, "step": 6545 }, { "epoch": 1.558100791336943, "grad_norm": 0.32924222634354267, "learning_rate": 1.2275131680417069e-06, "loss": 0.3009, "step": 6546 }, { "epoch": 1.5583387874100079, "grad_norm": 0.3667969007920045, "learning_rate": 1.2262483625682514e-06, "loss": 0.2832, "step": 6547 }, { "epoch": 1.5585767834830726, "grad_norm": 0.3894529324837924, "learning_rate": 1.2249841179676481e-06, "loss": 0.4056, "step": 6548 }, { "epoch": 1.5588147795561373, "grad_norm": 0.36782423847690526, "learning_rate": 1.2237204344277925e-06, "loss": 0.3222, "step": 6549 }, { "epoch": 1.559052775629202, "grad_norm": 0.3724914266382319, "learning_rate": 1.2224573121365024e-06, "loss": 0.2706, "step": 6550 }, { "epoch": 1.5592907717022668, "grad_norm": 0.418089388727327, "learning_rate": 1.2211947512815076e-06, "loss": 0.3172, "step": 6551 }, { "epoch": 1.5595287677753316, "grad_norm": 0.402369301328807, "learning_rate": 1.2199327520504562e-06, "loss": 0.378, "step": 6552 }, { "epoch": 1.5597667638483965, "grad_norm": 0.42301913940674296, "learning_rate": 1.218671314630912e-06, "loss": 0.3409, "step": 6553 }, { "epoch": 1.5600047599214613, "grad_norm": 0.38961962135190426, "learning_rate": 1.2174104392103558e-06, "loss": 0.2681, "step": 6554 }, { "epoch": 1.5602427559945262, "grad_norm": 0.4054494324727994, "learning_rate": 1.2161501259761855e-06, "loss": 0.3628, "step": 6555 }, { "epoch": 1.560480752067591, "grad_norm": 0.3373988273851839, "learning_rate": 1.2148903751157144e-06, "loss": 0.3361, "step": 6556 }, { "epoch": 1.5607187481406557, "grad_norm": 0.40537913952657784, "learning_rate": 1.213631186816172e-06, "loss": 0.2743, "step": 6557 }, { "epoch": 1.5609567442137204, "grad_norm": 0.38794190280853064, "learning_rate": 1.2123725612647064e-06, "loss": 0.327, "step": 6558 }, { "epoch": 1.5611947402867852, "grad_norm": 0.38511311663261943, "learning_rate": 1.2111144986483792e-06, "loss": 0.3744, "step": 6559 }, { "epoch": 1.56143273635985, "grad_norm": 0.3674215063731254, "learning_rate": 1.2098569991541698e-06, "loss": 0.3067, "step": 6560 }, { "epoch": 1.5616707324329149, "grad_norm": 0.40312804407214897, "learning_rate": 1.2086000629689743e-06, "loss": 0.3093, "step": 6561 }, { "epoch": 1.5619087285059796, "grad_norm": 0.3762916570398983, "learning_rate": 1.2073436902796038e-06, "loss": 0.3063, "step": 6562 }, { "epoch": 1.5621467245790446, "grad_norm": 0.36072105739082216, "learning_rate": 1.2060878812727854e-06, "loss": 0.3568, "step": 6563 }, { "epoch": 1.5623847206521093, "grad_norm": 0.354835431200888, "learning_rate": 1.2048326361351658e-06, "loss": 0.2666, "step": 6564 }, { "epoch": 1.562622716725174, "grad_norm": 0.4104035256897569, "learning_rate": 1.203577955053305e-06, "loss": 0.3111, "step": 6565 }, { "epoch": 1.5628607127982388, "grad_norm": 0.3512367007914632, "learning_rate": 1.2023238382136787e-06, "loss": 0.3713, "step": 6566 }, { "epoch": 1.5630987088713035, "grad_norm": 0.3646761606995802, "learning_rate": 1.20107028580268e-06, "loss": 0.2961, "step": 6567 }, { "epoch": 1.5633367049443683, "grad_norm": 0.35406751256411023, "learning_rate": 1.199817298006618e-06, "loss": 0.2711, "step": 6568 }, { "epoch": 1.5635747010174332, "grad_norm": 0.4043510614910653, "learning_rate": 1.1985648750117179e-06, "loss": 0.3151, "step": 6569 }, { "epoch": 1.563812697090498, "grad_norm": 0.3791673629759354, "learning_rate": 1.1973130170041198e-06, "loss": 0.4031, "step": 6570 }, { "epoch": 1.564050693163563, "grad_norm": 0.3842915787993255, "learning_rate": 1.1960617241698813e-06, "loss": 0.2776, "step": 6571 }, { "epoch": 1.5642886892366277, "grad_norm": 0.4119663617052321, "learning_rate": 1.1948109966949745e-06, "loss": 0.3098, "step": 6572 }, { "epoch": 1.5645266853096924, "grad_norm": 0.3592669784242863, "learning_rate": 1.1935608347652906e-06, "loss": 0.3729, "step": 6573 }, { "epoch": 1.5647646813827571, "grad_norm": 0.373131383360226, "learning_rate": 1.192311238566633e-06, "loss": 0.3484, "step": 6574 }, { "epoch": 1.5650026774558219, "grad_norm": 0.3833981711887363, "learning_rate": 1.1910622082847234e-06, "loss": 0.2392, "step": 6575 }, { "epoch": 1.5652406735288866, "grad_norm": 0.34671002508415816, "learning_rate": 1.1898137441051982e-06, "loss": 0.3245, "step": 6576 }, { "epoch": 1.5654786696019516, "grad_norm": 0.3632990316751966, "learning_rate": 1.1885658462136085e-06, "loss": 0.4118, "step": 6577 }, { "epoch": 1.5657166656750163, "grad_norm": 0.35796363702341216, "learning_rate": 1.1873185147954263e-06, "loss": 0.2738, "step": 6578 }, { "epoch": 1.5659546617480813, "grad_norm": 0.3527329899780401, "learning_rate": 1.1860717500360336e-06, "loss": 0.2714, "step": 6579 }, { "epoch": 1.566192657821146, "grad_norm": 0.41834053159356094, "learning_rate": 1.1848255521207308e-06, "loss": 0.3636, "step": 6580 }, { "epoch": 1.5664306538942108, "grad_norm": 0.39616933163013157, "learning_rate": 1.1835799212347343e-06, "loss": 0.3605, "step": 6581 }, { "epoch": 1.5666686499672755, "grad_norm": 0.37466230103769954, "learning_rate": 1.1823348575631749e-06, "loss": 0.2658, "step": 6582 }, { "epoch": 1.5669066460403402, "grad_norm": 0.39963898406246245, "learning_rate": 1.1810903612911001e-06, "loss": 0.3117, "step": 6583 }, { "epoch": 1.567144642113405, "grad_norm": 0.3910180293713378, "learning_rate": 1.179846432603473e-06, "loss": 0.3908, "step": 6584 }, { "epoch": 1.56738263818647, "grad_norm": 0.3978347412470743, "learning_rate": 1.178603071685172e-06, "loss": 0.3303, "step": 6585 }, { "epoch": 1.5676206342595347, "grad_norm": 0.4194169551330718, "learning_rate": 1.1773602787209915e-06, "loss": 0.2969, "step": 6586 }, { "epoch": 1.5678586303325996, "grad_norm": 0.4026400405007115, "learning_rate": 1.1761180538956408e-06, "loss": 0.3348, "step": 6587 }, { "epoch": 1.5680966264056644, "grad_norm": 0.3774639635057926, "learning_rate": 1.1748763973937454e-06, "loss": 0.3725, "step": 6588 }, { "epoch": 1.5683346224787291, "grad_norm": 0.37626958019160905, "learning_rate": 1.1736353093998471e-06, "loss": 0.3056, "step": 6589 }, { "epoch": 1.5685726185517939, "grad_norm": 0.3938142965328492, "learning_rate": 1.1723947900983995e-06, "loss": 0.2885, "step": 6590 }, { "epoch": 1.5688106146248586, "grad_norm": 0.3988797825778564, "learning_rate": 1.1711548396737782e-06, "loss": 0.3832, "step": 6591 }, { "epoch": 1.5690486106979233, "grad_norm": 0.3627863918712503, "learning_rate": 1.1699154583102684e-06, "loss": 0.2937, "step": 6592 }, { "epoch": 1.5692866067709883, "grad_norm": 0.37559495224162986, "learning_rate": 1.1686766461920735e-06, "loss": 0.3193, "step": 6593 }, { "epoch": 1.569524602844053, "grad_norm": 0.3864117859142998, "learning_rate": 1.1674384035033115e-06, "loss": 0.3263, "step": 6594 }, { "epoch": 1.569762598917118, "grad_norm": 0.3796716007438366, "learning_rate": 1.1662007304280153e-06, "loss": 0.3871, "step": 6595 }, { "epoch": 1.5700005949901827, "grad_norm": 0.3728167400610051, "learning_rate": 1.1649636271501346e-06, "loss": 0.2965, "step": 6596 }, { "epoch": 1.5702385910632475, "grad_norm": 0.3719186929240788, "learning_rate": 1.1637270938535334e-06, "loss": 0.298, "step": 6597 }, { "epoch": 1.5704765871363122, "grad_norm": 0.37701153994508685, "learning_rate": 1.1624911307219904e-06, "loss": 0.353, "step": 6598 }, { "epoch": 1.570714583209377, "grad_norm": 0.37159186789859183, "learning_rate": 1.1612557379391993e-06, "loss": 0.3372, "step": 6599 }, { "epoch": 1.5709525792824417, "grad_norm": 0.3868110826031627, "learning_rate": 1.1600209156887732e-06, "loss": 0.2635, "step": 6600 }, { "epoch": 1.5711905753555067, "grad_norm": 0.3807624942737973, "learning_rate": 1.158786664154235e-06, "loss": 0.3153, "step": 6601 }, { "epoch": 1.5714285714285714, "grad_norm": 0.409345535932182, "learning_rate": 1.1575529835190264e-06, "loss": 0.3898, "step": 6602 }, { "epoch": 1.5716665675016364, "grad_norm": 0.3879086278028145, "learning_rate": 1.1563198739665017e-06, "loss": 0.2732, "step": 6603 }, { "epoch": 1.571904563574701, "grad_norm": 0.40901261697805064, "learning_rate": 1.1550873356799297e-06, "loss": 0.2728, "step": 6604 }, { "epoch": 1.5721425596477658, "grad_norm": 0.3791446730046189, "learning_rate": 1.1538553688425002e-06, "loss": 0.365, "step": 6605 }, { "epoch": 1.5723805557208306, "grad_norm": 0.3662473834687694, "learning_rate": 1.1526239736373118e-06, "loss": 0.3693, "step": 6606 }, { "epoch": 1.5726185517938953, "grad_norm": 0.3745667664354613, "learning_rate": 1.1513931502473807e-06, "loss": 0.2745, "step": 6607 }, { "epoch": 1.57285654786696, "grad_norm": 0.3758851474104438, "learning_rate": 1.1501628988556368e-06, "loss": 0.3148, "step": 6608 }, { "epoch": 1.573094543940025, "grad_norm": 0.40537121108015156, "learning_rate": 1.1489332196449271e-06, "loss": 0.3888, "step": 6609 }, { "epoch": 1.5733325400130898, "grad_norm": 0.3957681771676492, "learning_rate": 1.1477041127980115e-06, "loss": 0.3328, "step": 6610 }, { "epoch": 1.5735705360861547, "grad_norm": 0.4123620794488522, "learning_rate": 1.146475578497566e-06, "loss": 0.2634, "step": 6611 }, { "epoch": 1.5738085321592195, "grad_norm": 0.4057487023110117, "learning_rate": 1.1452476169261812e-06, "loss": 0.341, "step": 6612 }, { "epoch": 1.5740465282322842, "grad_norm": 0.39959292930319834, "learning_rate": 1.144020228266362e-06, "loss": 0.3469, "step": 6613 }, { "epoch": 1.574284524305349, "grad_norm": 0.34870011465820333, "learning_rate": 1.1427934127005296e-06, "loss": 0.284, "step": 6614 }, { "epoch": 1.5745225203784137, "grad_norm": 0.41846723355624454, "learning_rate": 1.1415671704110193e-06, "loss": 0.2993, "step": 6615 }, { "epoch": 1.5747605164514784, "grad_norm": 0.4035291904243755, "learning_rate": 1.1403415015800801e-06, "loss": 0.3623, "step": 6616 }, { "epoch": 1.5749985125245434, "grad_norm": 0.3751084543135697, "learning_rate": 1.1391164063898768e-06, "loss": 0.307, "step": 6617 }, { "epoch": 1.575236508597608, "grad_norm": 0.3848206656891993, "learning_rate": 1.137891885022488e-06, "loss": 0.2872, "step": 6618 }, { "epoch": 1.575474504670673, "grad_norm": 0.36097442997024354, "learning_rate": 1.1366679376599104e-06, "loss": 0.3278, "step": 6619 }, { "epoch": 1.5757125007437378, "grad_norm": 0.3676488968525861, "learning_rate": 1.1354445644840516e-06, "loss": 0.3784, "step": 6620 }, { "epoch": 1.5759504968168025, "grad_norm": 0.3757067541931903, "learning_rate": 1.1342217656767352e-06, "loss": 0.294, "step": 6621 }, { "epoch": 1.5761884928898673, "grad_norm": 0.4248595314868793, "learning_rate": 1.1329995414196986e-06, "loss": 0.3104, "step": 6622 }, { "epoch": 1.576426488962932, "grad_norm": 0.39022645581178395, "learning_rate": 1.1317778918945954e-06, "loss": 0.3596, "step": 6623 }, { "epoch": 1.5766644850359968, "grad_norm": 0.37146786462426146, "learning_rate": 1.1305568172829928e-06, "loss": 0.3432, "step": 6624 }, { "epoch": 1.5769024811090617, "grad_norm": 0.3957415682588482, "learning_rate": 1.1293363177663724e-06, "loss": 0.2897, "step": 6625 }, { "epoch": 1.5771404771821265, "grad_norm": 0.37407424529688554, "learning_rate": 1.1281163935261298e-06, "loss": 0.3319, "step": 6626 }, { "epoch": 1.5773784732551914, "grad_norm": 0.5040169043587555, "learning_rate": 1.1268970447435772e-06, "loss": 0.4073, "step": 6627 }, { "epoch": 1.5776164693282562, "grad_norm": 0.3494340725959216, "learning_rate": 1.1256782715999409e-06, "loss": 0.2889, "step": 6628 }, { "epoch": 1.577854465401321, "grad_norm": 0.3592534966406634, "learning_rate": 1.124460074276359e-06, "loss": 0.2832, "step": 6629 }, { "epoch": 1.5780924614743856, "grad_norm": 0.4051953408007675, "learning_rate": 1.1232424529538865e-06, "loss": 0.3587, "step": 6630 }, { "epoch": 1.5783304575474504, "grad_norm": 0.3796932548423937, "learning_rate": 1.1220254078134919e-06, "loss": 0.3441, "step": 6631 }, { "epoch": 1.5785684536205151, "grad_norm": 0.38402120878401813, "learning_rate": 1.1208089390360566e-06, "loss": 0.2712, "step": 6632 }, { "epoch": 1.57880644969358, "grad_norm": 0.3928063655919691, "learning_rate": 1.1195930468023808e-06, "loss": 0.3301, "step": 6633 }, { "epoch": 1.5790444457666448, "grad_norm": 0.3791650075605935, "learning_rate": 1.1183777312931748e-06, "loss": 0.3712, "step": 6634 }, { "epoch": 1.5792824418397098, "grad_norm": 0.38088863826024594, "learning_rate": 1.1171629926890648e-06, "loss": 0.3067, "step": 6635 }, { "epoch": 1.5795204379127745, "grad_norm": 0.4035315968898168, "learning_rate": 1.1159488311705914e-06, "loss": 0.2967, "step": 6636 }, { "epoch": 1.5797584339858393, "grad_norm": 0.39150229184884205, "learning_rate": 1.1147352469182077e-06, "loss": 0.3563, "step": 6637 }, { "epoch": 1.579996430058904, "grad_norm": 0.3945787408533381, "learning_rate": 1.1135222401122835e-06, "loss": 0.3706, "step": 6638 }, { "epoch": 1.5802344261319687, "grad_norm": 0.3824380546742477, "learning_rate": 1.112309810933101e-06, "loss": 0.3121, "step": 6639 }, { "epoch": 1.5804724222050335, "grad_norm": 0.38647053605937204, "learning_rate": 1.111097959560858e-06, "loss": 0.2656, "step": 6640 }, { "epoch": 1.5807104182780984, "grad_norm": 0.39387059548636993, "learning_rate": 1.109886686175664e-06, "loss": 0.3632, "step": 6641 }, { "epoch": 1.5809484143511632, "grad_norm": 0.38920881826537457, "learning_rate": 1.1086759909575462e-06, "loss": 0.2883, "step": 6642 }, { "epoch": 1.5811864104242281, "grad_norm": 0.39014824674716264, "learning_rate": 1.1074658740864425e-06, "loss": 0.2812, "step": 6643 }, { "epoch": 1.5814244064972929, "grad_norm": 0.4100290719394353, "learning_rate": 1.1062563357422063e-06, "loss": 0.3405, "step": 6644 }, { "epoch": 1.5816624025703576, "grad_norm": 0.379281769403233, "learning_rate": 1.1050473761046038e-06, "loss": 0.3978, "step": 6645 }, { "epoch": 1.5819003986434224, "grad_norm": 0.3843586382669758, "learning_rate": 1.1038389953533197e-06, "loss": 0.2815, "step": 6646 }, { "epoch": 1.582138394716487, "grad_norm": 0.37029427889320665, "learning_rate": 1.102631193667947e-06, "loss": 0.2894, "step": 6647 }, { "epoch": 1.5823763907895518, "grad_norm": 0.363151439990877, "learning_rate": 1.1014239712279946e-06, "loss": 0.3463, "step": 6648 }, { "epoch": 1.5826143868626168, "grad_norm": 0.3499094972795553, "learning_rate": 1.100217328212887e-06, "loss": 0.3472, "step": 6649 }, { "epoch": 1.5828523829356815, "grad_norm": 0.37693181662588054, "learning_rate": 1.0990112648019595e-06, "loss": 0.2838, "step": 6650 }, { "epoch": 1.5830903790087465, "grad_norm": 0.4029662389391535, "learning_rate": 1.0978057811744646e-06, "loss": 0.3276, "step": 6651 }, { "epoch": 1.5833283750818112, "grad_norm": 0.4147641724374556, "learning_rate": 1.096600877509566e-06, "loss": 0.3633, "step": 6652 }, { "epoch": 1.583566371154876, "grad_norm": 0.3672722330153522, "learning_rate": 1.0953965539863409e-06, "loss": 0.2932, "step": 6653 }, { "epoch": 1.5838043672279407, "grad_norm": 0.3893345967844479, "learning_rate": 1.0941928107837852e-06, "loss": 0.2635, "step": 6654 }, { "epoch": 1.5840423633010055, "grad_norm": 0.39379673310513574, "learning_rate": 1.0929896480808022e-06, "loss": 0.372, "step": 6655 }, { "epoch": 1.5842803593740702, "grad_norm": 0.3763842161032918, "learning_rate": 1.0917870660562125e-06, "loss": 0.3338, "step": 6656 }, { "epoch": 1.5845183554471352, "grad_norm": 0.42973871300326605, "learning_rate": 1.090585064888749e-06, "loss": 0.2549, "step": 6657 }, { "epoch": 1.5847563515202, "grad_norm": 0.3692281668495606, "learning_rate": 1.0893836447570595e-06, "loss": 0.3091, "step": 6658 }, { "epoch": 1.5849943475932649, "grad_norm": 0.4105528991354411, "learning_rate": 1.0881828058397025e-06, "loss": 0.3591, "step": 6659 }, { "epoch": 1.5852323436663296, "grad_norm": 0.3506145732459315, "learning_rate": 1.0869825483151563e-06, "loss": 0.3442, "step": 6660 }, { "epoch": 1.5854703397393943, "grad_norm": 0.38609627382705203, "learning_rate": 1.0857828723618063e-06, "loss": 0.2698, "step": 6661 }, { "epoch": 1.585708335812459, "grad_norm": 0.39582571031621394, "learning_rate": 1.084583778157955e-06, "loss": 0.3305, "step": 6662 }, { "epoch": 1.5859463318855238, "grad_norm": 0.4773499221532177, "learning_rate": 1.0833852658818167e-06, "loss": 0.3659, "step": 6663 }, { "epoch": 1.5861843279585885, "grad_norm": 0.387549231721086, "learning_rate": 1.0821873357115203e-06, "loss": 0.2837, "step": 6664 }, { "epoch": 1.5864223240316535, "grad_norm": 0.3684761502468937, "learning_rate": 1.0809899878251078e-06, "loss": 0.3226, "step": 6665 }, { "epoch": 1.5866603201047182, "grad_norm": 0.4189733561178494, "learning_rate": 1.0797932224005348e-06, "loss": 0.3902, "step": 6666 }, { "epoch": 1.5868983161777832, "grad_norm": 0.3603259475366153, "learning_rate": 1.0785970396156698e-06, "loss": 0.3178, "step": 6667 }, { "epoch": 1.587136312250848, "grad_norm": 0.37689639588625584, "learning_rate": 1.0774014396482962e-06, "loss": 0.2916, "step": 6668 }, { "epoch": 1.5873743083239127, "grad_norm": 0.38804261264274786, "learning_rate": 1.0762064226761087e-06, "loss": 0.3446, "step": 6669 }, { "epoch": 1.5876123043969774, "grad_norm": 0.36867385287936777, "learning_rate": 1.075011988876717e-06, "loss": 0.3833, "step": 6670 }, { "epoch": 1.5878503004700422, "grad_norm": 0.7076069958920685, "learning_rate": 1.073818138427643e-06, "loss": 0.3034, "step": 6671 }, { "epoch": 1.588088296543107, "grad_norm": 0.37858638589930405, "learning_rate": 1.0726248715063226e-06, "loss": 0.2957, "step": 6672 }, { "epoch": 1.5883262926161719, "grad_norm": 0.3866414028696196, "learning_rate": 1.0714321882901036e-06, "loss": 0.3701, "step": 6673 }, { "epoch": 1.5885642886892366, "grad_norm": 0.39497519093426026, "learning_rate": 1.070240088956251e-06, "loss": 0.3026, "step": 6674 }, { "epoch": 1.5888022847623016, "grad_norm": 0.37372560276384903, "learning_rate": 1.069048573681939e-06, "loss": 0.2698, "step": 6675 }, { "epoch": 1.5890402808353663, "grad_norm": 0.4009991162457296, "learning_rate": 1.0678576426442554e-06, "loss": 0.3123, "step": 6676 }, { "epoch": 1.589278276908431, "grad_norm": 0.39506909405987567, "learning_rate": 1.0666672960202028e-06, "loss": 0.3798, "step": 6677 }, { "epoch": 1.5895162729814958, "grad_norm": 0.3524389433062458, "learning_rate": 1.0654775339866963e-06, "loss": 0.2702, "step": 6678 }, { "epoch": 1.5897542690545605, "grad_norm": 0.38272669069986726, "learning_rate": 1.0642883567205635e-06, "loss": 0.28, "step": 6679 }, { "epoch": 1.5899922651276253, "grad_norm": 0.40293966435907647, "learning_rate": 1.0630997643985447e-06, "loss": 0.3393, "step": 6680 }, { "epoch": 1.5902302612006902, "grad_norm": 0.3587703813764963, "learning_rate": 1.061911757197296e-06, "loss": 0.3498, "step": 6681 }, { "epoch": 1.590468257273755, "grad_norm": 0.38543802046712716, "learning_rate": 1.0607243352933838e-06, "loss": 0.2903, "step": 6682 }, { "epoch": 1.59070625334682, "grad_norm": 0.4301083842782692, "learning_rate": 1.0595374988632884e-06, "loss": 0.3182, "step": 6683 }, { "epoch": 1.5909442494198847, "grad_norm": 0.40034262172534774, "learning_rate": 1.058351248083403e-06, "loss": 0.3613, "step": 6684 }, { "epoch": 1.5911822454929494, "grad_norm": 0.382673941851363, "learning_rate": 1.0571655831300342e-06, "loss": 0.326, "step": 6685 }, { "epoch": 1.5914202415660141, "grad_norm": 0.41204904076387927, "learning_rate": 1.0559805041794002e-06, "loss": 0.2805, "step": 6686 }, { "epoch": 1.5916582376390789, "grad_norm": 0.38921959287793867, "learning_rate": 1.0547960114076328e-06, "loss": 0.3514, "step": 6687 }, { "epoch": 1.5918962337121436, "grad_norm": 0.40306688375436, "learning_rate": 1.053612104990779e-06, "loss": 0.3668, "step": 6688 }, { "epoch": 1.5921342297852086, "grad_norm": 0.4133630297518753, "learning_rate": 1.052428785104795e-06, "loss": 0.2802, "step": 6689 }, { "epoch": 1.5923722258582733, "grad_norm": 0.3962023255094279, "learning_rate": 1.051246051925552e-06, "loss": 0.316, "step": 6690 }, { "epoch": 1.5926102219313383, "grad_norm": 0.41861934380314547, "learning_rate": 1.0500639056288325e-06, "loss": 0.3799, "step": 6691 }, { "epoch": 1.592848218004403, "grad_norm": 0.3766382477058585, "learning_rate": 1.0488823463903341e-06, "loss": 0.3612, "step": 6692 }, { "epoch": 1.5930862140774678, "grad_norm": 0.3881147115564832, "learning_rate": 1.0477013743856652e-06, "loss": 0.274, "step": 6693 }, { "epoch": 1.5933242101505325, "grad_norm": 0.3621383112707997, "learning_rate": 1.0465209897903466e-06, "loss": 0.3322, "step": 6694 }, { "epoch": 1.5935622062235972, "grad_norm": 0.3636848700106123, "learning_rate": 1.0453411927798136e-06, "loss": 0.3889, "step": 6695 }, { "epoch": 1.593800202296662, "grad_norm": 0.37473055964276525, "learning_rate": 1.0441619835294125e-06, "loss": 0.2783, "step": 6696 }, { "epoch": 1.594038198369727, "grad_norm": 0.37206515088751896, "learning_rate": 1.0429833622144037e-06, "loss": 0.2903, "step": 6697 }, { "epoch": 1.5942761944427917, "grad_norm": 0.39577591456358313, "learning_rate": 1.0418053290099589e-06, "loss": 0.3918, "step": 6698 }, { "epoch": 1.5945141905158566, "grad_norm": 0.37165220358822043, "learning_rate": 1.0406278840911632e-06, "loss": 0.3808, "step": 6699 }, { "epoch": 1.5947521865889214, "grad_norm": 0.3930220555144249, "learning_rate": 1.0394510276330145e-06, "loss": 0.2849, "step": 6700 }, { "epoch": 1.5949901826619861, "grad_norm": 0.4069004575367408, "learning_rate": 1.038274759810421e-06, "loss": 0.3189, "step": 6701 }, { "epoch": 1.5952281787350509, "grad_norm": 0.39687917691298413, "learning_rate": 1.0370990807982073e-06, "loss": 0.3705, "step": 6702 }, { "epoch": 1.5954661748081156, "grad_norm": 0.3910720271352218, "learning_rate": 1.0359239907711082e-06, "loss": 0.2893, "step": 6703 }, { "epoch": 1.5957041708811803, "grad_norm": 0.37980335830006934, "learning_rate": 1.0347494899037703e-06, "loss": 0.2815, "step": 6704 }, { "epoch": 1.5959421669542453, "grad_norm": 0.39726445474059047, "learning_rate": 1.033575578370754e-06, "loss": 0.3508, "step": 6705 }, { "epoch": 1.59618016302731, "grad_norm": 0.37676377876897194, "learning_rate": 1.0324022563465307e-06, "loss": 0.3562, "step": 6706 }, { "epoch": 1.596418159100375, "grad_norm": 0.3593315132381354, "learning_rate": 1.0312295240054853e-06, "loss": 0.2756, "step": 6707 }, { "epoch": 1.5966561551734397, "grad_norm": 0.37080438768334384, "learning_rate": 1.030057381521916e-06, "loss": 0.3206, "step": 6708 }, { "epoch": 1.5968941512465045, "grad_norm": 0.37873106808738827, "learning_rate": 1.028885829070032e-06, "loss": 0.3971, "step": 6709 }, { "epoch": 1.5971321473195692, "grad_norm": 0.3659434667845778, "learning_rate": 1.027714866823954e-06, "loss": 0.283, "step": 6710 }, { "epoch": 1.597370143392634, "grad_norm": 0.3867687273653711, "learning_rate": 1.0265444949577163e-06, "loss": 0.2797, "step": 6711 }, { "epoch": 1.5976081394656987, "grad_norm": 0.4072148084054436, "learning_rate": 1.0253747136452657e-06, "loss": 0.3506, "step": 6712 }, { "epoch": 1.5978461355387636, "grad_norm": 0.3901721585652958, "learning_rate": 1.0242055230604598e-06, "loss": 0.3852, "step": 6713 }, { "epoch": 1.5980841316118284, "grad_norm": 0.38077201173399056, "learning_rate": 1.0230369233770688e-06, "loss": 0.2825, "step": 6714 }, { "epoch": 1.5983221276848933, "grad_norm": 0.34750351350090636, "learning_rate": 1.0218689147687766e-06, "loss": 0.318, "step": 6715 }, { "epoch": 1.598560123757958, "grad_norm": 0.37793674402799227, "learning_rate": 1.0207014974091788e-06, "loss": 0.373, "step": 6716 }, { "epoch": 1.5987981198310228, "grad_norm": 0.37210043250443864, "learning_rate": 1.0195346714717813e-06, "loss": 0.3131, "step": 6717 }, { "epoch": 1.5990361159040876, "grad_norm": 0.397386731456454, "learning_rate": 1.0183684371300035e-06, "loss": 0.2892, "step": 6718 }, { "epoch": 1.5992741119771523, "grad_norm": 0.3857199576264192, "learning_rate": 1.0172027945571765e-06, "loss": 0.3093, "step": 6719 }, { "epoch": 1.599512108050217, "grad_norm": 0.3652664433334774, "learning_rate": 1.0160377439265445e-06, "loss": 0.3803, "step": 6720 }, { "epoch": 1.599750104123282, "grad_norm": 0.39011567367020755, "learning_rate": 1.014873285411262e-06, "loss": 0.2906, "step": 6721 }, { "epoch": 1.5999881001963467, "grad_norm": 0.4067204738621554, "learning_rate": 1.013709419184396e-06, "loss": 0.2948, "step": 6722 }, { "epoch": 1.6002260962694117, "grad_norm": 0.38555422364472247, "learning_rate": 1.0125461454189273e-06, "loss": 0.3671, "step": 6723 }, { "epoch": 1.6004640923424764, "grad_norm": 0.38611908239425363, "learning_rate": 1.0113834642877457e-06, "loss": 0.3296, "step": 6724 }, { "epoch": 1.6007020884155412, "grad_norm": 0.4058794539643264, "learning_rate": 1.0102213759636548e-06, "loss": 0.2817, "step": 6725 }, { "epoch": 1.600940084488606, "grad_norm": 0.38734559534856855, "learning_rate": 1.0090598806193701e-06, "loss": 0.33, "step": 6726 }, { "epoch": 1.6011780805616707, "grad_norm": 0.393666212501604, "learning_rate": 1.0078989784275183e-06, "loss": 0.3874, "step": 6727 }, { "epoch": 1.6014160766347354, "grad_norm": 0.36559390636596556, "learning_rate": 1.0067386695606367e-06, "loss": 0.27, "step": 6728 }, { "epoch": 1.6016540727078004, "grad_norm": 0.409681509162357, "learning_rate": 1.0055789541911788e-06, "loss": 0.275, "step": 6729 }, { "epoch": 1.601892068780865, "grad_norm": 0.4138944613457098, "learning_rate": 1.004419832491505e-06, "loss": 0.343, "step": 6730 }, { "epoch": 1.60213006485393, "grad_norm": 0.3955901179511804, "learning_rate": 1.0032613046338907e-06, "loss": 0.3837, "step": 6731 }, { "epoch": 1.6023680609269948, "grad_norm": 0.3840912144760111, "learning_rate": 1.0021033707905202e-06, "loss": 0.2799, "step": 6732 }, { "epoch": 1.6026060570000595, "grad_norm": 0.3907610707868771, "learning_rate": 1.0009460311334928e-06, "loss": 0.319, "step": 6733 }, { "epoch": 1.6028440530731243, "grad_norm": 0.38100921827655265, "learning_rate": 9.99789285834815e-07, "loss": 0.3672, "step": 6734 }, { "epoch": 1.603082049146189, "grad_norm": 0.4088419649010157, "learning_rate": 9.98633135066412e-07, "loss": 0.3222, "step": 6735 }, { "epoch": 1.6033200452192538, "grad_norm": 0.3801150461937333, "learning_rate": 9.974775790001134e-07, "loss": 0.2863, "step": 6736 }, { "epoch": 1.6035580412923187, "grad_norm": 0.3695771436542104, "learning_rate": 9.963226178076646e-07, "loss": 0.3493, "step": 6737 }, { "epoch": 1.6037960373653835, "grad_norm": 0.4646018757207895, "learning_rate": 9.951682516607208e-07, "loss": 0.3526, "step": 6738 }, { "epoch": 1.6040340334384484, "grad_norm": 0.3739693404848989, "learning_rate": 9.940144807308494e-07, "loss": 0.2835, "step": 6739 }, { "epoch": 1.6042720295115132, "grad_norm": 0.38989411473136254, "learning_rate": 9.928613051895297e-07, "loss": 0.3219, "step": 6740 }, { "epoch": 1.604510025584578, "grad_norm": 0.39146856372760747, "learning_rate": 9.91708725208152e-07, "loss": 0.3897, "step": 6741 }, { "epoch": 1.6047480216576426, "grad_norm": 0.35756410647769543, "learning_rate": 9.90556740958017e-07, "loss": 0.3037, "step": 6742 }, { "epoch": 1.6049860177307074, "grad_norm": 0.41503456281085804, "learning_rate": 9.894053526103397e-07, "loss": 0.2837, "step": 6743 }, { "epoch": 1.605224013803772, "grad_norm": 0.37911206000367126, "learning_rate": 9.882545603362448e-07, "loss": 0.3649, "step": 6744 }, { "epoch": 1.605462009876837, "grad_norm": 0.39287333843325145, "learning_rate": 9.87104364306768e-07, "loss": 0.3827, "step": 6745 }, { "epoch": 1.6057000059499018, "grad_norm": 0.3623004181552304, "learning_rate": 9.859547646928568e-07, "loss": 0.2571, "step": 6746 }, { "epoch": 1.6059380020229668, "grad_norm": 0.3729145089679708, "learning_rate": 9.848057616653705e-07, "loss": 0.3106, "step": 6747 }, { "epoch": 1.6061759980960315, "grad_norm": 0.39121154395488567, "learning_rate": 9.83657355395079e-07, "loss": 0.3548, "step": 6748 }, { "epoch": 1.6064139941690962, "grad_norm": 0.3649057165068434, "learning_rate": 9.82509546052664e-07, "loss": 0.3453, "step": 6749 }, { "epoch": 1.606651990242161, "grad_norm": 0.38056432571485566, "learning_rate": 9.813623338087181e-07, "loss": 0.2907, "step": 6750 }, { "epoch": 1.6068899863152257, "grad_norm": 0.3773297281289237, "learning_rate": 9.802157188337464e-07, "loss": 0.3433, "step": 6751 }, { "epoch": 1.6071279823882905, "grad_norm": 0.37897532559459307, "learning_rate": 9.790697012981632e-07, "loss": 0.3886, "step": 6752 }, { "epoch": 1.6073659784613554, "grad_norm": 0.36839416215381326, "learning_rate": 9.77924281372295e-07, "loss": 0.2888, "step": 6753 }, { "epoch": 1.6076039745344202, "grad_norm": 0.40572797254422144, "learning_rate": 9.767794592263802e-07, "loss": 0.2758, "step": 6754 }, { "epoch": 1.6078419706074851, "grad_norm": 0.38690003338447304, "learning_rate": 9.756352350305676e-07, "loss": 0.3507, "step": 6755 }, { "epoch": 1.6080799666805499, "grad_norm": 0.3633536684182977, "learning_rate": 9.744916089549155e-07, "loss": 0.3725, "step": 6756 }, { "epoch": 1.6083179627536146, "grad_norm": 0.3430540268517774, "learning_rate": 9.733485811693983e-07, "loss": 0.2831, "step": 6757 }, { "epoch": 1.6085559588266793, "grad_norm": 0.3967941777054526, "learning_rate": 9.722061518438963e-07, "loss": 0.3197, "step": 6758 }, { "epoch": 1.608793954899744, "grad_norm": 0.3769154445985261, "learning_rate": 9.71064321148203e-07, "loss": 0.3843, "step": 6759 }, { "epoch": 1.6090319509728088, "grad_norm": 0.36745266921746866, "learning_rate": 9.699230892520222e-07, "loss": 0.2952, "step": 6760 }, { "epoch": 1.6092699470458738, "grad_norm": 0.3719289532826721, "learning_rate": 9.687824563249687e-07, "loss": 0.2952, "step": 6761 }, { "epoch": 1.6095079431189385, "grad_norm": 0.3850579375106196, "learning_rate": 9.67642422536571e-07, "loss": 0.3314, "step": 6762 }, { "epoch": 1.6097459391920035, "grad_norm": 0.36942355814013683, "learning_rate": 9.665029880562655e-07, "loss": 0.355, "step": 6763 }, { "epoch": 1.6099839352650682, "grad_norm": 0.37525710772280524, "learning_rate": 9.65364153053399e-07, "loss": 0.2903, "step": 6764 }, { "epoch": 1.610221931338133, "grad_norm": 0.37197742941684636, "learning_rate": 9.64225917697232e-07, "loss": 0.3024, "step": 6765 }, { "epoch": 1.6104599274111977, "grad_norm": 0.3768595495711356, "learning_rate": 9.630882821569338e-07, "loss": 0.3852, "step": 6766 }, { "epoch": 1.6106979234842624, "grad_norm": 0.35867128131233944, "learning_rate": 9.61951246601585e-07, "loss": 0.3093, "step": 6767 }, { "epoch": 1.6109359195573272, "grad_norm": 0.40435567311806875, "learning_rate": 9.608148112001774e-07, "loss": 0.262, "step": 6768 }, { "epoch": 1.6111739156303921, "grad_norm": 0.3786650690585147, "learning_rate": 9.59678976121614e-07, "loss": 0.319, "step": 6769 }, { "epoch": 1.6114119117034569, "grad_norm": 0.3873536911631197, "learning_rate": 9.58543741534706e-07, "loss": 0.367, "step": 6770 }, { "epoch": 1.6116499077765218, "grad_norm": 0.36824419357607824, "learning_rate": 9.574091076081799e-07, "loss": 0.2774, "step": 6771 }, { "epoch": 1.6118879038495866, "grad_norm": 0.3891611443567881, "learning_rate": 9.562750745106697e-07, "loss": 0.2913, "step": 6772 }, { "epoch": 1.6121258999226513, "grad_norm": 0.3995297635418241, "learning_rate": 9.551416424107202e-07, "loss": 0.3454, "step": 6773 }, { "epoch": 1.612363895995716, "grad_norm": 0.41453803457475424, "learning_rate": 9.54008811476787e-07, "loss": 0.3433, "step": 6774 }, { "epoch": 1.6126018920687808, "grad_norm": 0.39579544768901054, "learning_rate": 9.528765818772379e-07, "loss": 0.2877, "step": 6775 }, { "epoch": 1.6128398881418455, "grad_norm": 0.3488915743792545, "learning_rate": 9.517449537803497e-07, "loss": 0.3048, "step": 6776 }, { "epoch": 1.6130778842149105, "grad_norm": 0.38162237869505217, "learning_rate": 9.506139273543108e-07, "loss": 0.3919, "step": 6777 }, { "epoch": 1.6133158802879752, "grad_norm": 0.40658809756542225, "learning_rate": 9.494835027672189e-07, "loss": 0.2859, "step": 6778 }, { "epoch": 1.6135538763610402, "grad_norm": 0.4090597053962956, "learning_rate": 9.483536801870835e-07, "loss": 0.2859, "step": 6779 }, { "epoch": 1.613791872434105, "grad_norm": 0.4162769342871607, "learning_rate": 9.472244597818236e-07, "loss": 0.3559, "step": 6780 }, { "epoch": 1.6140298685071697, "grad_norm": 0.4127306882903097, "learning_rate": 9.460958417192706e-07, "loss": 0.3618, "step": 6781 }, { "epoch": 1.6142678645802344, "grad_norm": 0.3705494652288955, "learning_rate": 9.449678261671636e-07, "loss": 0.2495, "step": 6782 }, { "epoch": 1.6145058606532992, "grad_norm": 0.38615110793723045, "learning_rate": 9.438404132931534e-07, "loss": 0.3189, "step": 6783 }, { "epoch": 1.614743856726364, "grad_norm": 0.4247897491555896, "learning_rate": 9.427136032648038e-07, "loss": 0.3723, "step": 6784 }, { "epoch": 1.6149818527994289, "grad_norm": 0.38613015342079937, "learning_rate": 9.415873962495847e-07, "loss": 0.3214, "step": 6785 }, { "epoch": 1.6152198488724936, "grad_norm": 0.38838589089115877, "learning_rate": 9.40461792414879e-07, "loss": 0.3093, "step": 6786 }, { "epoch": 1.6154578449455586, "grad_norm": 0.4134809114242341, "learning_rate": 9.393367919279794e-07, "loss": 0.3258, "step": 6787 }, { "epoch": 1.6156958410186233, "grad_norm": 0.3657710453341731, "learning_rate": 9.382123949560868e-07, "loss": 0.3751, "step": 6788 }, { "epoch": 1.615933837091688, "grad_norm": 0.367777502716404, "learning_rate": 9.370886016663178e-07, "loss": 0.284, "step": 6789 }, { "epoch": 1.6161718331647528, "grad_norm": 0.37224518047407673, "learning_rate": 9.35965412225694e-07, "loss": 0.3233, "step": 6790 }, { "epoch": 1.6164098292378175, "grad_norm": 0.3679948065415524, "learning_rate": 9.348428268011489e-07, "loss": 0.38, "step": 6791 }, { "epoch": 1.6166478253108822, "grad_norm": 0.368226197018974, "learning_rate": 9.337208455595271e-07, "loss": 0.3483, "step": 6792 }, { "epoch": 1.6168858213839472, "grad_norm": 0.38742952029982053, "learning_rate": 9.325994686675826e-07, "loss": 0.2773, "step": 6793 }, { "epoch": 1.617123817457012, "grad_norm": 0.4010018284261614, "learning_rate": 9.314786962919798e-07, "loss": 0.3711, "step": 6794 }, { "epoch": 1.617361813530077, "grad_norm": 0.39828012262925994, "learning_rate": 9.303585285992933e-07, "loss": 0.3747, "step": 6795 }, { "epoch": 1.6175998096031416, "grad_norm": 0.41099074455886697, "learning_rate": 9.292389657560069e-07, "loss": 0.2781, "step": 6796 }, { "epoch": 1.6178378056762064, "grad_norm": 0.3833507644875006, "learning_rate": 9.281200079285152e-07, "loss": 0.2849, "step": 6797 }, { "epoch": 1.6180758017492711, "grad_norm": 0.38415300774108235, "learning_rate": 9.270016552831252e-07, "loss": 0.3648, "step": 6798 }, { "epoch": 1.6183137978223359, "grad_norm": 0.37049113228296326, "learning_rate": 9.258839079860499e-07, "loss": 0.3186, "step": 6799 }, { "epoch": 1.6185517938954006, "grad_norm": 0.3725759851299388, "learning_rate": 9.247667662034149e-07, "loss": 0.2857, "step": 6800 }, { "epoch": 1.6187897899684656, "grad_norm": 0.3974032467656143, "learning_rate": 9.236502301012546e-07, "loss": 0.3328, "step": 6801 }, { "epoch": 1.6190277860415303, "grad_norm": 0.37422414801531034, "learning_rate": 9.225342998455145e-07, "loss": 0.3822, "step": 6802 }, { "epoch": 1.6192657821145953, "grad_norm": 0.34722153079424223, "learning_rate": 9.214189756020487e-07, "loss": 0.2668, "step": 6803 }, { "epoch": 1.61950377818766, "grad_norm": 0.3888234856151576, "learning_rate": 9.203042575366228e-07, "loss": 0.2795, "step": 6804 }, { "epoch": 1.6197417742607247, "grad_norm": 0.40283307764457343, "learning_rate": 9.191901458149106e-07, "loss": 0.3836, "step": 6805 }, { "epoch": 1.6199797703337895, "grad_norm": 0.38472144664022295, "learning_rate": 9.180766406024971e-07, "loss": 0.3505, "step": 6806 }, { "epoch": 1.6202177664068542, "grad_norm": 0.37583986626555843, "learning_rate": 9.16963742064877e-07, "loss": 0.2628, "step": 6807 }, { "epoch": 1.620455762479919, "grad_norm": 0.362264134804524, "learning_rate": 9.158514503674543e-07, "loss": 0.3411, "step": 6808 }, { "epoch": 1.620693758552984, "grad_norm": 0.3769911946362758, "learning_rate": 9.14739765675543e-07, "loss": 0.371, "step": 6809 }, { "epoch": 1.6209317546260487, "grad_norm": 0.3853498348483161, "learning_rate": 9.136286881543666e-07, "loss": 0.314, "step": 6810 }, { "epoch": 1.6211697506991136, "grad_norm": 0.3983780077597233, "learning_rate": 9.125182179690584e-07, "loss": 0.2673, "step": 6811 }, { "epoch": 1.6214077467721784, "grad_norm": 0.37571986287733705, "learning_rate": 9.114083552846636e-07, "loss": 0.3361, "step": 6812 }, { "epoch": 1.621645742845243, "grad_norm": 0.3904265606928051, "learning_rate": 9.102991002661337e-07, "loss": 0.367, "step": 6813 }, { "epoch": 1.6218837389183078, "grad_norm": 0.4223581365461809, "learning_rate": 9.091904530783319e-07, "loss": 0.3114, "step": 6814 }, { "epoch": 1.6221217349913726, "grad_norm": 0.39388763487496037, "learning_rate": 9.080824138860295e-07, "loss": 0.3303, "step": 6815 }, { "epoch": 1.6223597310644373, "grad_norm": 0.39770354399393776, "learning_rate": 9.069749828539109e-07, "loss": 0.3909, "step": 6816 }, { "epoch": 1.6225977271375023, "grad_norm": 0.37675287316781486, "learning_rate": 9.058681601465663e-07, "loss": 0.3139, "step": 6817 }, { "epoch": 1.622835723210567, "grad_norm": 0.359202858527697, "learning_rate": 9.047619459284968e-07, "loss": 0.2548, "step": 6818 }, { "epoch": 1.623073719283632, "grad_norm": 0.3666753439305403, "learning_rate": 9.036563403641136e-07, "loss": 0.2944, "step": 6819 }, { "epoch": 1.6233117153566967, "grad_norm": 0.48209117748060715, "learning_rate": 9.025513436177368e-07, "loss": 0.3864, "step": 6820 }, { "epoch": 1.6235497114297615, "grad_norm": 0.3719293692788079, "learning_rate": 9.014469558535965e-07, "loss": 0.278, "step": 6821 }, { "epoch": 1.6237877075028262, "grad_norm": 0.3969227125064937, "learning_rate": 9.003431772358323e-07, "loss": 0.306, "step": 6822 }, { "epoch": 1.624025703575891, "grad_norm": 0.3951142656857, "learning_rate": 8.992400079284919e-07, "loss": 0.3504, "step": 6823 }, { "epoch": 1.6242636996489557, "grad_norm": 0.3481746671653846, "learning_rate": 8.981374480955347e-07, "loss": 0.3283, "step": 6824 }, { "epoch": 1.6245016957220206, "grad_norm": 0.3891762848601158, "learning_rate": 8.970354979008261e-07, "loss": 0.281, "step": 6825 }, { "epoch": 1.6247396917950854, "grad_norm": 0.43069665710742855, "learning_rate": 8.959341575081465e-07, "loss": 0.3199, "step": 6826 }, { "epoch": 1.6249776878681503, "grad_norm": 0.37051390449080696, "learning_rate": 8.948334270811809e-07, "loss": 0.3871, "step": 6827 }, { "epoch": 1.625215683941215, "grad_norm": 0.3880703377585865, "learning_rate": 8.937333067835247e-07, "loss": 0.2861, "step": 6828 }, { "epoch": 1.6254536800142798, "grad_norm": 0.384396753893017, "learning_rate": 8.926337967786836e-07, "loss": 0.2741, "step": 6829 }, { "epoch": 1.6256916760873446, "grad_norm": 0.3569238940042476, "learning_rate": 8.915348972300713e-07, "loss": 0.357, "step": 6830 }, { "epoch": 1.6259296721604093, "grad_norm": 0.3499062660519206, "learning_rate": 8.904366083010119e-07, "loss": 0.3204, "step": 6831 }, { "epoch": 1.626167668233474, "grad_norm": 0.38069461747113986, "learning_rate": 8.893389301547384e-07, "loss": 0.2716, "step": 6832 }, { "epoch": 1.626405664306539, "grad_norm": 0.35807608092297644, "learning_rate": 8.882418629543926e-07, "loss": 0.3028, "step": 6833 }, { "epoch": 1.6266436603796037, "grad_norm": 0.38443325525551314, "learning_rate": 8.871454068630259e-07, "loss": 0.3946, "step": 6834 }, { "epoch": 1.6268816564526687, "grad_norm": 0.357934918939066, "learning_rate": 8.860495620435989e-07, "loss": 0.3355, "step": 6835 }, { "epoch": 1.6271196525257334, "grad_norm": 0.36332940789321744, "learning_rate": 8.849543286589813e-07, "loss": 0.297, "step": 6836 }, { "epoch": 1.6273576485987982, "grad_norm": 0.39032305116436367, "learning_rate": 8.838597068719518e-07, "loss": 0.3483, "step": 6837 }, { "epoch": 1.627595644671863, "grad_norm": 0.37557326175473943, "learning_rate": 8.82765696845197e-07, "loss": 0.3611, "step": 6838 }, { "epoch": 1.6278336407449276, "grad_norm": 0.3693511552822045, "learning_rate": 8.816722987413162e-07, "loss": 0.2734, "step": 6839 }, { "epoch": 1.6280716368179924, "grad_norm": 0.3718337287364868, "learning_rate": 8.805795127228145e-07, "loss": 0.305, "step": 6840 }, { "epoch": 1.6283096328910573, "grad_norm": 0.3904012424314939, "learning_rate": 8.794873389521069e-07, "loss": 0.3579, "step": 6841 }, { "epoch": 1.628547628964122, "grad_norm": 0.3643275958018295, "learning_rate": 8.783957775915159e-07, "loss": 0.3106, "step": 6842 }, { "epoch": 1.628785625037187, "grad_norm": 0.38787892776494476, "learning_rate": 8.77304828803277e-07, "loss": 0.2696, "step": 6843 }, { "epoch": 1.6290236211102518, "grad_norm": 0.3660591486708151, "learning_rate": 8.762144927495309e-07, "loss": 0.3095, "step": 6844 }, { "epoch": 1.6292616171833165, "grad_norm": 0.39222908624194175, "learning_rate": 8.751247695923292e-07, "loss": 0.4224, "step": 6845 }, { "epoch": 1.6294996132563813, "grad_norm": 0.42296916733578066, "learning_rate": 8.740356594936311e-07, "loss": 0.2812, "step": 6846 }, { "epoch": 1.629737609329446, "grad_norm": 0.40203065491885415, "learning_rate": 8.729471626153052e-07, "loss": 0.345, "step": 6847 }, { "epoch": 1.6299756054025107, "grad_norm": 0.3806358325222802, "learning_rate": 8.7185927911913e-07, "loss": 0.3598, "step": 6848 }, { "epoch": 1.6302136014755757, "grad_norm": 0.3826490418036862, "learning_rate": 8.707720091667904e-07, "loss": 0.3363, "step": 6849 }, { "epoch": 1.6304515975486404, "grad_norm": 0.41820535006387766, "learning_rate": 8.696853529198829e-07, "loss": 0.2485, "step": 6850 }, { "epoch": 1.6306895936217054, "grad_norm": 0.3883852616080865, "learning_rate": 8.685993105399104e-07, "loss": 0.324, "step": 6851 }, { "epoch": 1.6309275896947701, "grad_norm": 0.3924806045692454, "learning_rate": 8.675138821882856e-07, "loss": 0.3843, "step": 6852 }, { "epoch": 1.6311655857678349, "grad_norm": 0.3886779181476492, "learning_rate": 8.664290680263321e-07, "loss": 0.2817, "step": 6853 }, { "epoch": 1.6314035818408996, "grad_norm": 0.41445337145235195, "learning_rate": 8.653448682152782e-07, "loss": 0.2835, "step": 6854 }, { "epoch": 1.6316415779139644, "grad_norm": 0.35876208864526293, "learning_rate": 8.642612829162639e-07, "loss": 0.3438, "step": 6855 }, { "epoch": 1.631879573987029, "grad_norm": 0.3470504035205238, "learning_rate": 8.631783122903353e-07, "loss": 0.3624, "step": 6856 }, { "epoch": 1.632117570060094, "grad_norm": 0.373620998827096, "learning_rate": 8.620959564984504e-07, "loss": 0.2853, "step": 6857 }, { "epoch": 1.6323555661331588, "grad_norm": 0.40447652300735565, "learning_rate": 8.610142157014728e-07, "loss": 0.3606, "step": 6858 }, { "epoch": 1.6325935622062238, "grad_norm": 0.39593981996363464, "learning_rate": 8.599330900601766e-07, "loss": 0.3431, "step": 6859 }, { "epoch": 1.6328315582792885, "grad_norm": 0.3592935150194725, "learning_rate": 8.588525797352432e-07, "loss": 0.3056, "step": 6860 }, { "epoch": 1.6330695543523532, "grad_norm": 0.37888268748077286, "learning_rate": 8.577726848872636e-07, "loss": 0.2669, "step": 6861 }, { "epoch": 1.633307550425418, "grad_norm": 0.3640350475107053, "learning_rate": 8.566934056767368e-07, "loss": 0.3408, "step": 6862 }, { "epoch": 1.6335455464984827, "grad_norm": 0.385536304300356, "learning_rate": 8.556147422640704e-07, "loss": 0.3648, "step": 6863 }, { "epoch": 1.6337835425715475, "grad_norm": 0.3924071742555982, "learning_rate": 8.545366948095802e-07, "loss": 0.2619, "step": 6864 }, { "epoch": 1.6340215386446124, "grad_norm": 0.40696056451988827, "learning_rate": 8.5345926347349e-07, "loss": 0.323, "step": 6865 }, { "epoch": 1.6342595347176772, "grad_norm": 0.42407201911219133, "learning_rate": 8.523824484159348e-07, "loss": 0.3849, "step": 6866 }, { "epoch": 1.6344975307907421, "grad_norm": 0.36284614007552857, "learning_rate": 8.513062497969554e-07, "loss": 0.337, "step": 6867 }, { "epoch": 1.6347355268638069, "grad_norm": 0.3887082396171208, "learning_rate": 8.502306677765004e-07, "loss": 0.2736, "step": 6868 }, { "epoch": 1.6349735229368716, "grad_norm": 0.38345494649062606, "learning_rate": 8.491557025144276e-07, "loss": 0.3312, "step": 6869 }, { "epoch": 1.6352115190099363, "grad_norm": 0.38028798193743946, "learning_rate": 8.480813541705057e-07, "loss": 0.375, "step": 6870 }, { "epoch": 1.635449515083001, "grad_norm": 0.38125998205133177, "learning_rate": 8.470076229044077e-07, "loss": 0.3121, "step": 6871 }, { "epoch": 1.6356875111560658, "grad_norm": 0.3801307871037892, "learning_rate": 8.45934508875717e-07, "loss": 0.3276, "step": 6872 }, { "epoch": 1.6359255072291308, "grad_norm": 0.4262493108275734, "learning_rate": 8.448620122439255e-07, "loss": 0.384, "step": 6873 }, { "epoch": 1.6361635033021955, "grad_norm": 0.3533409555026496, "learning_rate": 8.437901331684317e-07, "loss": 0.3331, "step": 6874 }, { "epoch": 1.6364014993752605, "grad_norm": 0.3740778241308746, "learning_rate": 8.427188718085438e-07, "loss": 0.2702, "step": 6875 }, { "epoch": 1.6366394954483252, "grad_norm": 0.3899836491292678, "learning_rate": 8.416482283234778e-07, "loss": 0.3263, "step": 6876 }, { "epoch": 1.63687749152139, "grad_norm": 0.37954634736543486, "learning_rate": 8.405782028723575e-07, "loss": 0.38, "step": 6877 }, { "epoch": 1.6371154875944547, "grad_norm": 0.3537922473440547, "learning_rate": 8.395087956142156e-07, "loss": 0.2528, "step": 6878 }, { "epoch": 1.6373534836675194, "grad_norm": 0.3919326904956134, "learning_rate": 8.384400067079923e-07, "loss": 0.2933, "step": 6879 }, { "epoch": 1.6375914797405842, "grad_norm": 0.3740391889328203, "learning_rate": 8.37371836312535e-07, "loss": 0.3497, "step": 6880 }, { "epoch": 1.6378294758136491, "grad_norm": 0.37761989502751364, "learning_rate": 8.363042845866021e-07, "loss": 0.3382, "step": 6881 }, { "epoch": 1.6380674718867139, "grad_norm": 0.38783064891752894, "learning_rate": 8.352373516888573e-07, "loss": 0.2931, "step": 6882 }, { "epoch": 1.6383054679597788, "grad_norm": 0.37480525199164966, "learning_rate": 8.341710377778739e-07, "loss": 0.3041, "step": 6883 }, { "epoch": 1.6385434640328436, "grad_norm": 0.37392017588457593, "learning_rate": 8.331053430121317e-07, "loss": 0.3653, "step": 6884 }, { "epoch": 1.6387814601059083, "grad_norm": 0.37701145032799044, "learning_rate": 8.320402675500195e-07, "loss": 0.2937, "step": 6885 }, { "epoch": 1.639019456178973, "grad_norm": 0.3811824330700152, "learning_rate": 8.309758115498334e-07, "loss": 0.2808, "step": 6886 }, { "epoch": 1.6392574522520378, "grad_norm": 0.3643999529788084, "learning_rate": 8.299119751697788e-07, "loss": 0.3547, "step": 6887 }, { "epoch": 1.6394954483251025, "grad_norm": 0.36502990685126363, "learning_rate": 8.288487585679677e-07, "loss": 0.3857, "step": 6888 }, { "epoch": 1.6397334443981675, "grad_norm": 0.3658225172262794, "learning_rate": 8.277861619024208e-07, "loss": 0.2652, "step": 6889 }, { "epoch": 1.6399714404712322, "grad_norm": 0.38230360243140593, "learning_rate": 8.267241853310654e-07, "loss": 0.3224, "step": 6890 }, { "epoch": 1.6402094365442972, "grad_norm": 0.38637764499463456, "learning_rate": 8.25662829011738e-07, "loss": 0.3791, "step": 6891 }, { "epoch": 1.640447432617362, "grad_norm": 0.38029656509707016, "learning_rate": 8.246020931021808e-07, "loss": 0.2924, "step": 6892 }, { "epoch": 1.6406854286904267, "grad_norm": 0.3747338210302809, "learning_rate": 8.235419777600484e-07, "loss": 0.2697, "step": 6893 }, { "epoch": 1.6409234247634914, "grad_norm": 0.3600089909702746, "learning_rate": 8.224824831428991e-07, "loss": 0.3374, "step": 6894 }, { "epoch": 1.6411614208365561, "grad_norm": 0.38204493116592925, "learning_rate": 8.21423609408199e-07, "loss": 0.3695, "step": 6895 }, { "epoch": 1.6413994169096209, "grad_norm": 0.3865945626227688, "learning_rate": 8.203653567133224e-07, "loss": 0.2677, "step": 6896 }, { "epoch": 1.6416374129826858, "grad_norm": 0.37313722469830596, "learning_rate": 8.193077252155545e-07, "loss": 0.2893, "step": 6897 }, { "epoch": 1.6418754090557506, "grad_norm": 0.3817816737768635, "learning_rate": 8.182507150720837e-07, "loss": 0.3498, "step": 6898 }, { "epoch": 1.6421134051288155, "grad_norm": 0.36328532420628223, "learning_rate": 8.171943264400084e-07, "loss": 0.3091, "step": 6899 }, { "epoch": 1.6423514012018803, "grad_norm": 0.40910572921600497, "learning_rate": 8.161385594763338e-07, "loss": 0.2732, "step": 6900 }, { "epoch": 1.642589397274945, "grad_norm": 0.4173927289371764, "learning_rate": 8.150834143379726e-07, "loss": 0.3009, "step": 6901 }, { "epoch": 1.6428273933480098, "grad_norm": 0.4031792044847936, "learning_rate": 8.140288911817462e-07, "loss": 0.3839, "step": 6902 }, { "epoch": 1.6430653894210745, "grad_norm": 0.36972889214424126, "learning_rate": 8.129749901643824e-07, "loss": 0.314, "step": 6903 }, { "epoch": 1.6433033854941392, "grad_norm": 0.3875768012510301, "learning_rate": 8.119217114425171e-07, "loss": 0.2766, "step": 6904 }, { "epoch": 1.6435413815672042, "grad_norm": 0.36865187596941734, "learning_rate": 8.108690551726938e-07, "loss": 0.3303, "step": 6905 }, { "epoch": 1.643779377640269, "grad_norm": 0.3707979238870622, "learning_rate": 8.098170215113627e-07, "loss": 0.3094, "step": 6906 }, { "epoch": 1.644017373713334, "grad_norm": 0.3961111362306213, "learning_rate": 8.087656106148811e-07, "loss": 0.262, "step": 6907 }, { "epoch": 1.6442553697863986, "grad_norm": 0.37905376446342726, "learning_rate": 8.077148226395171e-07, "loss": 0.2942, "step": 6908 }, { "epoch": 1.6444933658594634, "grad_norm": 0.3891989136340424, "learning_rate": 8.066646577414427e-07, "loss": 0.3634, "step": 6909 }, { "epoch": 1.6447313619325281, "grad_norm": 0.3635988958220919, "learning_rate": 8.056151160767384e-07, "loss": 0.3054, "step": 6910 }, { "epoch": 1.6449693580055929, "grad_norm": 0.3976482528225413, "learning_rate": 8.045661978013919e-07, "loss": 0.2771, "step": 6911 }, { "epoch": 1.6452073540786576, "grad_norm": 0.36402867904746283, "learning_rate": 8.035179030712981e-07, "loss": 0.343, "step": 6912 }, { "epoch": 1.6454453501517223, "grad_norm": 0.41373321397240326, "learning_rate": 8.0247023204226e-07, "loss": 0.3902, "step": 6913 }, { "epoch": 1.6456833462247873, "grad_norm": 0.36420015222611274, "learning_rate": 8.014231848699877e-07, "loss": 0.2814, "step": 6914 }, { "epoch": 1.645921342297852, "grad_norm": 0.3831807168567994, "learning_rate": 8.003767617100977e-07, "loss": 0.2808, "step": 6915 }, { "epoch": 1.646159338370917, "grad_norm": 0.5084315028646742, "learning_rate": 7.993309627181145e-07, "loss": 0.3751, "step": 6916 }, { "epoch": 1.6463973344439817, "grad_norm": 0.37628750772345293, "learning_rate": 7.982857880494699e-07, "loss": 0.3378, "step": 6917 }, { "epoch": 1.6466353305170465, "grad_norm": 0.47297885018084573, "learning_rate": 7.972412378595024e-07, "loss": 0.2825, "step": 6918 }, { "epoch": 1.6468733265901112, "grad_norm": 0.40733630305637625, "learning_rate": 7.961973123034572e-07, "loss": 0.3292, "step": 6919 }, { "epoch": 1.647111322663176, "grad_norm": 0.3636765632920138, "learning_rate": 7.951540115364892e-07, "loss": 0.3736, "step": 6920 }, { "epoch": 1.6473493187362407, "grad_norm": 0.3599660837275332, "learning_rate": 7.941113357136587e-07, "loss": 0.2892, "step": 6921 }, { "epoch": 1.6475873148093056, "grad_norm": 0.3781156441865842, "learning_rate": 7.930692849899319e-07, "loss": 0.2934, "step": 6922 }, { "epoch": 1.6478253108823704, "grad_norm": 0.39701363259275885, "learning_rate": 7.920278595201825e-07, "loss": 0.369, "step": 6923 }, { "epoch": 1.6480633069554353, "grad_norm": 0.37156701594123176, "learning_rate": 7.909870594591951e-07, "loss": 0.3413, "step": 6924 }, { "epoch": 1.6483013030285, "grad_norm": 0.41917032707799795, "learning_rate": 7.89946884961656e-07, "loss": 0.2803, "step": 6925 }, { "epoch": 1.6485392991015648, "grad_norm": 0.4065671522305405, "learning_rate": 7.88907336182162e-07, "loss": 0.3162, "step": 6926 }, { "epoch": 1.6487772951746296, "grad_norm": 0.3790168785337112, "learning_rate": 7.878684132752152e-07, "loss": 0.3934, "step": 6927 }, { "epoch": 1.6490152912476943, "grad_norm": 0.3708736912100001, "learning_rate": 7.868301163952253e-07, "loss": 0.2832, "step": 6928 }, { "epoch": 1.649253287320759, "grad_norm": 0.3935121315020632, "learning_rate": 7.857924456965083e-07, "loss": 0.3096, "step": 6929 }, { "epoch": 1.649491283393824, "grad_norm": 0.3818251147778019, "learning_rate": 7.847554013332892e-07, "loss": 0.3547, "step": 6930 }, { "epoch": 1.6497292794668887, "grad_norm": 0.38974578717813124, "learning_rate": 7.83718983459697e-07, "loss": 0.3627, "step": 6931 }, { "epoch": 1.6499672755399537, "grad_norm": 0.3757552764551271, "learning_rate": 7.8268319222977e-07, "loss": 0.3034, "step": 6932 }, { "epoch": 1.6502052716130184, "grad_norm": 0.384816533453232, "learning_rate": 7.81648027797452e-07, "loss": 0.3372, "step": 6933 }, { "epoch": 1.6504432676860832, "grad_norm": 0.3667467367151375, "learning_rate": 7.806134903165935e-07, "loss": 0.3632, "step": 6934 }, { "epoch": 1.650681263759148, "grad_norm": 0.3586707968143236, "learning_rate": 7.795795799409522e-07, "loss": 0.301, "step": 6935 }, { "epoch": 1.6509192598322127, "grad_norm": 0.40472475833277655, "learning_rate": 7.785462968241947e-07, "loss": 0.2845, "step": 6936 }, { "epoch": 1.6511572559052774, "grad_norm": 0.47875293593979357, "learning_rate": 7.775136411198914e-07, "loss": 0.3537, "step": 6937 }, { "epoch": 1.6513952519783424, "grad_norm": 0.3744577504265213, "learning_rate": 7.764816129815201e-07, "loss": 0.3607, "step": 6938 }, { "epoch": 1.651633248051407, "grad_norm": 0.37901981094401166, "learning_rate": 7.754502125624658e-07, "loss": 0.2718, "step": 6939 }, { "epoch": 1.651871244124472, "grad_norm": 0.373755700776998, "learning_rate": 7.744194400160204e-07, "loss": 0.2972, "step": 6940 }, { "epoch": 1.6521092401975368, "grad_norm": 0.39240847812458995, "learning_rate": 7.73389295495382e-07, "loss": 0.3436, "step": 6941 }, { "epoch": 1.6523472362706015, "grad_norm": 0.39288015487594496, "learning_rate": 7.723597791536553e-07, "loss": 0.3164, "step": 6942 }, { "epoch": 1.6525852323436663, "grad_norm": 0.3862794779553621, "learning_rate": 7.713308911438527e-07, "loss": 0.3161, "step": 6943 }, { "epoch": 1.652823228416731, "grad_norm": 0.4601597581604055, "learning_rate": 7.703026316188916e-07, "loss": 0.3288, "step": 6944 }, { "epoch": 1.6530612244897958, "grad_norm": 0.38403595804884394, "learning_rate": 7.692750007315969e-07, "loss": 0.4199, "step": 6945 }, { "epoch": 1.6532992205628607, "grad_norm": 0.3689442026310426, "learning_rate": 7.682479986346996e-07, "loss": 0.2934, "step": 6946 }, { "epoch": 1.6535372166359255, "grad_norm": 0.364745434398555, "learning_rate": 7.67221625480839e-07, "loss": 0.2731, "step": 6947 }, { "epoch": 1.6537752127089904, "grad_norm": 0.438406775655767, "learning_rate": 7.66195881422559e-07, "loss": 0.3691, "step": 6948 }, { "epoch": 1.6540132087820552, "grad_norm": 0.3721049405974233, "learning_rate": 7.651707666123098e-07, "loss": 0.3326, "step": 6949 }, { "epoch": 1.65425120485512, "grad_norm": 0.3894342319600942, "learning_rate": 7.641462812024486e-07, "loss": 0.2969, "step": 6950 }, { "epoch": 1.6544892009281846, "grad_norm": 0.3805823643582205, "learning_rate": 7.631224253452408e-07, "loss": 0.3233, "step": 6951 }, { "epoch": 1.6547271970012494, "grad_norm": 0.42081777893965966, "learning_rate": 7.620991991928561e-07, "loss": 0.3882, "step": 6952 }, { "epoch": 1.6549651930743141, "grad_norm": 0.37868479763553864, "learning_rate": 7.61076602897371e-07, "loss": 0.3121, "step": 6953 }, { "epoch": 1.655203189147379, "grad_norm": 0.4244582354427234, "learning_rate": 7.600546366107686e-07, "loss": 0.2845, "step": 6954 }, { "epoch": 1.6554411852204438, "grad_norm": 0.47303463276050134, "learning_rate": 7.590333004849387e-07, "loss": 0.377, "step": 6955 }, { "epoch": 1.6556791812935088, "grad_norm": 0.3665165569695061, "learning_rate": 7.580125946716765e-07, "loss": 0.3637, "step": 6956 }, { "epoch": 1.6559171773665735, "grad_norm": 0.379471968924663, "learning_rate": 7.569925193226846e-07, "loss": 0.2889, "step": 6957 }, { "epoch": 1.6561551734396383, "grad_norm": 0.38456329877923556, "learning_rate": 7.55973074589571e-07, "loss": 0.3245, "step": 6958 }, { "epoch": 1.656393169512703, "grad_norm": 0.3852464205929202, "learning_rate": 7.549542606238508e-07, "loss": 0.388, "step": 6959 }, { "epoch": 1.6566311655857677, "grad_norm": 0.35614680443106633, "learning_rate": 7.53936077576945e-07, "loss": 0.3175, "step": 6960 }, { "epoch": 1.6568691616588325, "grad_norm": 0.3892713822547582, "learning_rate": 7.529185256001803e-07, "loss": 0.2797, "step": 6961 }, { "epoch": 1.6571071577318974, "grad_norm": 0.3748547210585344, "learning_rate": 7.519016048447908e-07, "loss": 0.3167, "step": 6962 }, { "epoch": 1.6573451538049622, "grad_norm": 0.37044782848474667, "learning_rate": 7.508853154619145e-07, "loss": 0.3462, "step": 6963 }, { "epoch": 1.6575831498780271, "grad_norm": 0.3581984373631393, "learning_rate": 7.498696576025993e-07, "loss": 0.2675, "step": 6964 }, { "epoch": 1.6578211459510919, "grad_norm": 0.3814135531783868, "learning_rate": 7.488546314177964e-07, "loss": 0.3111, "step": 6965 }, { "epoch": 1.6580591420241566, "grad_norm": 0.3994529997922176, "learning_rate": 7.478402370583631e-07, "loss": 0.3391, "step": 6966 }, { "epoch": 1.6582971380972213, "grad_norm": 0.35098177887130955, "learning_rate": 7.468264746750642e-07, "loss": 0.2783, "step": 6967 }, { "epoch": 1.658535134170286, "grad_norm": 0.3932454998383334, "learning_rate": 7.458133444185694e-07, "loss": 0.2877, "step": 6968 }, { "epoch": 1.6587731302433508, "grad_norm": 0.4124862836963177, "learning_rate": 7.448008464394557e-07, "loss": 0.3172, "step": 6969 }, { "epoch": 1.6590111263164158, "grad_norm": 0.4156043963623505, "learning_rate": 7.437889808882043e-07, "loss": 0.3898, "step": 6970 }, { "epoch": 1.6592491223894805, "grad_norm": 0.3664091808631948, "learning_rate": 7.427777479152043e-07, "loss": 0.2909, "step": 6971 }, { "epoch": 1.6594871184625455, "grad_norm": 0.3897124758944131, "learning_rate": 7.417671476707489e-07, "loss": 0.3297, "step": 6972 }, { "epoch": 1.6597251145356102, "grad_norm": 0.3893820163964789, "learning_rate": 7.407571803050384e-07, "loss": 0.3597, "step": 6973 }, { "epoch": 1.659963110608675, "grad_norm": 0.3695387069491869, "learning_rate": 7.397478459681806e-07, "loss": 0.3694, "step": 6974 }, { "epoch": 1.6602011066817397, "grad_norm": 0.39522818795728604, "learning_rate": 7.387391448101861e-07, "loss": 0.282, "step": 6975 }, { "epoch": 1.6604391027548044, "grad_norm": 0.40266534305717183, "learning_rate": 7.377310769809736e-07, "loss": 0.3463, "step": 6976 }, { "epoch": 1.6606770988278692, "grad_norm": 0.3563962402516382, "learning_rate": 7.367236426303653e-07, "loss": 0.3897, "step": 6977 }, { "epoch": 1.6609150949009341, "grad_norm": 0.35469458517661445, "learning_rate": 7.357168419080929e-07, "loss": 0.2726, "step": 6978 }, { "epoch": 1.6611530909739989, "grad_norm": 0.38451642874884295, "learning_rate": 7.347106749637911e-07, "loss": 0.3007, "step": 6979 }, { "epoch": 1.6613910870470638, "grad_norm": 0.3867505955294661, "learning_rate": 7.337051419470014e-07, "loss": 0.3803, "step": 6980 }, { "epoch": 1.6616290831201286, "grad_norm": 0.36921539064006076, "learning_rate": 7.327002430071706e-07, "loss": 0.3494, "step": 6981 }, { "epoch": 1.6618670791931933, "grad_norm": 0.3865817459852862, "learning_rate": 7.316959782936516e-07, "loss": 0.2724, "step": 6982 }, { "epoch": 1.662105075266258, "grad_norm": 0.38384403692006913, "learning_rate": 7.306923479557032e-07, "loss": 0.3179, "step": 6983 }, { "epoch": 1.6623430713393228, "grad_norm": 0.3753237383050048, "learning_rate": 7.296893521424891e-07, "loss": 0.3753, "step": 6984 }, { "epoch": 1.6625810674123875, "grad_norm": 0.37042483156179623, "learning_rate": 7.286869910030797e-07, "loss": 0.307, "step": 6985 }, { "epoch": 1.6628190634854525, "grad_norm": 0.3789501748283688, "learning_rate": 7.276852646864507e-07, "loss": 0.3027, "step": 6986 }, { "epoch": 1.6630570595585172, "grad_norm": 0.4135444826893321, "learning_rate": 7.266841733414837e-07, "loss": 0.3552, "step": 6987 }, { "epoch": 1.6632950556315822, "grad_norm": 0.39788532592242914, "learning_rate": 7.256837171169651e-07, "loss": 0.3437, "step": 6988 }, { "epoch": 1.663533051704647, "grad_norm": 0.3593638442974499, "learning_rate": 7.246838961615877e-07, "loss": 0.3051, "step": 6989 }, { "epoch": 1.6637710477777117, "grad_norm": 0.4159858649381969, "learning_rate": 7.23684710623948e-07, "loss": 0.3217, "step": 6990 }, { "epoch": 1.6640090438507764, "grad_norm": 0.3672974524927207, "learning_rate": 7.226861606525526e-07, "loss": 0.3805, "step": 6991 }, { "epoch": 1.6642470399238412, "grad_norm": 0.3787693493770054, "learning_rate": 7.216882463958091e-07, "loss": 0.3179, "step": 6992 }, { "epoch": 1.664485035996906, "grad_norm": 0.3796098325253126, "learning_rate": 7.20690968002033e-07, "loss": 0.2713, "step": 6993 }, { "epoch": 1.6647230320699709, "grad_norm": 0.38051154387730174, "learning_rate": 7.196943256194439e-07, "loss": 0.2955, "step": 6994 }, { "epoch": 1.6649610281430356, "grad_norm": 0.38260035859588687, "learning_rate": 7.186983193961677e-07, "loss": 0.3802, "step": 6995 }, { "epoch": 1.6651990242161006, "grad_norm": 0.3560984048327247, "learning_rate": 7.177029494802351e-07, "loss": 0.3178, "step": 6996 }, { "epoch": 1.6654370202891653, "grad_norm": 0.38924386944904715, "learning_rate": 7.167082160195837e-07, "loss": 0.2741, "step": 6997 }, { "epoch": 1.66567501636223, "grad_norm": 0.4039947533316631, "learning_rate": 7.157141191620548e-07, "loss": 0.3479, "step": 6998 }, { "epoch": 1.6659130124352948, "grad_norm": 0.38643709485362443, "learning_rate": 7.147206590553956e-07, "loss": 0.3369, "step": 6999 }, { "epoch": 1.6661510085083595, "grad_norm": 0.40129045045890266, "learning_rate": 7.137278358472583e-07, "loss": 0.2818, "step": 7000 }, { "epoch": 1.6663890045814242, "grad_norm": 0.4499031615543838, "learning_rate": 7.127356496852029e-07, "loss": 0.3035, "step": 7001 }, { "epoch": 1.6666270006544892, "grad_norm": 0.3748939127556976, "learning_rate": 7.117441007166919e-07, "loss": 0.3819, "step": 7002 }, { "epoch": 1.666864996727554, "grad_norm": 0.35881304656890556, "learning_rate": 7.10753189089094e-07, "loss": 0.2835, "step": 7003 }, { "epoch": 1.667102992800619, "grad_norm": 0.41355655256875934, "learning_rate": 7.097629149496815e-07, "loss": 0.2988, "step": 7004 }, { "epoch": 1.6673409888736836, "grad_norm": 0.6309979385551239, "learning_rate": 7.087732784456369e-07, "loss": 0.3538, "step": 7005 }, { "epoch": 1.6675789849467484, "grad_norm": 0.37545921085712725, "learning_rate": 7.077842797240426e-07, "loss": 0.3416, "step": 7006 }, { "epoch": 1.6678169810198131, "grad_norm": 0.365443297712444, "learning_rate": 7.067959189318885e-07, "loss": 0.2977, "step": 7007 }, { "epoch": 1.6680549770928779, "grad_norm": 0.37039435287713296, "learning_rate": 7.058081962160696e-07, "loss": 0.3164, "step": 7008 }, { "epoch": 1.6682929731659426, "grad_norm": 0.37427937679786044, "learning_rate": 7.048211117233861e-07, "loss": 0.3486, "step": 7009 }, { "epoch": 1.6685309692390076, "grad_norm": 0.3729855514273312, "learning_rate": 7.038346656005429e-07, "loss": 0.2989, "step": 7010 }, { "epoch": 1.6687689653120723, "grad_norm": 0.374090406829846, "learning_rate": 7.028488579941506e-07, "loss": 0.2792, "step": 7011 }, { "epoch": 1.6690069613851373, "grad_norm": 0.39515663629452236, "learning_rate": 7.018636890507241e-07, "loss": 0.3659, "step": 7012 }, { "epoch": 1.669244957458202, "grad_norm": 0.35372595983883504, "learning_rate": 7.008791589166847e-07, "loss": 0.3607, "step": 7013 }, { "epoch": 1.6694829535312667, "grad_norm": 0.3533302020072786, "learning_rate": 6.998952677383569e-07, "loss": 0.276, "step": 7014 }, { "epoch": 1.6697209496043315, "grad_norm": 0.42475735192235214, "learning_rate": 6.989120156619717e-07, "loss": 0.3186, "step": 7015 }, { "epoch": 1.6699589456773962, "grad_norm": 0.38764230011040096, "learning_rate": 6.979294028336652e-07, "loss": 0.3573, "step": 7016 }, { "epoch": 1.670196941750461, "grad_norm": 0.3611283372327025, "learning_rate": 6.969474293994771e-07, "loss": 0.3349, "step": 7017 }, { "epoch": 1.670434937823526, "grad_norm": 0.36366689855170664, "learning_rate": 6.959660955053527e-07, "loss": 0.296, "step": 7018 }, { "epoch": 1.6706729338965907, "grad_norm": 0.5422835131958719, "learning_rate": 6.949854012971441e-07, "loss": 0.3383, "step": 7019 }, { "epoch": 1.6709109299696556, "grad_norm": 0.35747903910606177, "learning_rate": 6.94005346920606e-07, "loss": 0.4007, "step": 7020 }, { "epoch": 1.6711489260427204, "grad_norm": 0.36742844091875815, "learning_rate": 6.930259325213978e-07, "loss": 0.2987, "step": 7021 }, { "epoch": 1.671386922115785, "grad_norm": 0.40350636301013826, "learning_rate": 6.920471582450861e-07, "loss": 0.3304, "step": 7022 }, { "epoch": 1.6716249181888498, "grad_norm": 0.40006906898219396, "learning_rate": 6.910690242371404e-07, "loss": 0.3446, "step": 7023 }, { "epoch": 1.6718629142619146, "grad_norm": 0.35727853106570195, "learning_rate": 6.900915306429351e-07, "loss": 0.3153, "step": 7024 }, { "epoch": 1.6721009103349793, "grad_norm": 0.382181007945197, "learning_rate": 6.891146776077507e-07, "loss": 0.2882, "step": 7025 }, { "epoch": 1.6723389064080443, "grad_norm": 0.4060235764358048, "learning_rate": 6.881384652767709e-07, "loss": 0.3207, "step": 7026 }, { "epoch": 1.672576902481109, "grad_norm": 0.37097326742176884, "learning_rate": 6.871628937950848e-07, "loss": 0.3922, "step": 7027 }, { "epoch": 1.672814898554174, "grad_norm": 0.40285375164369536, "learning_rate": 6.861879633076878e-07, "loss": 0.2923, "step": 7028 }, { "epoch": 1.6730528946272387, "grad_norm": 0.37801910915821857, "learning_rate": 6.852136739594783e-07, "loss": 0.2882, "step": 7029 }, { "epoch": 1.6732908907003035, "grad_norm": 0.36728422194745036, "learning_rate": 6.842400258952597e-07, "loss": 0.3977, "step": 7030 }, { "epoch": 1.6735288867733682, "grad_norm": 0.39561139457941275, "learning_rate": 6.832670192597396e-07, "loss": 0.3719, "step": 7031 }, { "epoch": 1.673766882846433, "grad_norm": 0.37538581127020354, "learning_rate": 6.822946541975306e-07, "loss": 0.2774, "step": 7032 }, { "epoch": 1.6740048789194977, "grad_norm": 0.38177066214722755, "learning_rate": 6.813229308531516e-07, "loss": 0.3272, "step": 7033 }, { "epoch": 1.6742428749925626, "grad_norm": 0.38578694289159443, "learning_rate": 6.803518493710243e-07, "loss": 0.4126, "step": 7034 }, { "epoch": 1.6744808710656274, "grad_norm": 0.36307309359597634, "learning_rate": 6.793814098954749e-07, "loss": 0.331, "step": 7035 }, { "epoch": 1.6747188671386923, "grad_norm": 0.4000089226486024, "learning_rate": 6.784116125707352e-07, "loss": 0.267, "step": 7036 }, { "epoch": 1.674956863211757, "grad_norm": 0.36487357951661303, "learning_rate": 6.774424575409405e-07, "loss": 0.3127, "step": 7037 }, { "epoch": 1.6751948592848218, "grad_norm": 0.35391638388560076, "learning_rate": 6.764739449501317e-07, "loss": 0.3769, "step": 7038 }, { "epoch": 1.6754328553578866, "grad_norm": 0.35186947926475576, "learning_rate": 6.755060749422537e-07, "loss": 0.2923, "step": 7039 }, { "epoch": 1.6756708514309513, "grad_norm": 0.44135361184534583, "learning_rate": 6.745388476611553e-07, "loss": 0.2981, "step": 7040 }, { "epoch": 1.675908847504016, "grad_norm": 0.45872802613072794, "learning_rate": 6.735722632505915e-07, "loss": 0.37, "step": 7041 }, { "epoch": 1.676146843577081, "grad_norm": 0.3580957612187762, "learning_rate": 6.726063218542195e-07, "loss": 0.314, "step": 7042 }, { "epoch": 1.6763848396501457, "grad_norm": 0.35669432760875613, "learning_rate": 6.716410236156029e-07, "loss": 0.3014, "step": 7043 }, { "epoch": 1.6766228357232107, "grad_norm": 0.38931922786060297, "learning_rate": 6.706763686782086e-07, "loss": 0.3144, "step": 7044 }, { "epoch": 1.6768608317962754, "grad_norm": 0.40124294635503877, "learning_rate": 6.697123571854075e-07, "loss": 0.3697, "step": 7045 }, { "epoch": 1.6770988278693402, "grad_norm": 0.4969890421919046, "learning_rate": 6.687489892804766e-07, "loss": 0.3089, "step": 7046 }, { "epoch": 1.677336823942405, "grad_norm": 0.3566446713995905, "learning_rate": 6.677862651065964e-07, "loss": 0.2827, "step": 7047 }, { "epoch": 1.6775748200154696, "grad_norm": 0.38848733751656256, "learning_rate": 6.668241848068507e-07, "loss": 0.3592, "step": 7048 }, { "epoch": 1.6778128160885344, "grad_norm": 0.5950856594781679, "learning_rate": 6.658627485242291e-07, "loss": 0.3444, "step": 7049 }, { "epoch": 1.6780508121615993, "grad_norm": 0.36830146545513864, "learning_rate": 6.649019564016246e-07, "loss": 0.2709, "step": 7050 }, { "epoch": 1.678288808234664, "grad_norm": 0.37103816586070554, "learning_rate": 6.639418085818339e-07, "loss": 0.3196, "step": 7051 }, { "epoch": 1.678526804307729, "grad_norm": 0.38203373085536374, "learning_rate": 6.629823052075602e-07, "loss": 0.402, "step": 7052 }, { "epoch": 1.6787648003807938, "grad_norm": 0.3476339385449799, "learning_rate": 6.620234464214076e-07, "loss": 0.3066, "step": 7053 }, { "epoch": 1.6790027964538585, "grad_norm": 0.3792905804858758, "learning_rate": 6.610652323658867e-07, "loss": 0.2825, "step": 7054 }, { "epoch": 1.6792407925269233, "grad_norm": 0.43917028015511345, "learning_rate": 6.601076631834135e-07, "loss": 0.3524, "step": 7055 }, { "epoch": 1.679478788599988, "grad_norm": 0.35999526768311224, "learning_rate": 6.591507390163049e-07, "loss": 0.3362, "step": 7056 }, { "epoch": 1.6797167846730527, "grad_norm": 0.3958931440700458, "learning_rate": 6.581944600067847e-07, "loss": 0.299, "step": 7057 }, { "epoch": 1.6799547807461177, "grad_norm": 0.40723904451700604, "learning_rate": 6.572388262969781e-07, "loss": 0.3382, "step": 7058 }, { "epoch": 1.6801927768191824, "grad_norm": 0.37213316814749897, "learning_rate": 6.562838380289155e-07, "loss": 0.3613, "step": 7059 }, { "epoch": 1.6804307728922474, "grad_norm": 0.3775949694068757, "learning_rate": 6.553294953445344e-07, "loss": 0.3037, "step": 7060 }, { "epoch": 1.6806687689653121, "grad_norm": 0.4302521600801045, "learning_rate": 6.543757983856724e-07, "loss": 0.2837, "step": 7061 }, { "epoch": 1.6809067650383769, "grad_norm": 0.3744153721393427, "learning_rate": 6.534227472940718e-07, "loss": 0.3633, "step": 7062 }, { "epoch": 1.6811447611114416, "grad_norm": 0.3601321543703086, "learning_rate": 6.524703422113803e-07, "loss": 0.3534, "step": 7063 }, { "epoch": 1.6813827571845064, "grad_norm": 0.3978043541470634, "learning_rate": 6.515185832791493e-07, "loss": 0.2833, "step": 7064 }, { "epoch": 1.681620753257571, "grad_norm": 0.3650335732859001, "learning_rate": 6.50567470638832e-07, "loss": 0.3149, "step": 7065 }, { "epoch": 1.681858749330636, "grad_norm": 0.38338937802115375, "learning_rate": 6.49617004431789e-07, "loss": 0.3667, "step": 7066 }, { "epoch": 1.6820967454037008, "grad_norm": 0.40126636400041343, "learning_rate": 6.486671847992826e-07, "loss": 0.3275, "step": 7067 }, { "epoch": 1.6823347414767658, "grad_norm": 0.4648961323218715, "learning_rate": 6.477180118824788e-07, "loss": 0.2984, "step": 7068 }, { "epoch": 1.6825727375498305, "grad_norm": 0.3776079096365654, "learning_rate": 6.467694858224488e-07, "loss": 0.3161, "step": 7069 }, { "epoch": 1.6828107336228952, "grad_norm": 0.3654293611333949, "learning_rate": 6.458216067601669e-07, "loss": 0.3563, "step": 7070 }, { "epoch": 1.68304872969596, "grad_norm": 0.3938912402346958, "learning_rate": 6.448743748365116e-07, "loss": 0.2843, "step": 7071 }, { "epoch": 1.6832867257690247, "grad_norm": 0.40857518732879755, "learning_rate": 6.439277901922647e-07, "loss": 0.2857, "step": 7072 }, { "epoch": 1.6835247218420895, "grad_norm": 0.37369837115535326, "learning_rate": 6.429818529681115e-07, "loss": 0.3408, "step": 7073 }, { "epoch": 1.6837627179151544, "grad_norm": 0.566489154234049, "learning_rate": 6.420365633046433e-07, "loss": 0.3525, "step": 7074 }, { "epoch": 1.6840007139882192, "grad_norm": 0.40096527952286826, "learning_rate": 6.410919213423522e-07, "loss": 0.2766, "step": 7075 }, { "epoch": 1.6842387100612841, "grad_norm": 0.3835017576074419, "learning_rate": 6.40147927221636e-07, "loss": 0.3331, "step": 7076 }, { "epoch": 1.6844767061343489, "grad_norm": 0.4193203253283927, "learning_rate": 6.392045810827957e-07, "loss": 0.3989, "step": 7077 }, { "epoch": 1.6847147022074136, "grad_norm": 0.3714016805455296, "learning_rate": 6.382618830660353e-07, "loss": 0.2883, "step": 7078 }, { "epoch": 1.6849526982804783, "grad_norm": 0.381171748269651, "learning_rate": 6.373198333114633e-07, "loss": 0.2767, "step": 7079 }, { "epoch": 1.685190694353543, "grad_norm": 0.39675713102514104, "learning_rate": 6.363784319590916e-07, "loss": 0.3425, "step": 7080 }, { "epoch": 1.6854286904266078, "grad_norm": 0.4002931733011814, "learning_rate": 6.354376791488343e-07, "loss": 0.365, "step": 7081 }, { "epoch": 1.6856666864996728, "grad_norm": 0.37518227406606824, "learning_rate": 6.344975750205129e-07, "loss": 0.2542, "step": 7082 }, { "epoch": 1.6859046825727375, "grad_norm": 0.3638772212144532, "learning_rate": 6.335581197138496e-07, "loss": 0.3343, "step": 7083 }, { "epoch": 1.6861426786458025, "grad_norm": 0.3890298957493163, "learning_rate": 6.326193133684705e-07, "loss": 0.3964, "step": 7084 }, { "epoch": 1.6863806747188672, "grad_norm": 0.36557376045651974, "learning_rate": 6.31681156123905e-07, "loss": 0.2866, "step": 7085 }, { "epoch": 1.686618670791932, "grad_norm": 0.43312850934266955, "learning_rate": 6.307436481195866e-07, "loss": 0.2639, "step": 7086 }, { "epoch": 1.6868566668649967, "grad_norm": 0.3528390333914725, "learning_rate": 6.298067894948512e-07, "loss": 0.3374, "step": 7087 }, { "epoch": 1.6870946629380614, "grad_norm": 0.34344204575606535, "learning_rate": 6.288705803889411e-07, "loss": 0.3649, "step": 7088 }, { "epoch": 1.6873326590111262, "grad_norm": 0.38507748165524075, "learning_rate": 6.279350209409995e-07, "loss": 0.2789, "step": 7089 }, { "epoch": 1.6875706550841911, "grad_norm": 0.3803975185088582, "learning_rate": 6.270001112900736e-07, "loss": 0.3158, "step": 7090 }, { "epoch": 1.6878086511572559, "grad_norm": 0.3872558923912277, "learning_rate": 6.260658515751139e-07, "loss": 0.3569, "step": 7091 }, { "epoch": 1.6880466472303208, "grad_norm": 0.3794470357203257, "learning_rate": 6.251322419349748e-07, "loss": 0.2971, "step": 7092 }, { "epoch": 1.6882846433033856, "grad_norm": 0.38546650413181716, "learning_rate": 6.241992825084131e-07, "loss": 0.2802, "step": 7093 }, { "epoch": 1.6885226393764503, "grad_norm": 0.37651470887537414, "learning_rate": 6.232669734340907e-07, "loss": 0.2954, "step": 7094 }, { "epoch": 1.688760635449515, "grad_norm": 0.3885988985334594, "learning_rate": 6.223353148505706e-07, "loss": 0.3914, "step": 7095 }, { "epoch": 1.6889986315225798, "grad_norm": 0.3511153525219931, "learning_rate": 6.21404306896321e-07, "loss": 0.2711, "step": 7096 }, { "epoch": 1.6892366275956445, "grad_norm": 0.37884092771461375, "learning_rate": 6.204739497097129e-07, "loss": 0.2851, "step": 7097 }, { "epoch": 1.6894746236687095, "grad_norm": 0.387337614231269, "learning_rate": 6.1954424342902e-07, "loss": 0.3698, "step": 7098 }, { "epoch": 1.6897126197417742, "grad_norm": 0.3546161227406718, "learning_rate": 6.186151881924202e-07, "loss": 0.3163, "step": 7099 }, { "epoch": 1.6899506158148392, "grad_norm": 0.3730093557742399, "learning_rate": 6.176867841379919e-07, "loss": 0.2882, "step": 7100 }, { "epoch": 1.690188611887904, "grad_norm": 0.38800933074459065, "learning_rate": 6.16759031403722e-07, "loss": 0.3274, "step": 7101 }, { "epoch": 1.6904266079609687, "grad_norm": 0.39567528441392813, "learning_rate": 6.158319301274962e-07, "loss": 0.3702, "step": 7102 }, { "epoch": 1.6906646040340334, "grad_norm": 0.3813083923210492, "learning_rate": 6.14905480447105e-07, "loss": 0.2895, "step": 7103 }, { "epoch": 1.6909026001070981, "grad_norm": 0.383759332023671, "learning_rate": 6.139796825002409e-07, "loss": 0.2933, "step": 7104 }, { "epoch": 1.6911405961801629, "grad_norm": 0.3886124154209392, "learning_rate": 6.130545364245011e-07, "loss": 0.3486, "step": 7105 }, { "epoch": 1.6913785922532278, "grad_norm": 0.3865197625654805, "learning_rate": 6.121300423573851e-07, "loss": 0.3864, "step": 7106 }, { "epoch": 1.6916165883262926, "grad_norm": 0.4342380259704541, "learning_rate": 6.112062004362957e-07, "loss": 0.2811, "step": 7107 }, { "epoch": 1.6918545843993575, "grad_norm": 0.38137795050806866, "learning_rate": 6.102830107985369e-07, "loss": 0.3386, "step": 7108 }, { "epoch": 1.6920925804724223, "grad_norm": 0.39290592099891664, "learning_rate": 6.093604735813202e-07, "loss": 0.3844, "step": 7109 }, { "epoch": 1.692330576545487, "grad_norm": 0.36869581745026214, "learning_rate": 6.084385889217565e-07, "loss": 0.3033, "step": 7110 }, { "epoch": 1.6925685726185518, "grad_norm": 0.3781454086182421, "learning_rate": 6.075173569568605e-07, "loss": 0.2898, "step": 7111 }, { "epoch": 1.6928065686916165, "grad_norm": 0.404850762268356, "learning_rate": 6.065967778235499e-07, "loss": 0.3371, "step": 7112 }, { "epoch": 1.6930445647646812, "grad_norm": 0.41163273024333985, "learning_rate": 6.056768516586453e-07, "loss": 0.3546, "step": 7113 }, { "epoch": 1.6932825608377462, "grad_norm": 0.3498312393278164, "learning_rate": 6.047575785988702e-07, "loss": 0.3086, "step": 7114 }, { "epoch": 1.693520556910811, "grad_norm": 0.39282228454424806, "learning_rate": 6.038389587808535e-07, "loss": 0.2948, "step": 7115 }, { "epoch": 1.693758552983876, "grad_norm": 0.3988205195809829, "learning_rate": 6.029209923411228e-07, "loss": 0.3773, "step": 7116 }, { "epoch": 1.6939965490569406, "grad_norm": 0.3666168567052365, "learning_rate": 6.02003679416111e-07, "loss": 0.3061, "step": 7117 }, { "epoch": 1.6942345451300054, "grad_norm": 0.4069498736744767, "learning_rate": 6.010870201421537e-07, "loss": 0.2989, "step": 7118 }, { "epoch": 1.6944725412030701, "grad_norm": 0.3967722726787557, "learning_rate": 6.001710146554896e-07, "loss": 0.3228, "step": 7119 }, { "epoch": 1.6947105372761349, "grad_norm": 0.36380296589170585, "learning_rate": 5.992556630922585e-07, "loss": 0.3656, "step": 7120 }, { "epoch": 1.6949485333491996, "grad_norm": 0.3690082479582151, "learning_rate": 5.983409655885053e-07, "loss": 0.3001, "step": 7121 }, { "epoch": 1.6951865294222646, "grad_norm": 0.506841855343455, "learning_rate": 5.974269222801765e-07, "loss": 0.2926, "step": 7122 }, { "epoch": 1.6954245254953293, "grad_norm": 0.4096814364673883, "learning_rate": 5.965135333031213e-07, "loss": 0.3593, "step": 7123 }, { "epoch": 1.6956625215683943, "grad_norm": 0.38736545080244283, "learning_rate": 5.956007987930923e-07, "loss": 0.3661, "step": 7124 }, { "epoch": 1.695900517641459, "grad_norm": 0.3611883206027303, "learning_rate": 5.946887188857442e-07, "loss": 0.2677, "step": 7125 }, { "epoch": 1.6961385137145237, "grad_norm": 0.38872165541623244, "learning_rate": 5.937772937166342e-07, "loss": 0.3316, "step": 7126 }, { "epoch": 1.6963765097875885, "grad_norm": 0.36656583624158623, "learning_rate": 5.928665234212233e-07, "loss": 0.4022, "step": 7127 }, { "epoch": 1.6966145058606532, "grad_norm": 0.35861126589804326, "learning_rate": 5.919564081348733e-07, "loss": 0.2982, "step": 7128 }, { "epoch": 1.696852501933718, "grad_norm": 0.36794026503347826, "learning_rate": 5.910469479928521e-07, "loss": 0.3044, "step": 7129 }, { "epoch": 1.697090498006783, "grad_norm": 0.3868057111195459, "learning_rate": 5.90138143130326e-07, "loss": 0.3525, "step": 7130 }, { "epoch": 1.6973284940798476, "grad_norm": 0.3694887390714355, "learning_rate": 5.89229993682367e-07, "loss": 0.3685, "step": 7131 }, { "epoch": 1.6975664901529126, "grad_norm": 0.3863367650360045, "learning_rate": 5.883224997839482e-07, "loss": 0.2786, "step": 7132 }, { "epoch": 1.6978044862259773, "grad_norm": 0.40288142462099663, "learning_rate": 5.874156615699455e-07, "loss": 0.3231, "step": 7133 }, { "epoch": 1.698042482299042, "grad_norm": 0.39261984171695463, "learning_rate": 5.865094791751375e-07, "loss": 0.3858, "step": 7134 }, { "epoch": 1.6982804783721068, "grad_norm": 0.38986076248425855, "learning_rate": 5.856039527342044e-07, "loss": 0.3153, "step": 7135 }, { "epoch": 1.6985184744451716, "grad_norm": 0.3869365959701934, "learning_rate": 5.846990823817316e-07, "loss": 0.2779, "step": 7136 }, { "epoch": 1.6987564705182363, "grad_norm": 0.38638500561242545, "learning_rate": 5.837948682522048e-07, "loss": 0.3351, "step": 7137 }, { "epoch": 1.6989944665913013, "grad_norm": 0.36095089742651565, "learning_rate": 5.828913104800121e-07, "loss": 0.3707, "step": 7138 }, { "epoch": 1.699232462664366, "grad_norm": 0.37138461340811796, "learning_rate": 5.819884091994444e-07, "loss": 0.2501, "step": 7139 }, { "epoch": 1.699470458737431, "grad_norm": 0.3601206372592032, "learning_rate": 5.810861645446958e-07, "loss": 0.2903, "step": 7140 }, { "epoch": 1.6997084548104957, "grad_norm": 0.3929879463329381, "learning_rate": 5.801845766498615e-07, "loss": 0.3893, "step": 7141 }, { "epoch": 1.6999464508835604, "grad_norm": 0.36103944808905125, "learning_rate": 5.792836456489392e-07, "loss": 0.3077, "step": 7142 }, { "epoch": 1.7001844469566252, "grad_norm": 0.41181030157128806, "learning_rate": 5.783833716758314e-07, "loss": 0.2777, "step": 7143 }, { "epoch": 1.70042244302969, "grad_norm": 0.36292079451006326, "learning_rate": 5.774837548643403e-07, "loss": 0.3314, "step": 7144 }, { "epoch": 1.7006604391027547, "grad_norm": 0.38043247398479435, "learning_rate": 5.765847953481707e-07, "loss": 0.3875, "step": 7145 }, { "epoch": 1.7008984351758196, "grad_norm": 0.38907488954363156, "learning_rate": 5.756864932609307e-07, "loss": 0.2598, "step": 7146 }, { "epoch": 1.7011364312488844, "grad_norm": 0.4014263611690131, "learning_rate": 5.747888487361303e-07, "loss": 0.2803, "step": 7147 }, { "epoch": 1.7013744273219493, "grad_norm": 0.37583669721002044, "learning_rate": 5.738918619071809e-07, "loss": 0.3621, "step": 7148 }, { "epoch": 1.701612423395014, "grad_norm": 0.3633761512343576, "learning_rate": 5.729955329073978e-07, "loss": 0.3107, "step": 7149 }, { "epoch": 1.7018504194680788, "grad_norm": 0.3922899931026979, "learning_rate": 5.720998618699974e-07, "loss": 0.2716, "step": 7150 }, { "epoch": 1.7020884155411435, "grad_norm": 0.3822926529873602, "learning_rate": 5.712048489280981e-07, "loss": 0.3296, "step": 7151 }, { "epoch": 1.7023264116142083, "grad_norm": 0.3755985930742893, "learning_rate": 5.703104942147214e-07, "loss": 0.3824, "step": 7152 }, { "epoch": 1.702564407687273, "grad_norm": 0.36980830385212704, "learning_rate": 5.694167978627907e-07, "loss": 0.312, "step": 7153 }, { "epoch": 1.702802403760338, "grad_norm": 0.432008104893982, "learning_rate": 5.685237600051314e-07, "loss": 0.2991, "step": 7154 }, { "epoch": 1.7030403998334027, "grad_norm": 0.3870322232137778, "learning_rate": 5.676313807744705e-07, "loss": 0.3511, "step": 7155 }, { "epoch": 1.7032783959064677, "grad_norm": 0.3975439565591834, "learning_rate": 5.667396603034369e-07, "loss": 0.3807, "step": 7156 }, { "epoch": 1.7035163919795324, "grad_norm": 0.3536210783757654, "learning_rate": 5.658485987245648e-07, "loss": 0.2829, "step": 7157 }, { "epoch": 1.7037543880525972, "grad_norm": 0.38718378271877263, "learning_rate": 5.64958196170286e-07, "loss": 0.3196, "step": 7158 }, { "epoch": 1.703992384125662, "grad_norm": 0.40914299947041166, "learning_rate": 5.640684527729373e-07, "loss": 0.3649, "step": 7159 }, { "epoch": 1.7042303801987266, "grad_norm": 0.37188576462750905, "learning_rate": 5.631793686647558e-07, "loss": 0.316, "step": 7160 }, { "epoch": 1.7044683762717914, "grad_norm": 0.37633208910101296, "learning_rate": 5.622909439778817e-07, "loss": 0.2676, "step": 7161 }, { "epoch": 1.7047063723448563, "grad_norm": 0.38454873470389395, "learning_rate": 5.614031788443563e-07, "loss": 0.3507, "step": 7162 }, { "epoch": 1.704944368417921, "grad_norm": 0.3977198853767724, "learning_rate": 5.605160733961252e-07, "loss": 0.3885, "step": 7163 }, { "epoch": 1.705182364490986, "grad_norm": 0.3933331704312476, "learning_rate": 5.596296277650332e-07, "loss": 0.2696, "step": 7164 }, { "epoch": 1.7054203605640508, "grad_norm": 0.3822921368220508, "learning_rate": 5.587438420828273e-07, "loss": 0.3277, "step": 7165 }, { "epoch": 1.7056583566371155, "grad_norm": 0.3861803935557107, "learning_rate": 5.578587164811583e-07, "loss": 0.3735, "step": 7166 }, { "epoch": 1.7058963527101803, "grad_norm": 0.34946179290548746, "learning_rate": 5.569742510915776e-07, "loss": 0.292, "step": 7167 }, { "epoch": 1.706134348783245, "grad_norm": 0.3777091948818582, "learning_rate": 5.56090446045538e-07, "loss": 0.2919, "step": 7168 }, { "epoch": 1.7063723448563097, "grad_norm": 0.39415615795103837, "learning_rate": 5.552073014743942e-07, "loss": 0.3222, "step": 7169 }, { "epoch": 1.7066103409293747, "grad_norm": 0.38807660041300635, "learning_rate": 5.543248175094051e-07, "loss": 0.3784, "step": 7170 }, { "epoch": 1.7068483370024394, "grad_norm": 0.36815447326497414, "learning_rate": 5.534429942817293e-07, "loss": 0.2682, "step": 7171 }, { "epoch": 1.7070863330755044, "grad_norm": 0.3782786257896689, "learning_rate": 5.525618319224269e-07, "loss": 0.3099, "step": 7172 }, { "epoch": 1.7073243291485691, "grad_norm": 0.3774779346032418, "learning_rate": 5.516813305624602e-07, "loss": 0.3569, "step": 7173 }, { "epoch": 1.7075623252216339, "grad_norm": 0.3825672141737441, "learning_rate": 5.508014903326941e-07, "loss": 0.339, "step": 7174 }, { "epoch": 1.7078003212946986, "grad_norm": 0.38678799012825976, "learning_rate": 5.499223113638946e-07, "loss": 0.2895, "step": 7175 }, { "epoch": 1.7080383173677633, "grad_norm": 0.45675241641075215, "learning_rate": 5.490437937867287e-07, "loss": 0.3156, "step": 7176 }, { "epoch": 1.708276313440828, "grad_norm": 0.3990012412168306, "learning_rate": 5.481659377317672e-07, "loss": 0.4195, "step": 7177 }, { "epoch": 1.708514309513893, "grad_norm": 0.361980501450529, "learning_rate": 5.472887433294799e-07, "loss": 0.2905, "step": 7178 }, { "epoch": 1.7087523055869578, "grad_norm": 0.3934064270709845, "learning_rate": 5.464122107102399e-07, "loss": 0.2835, "step": 7179 }, { "epoch": 1.7089903016600227, "grad_norm": 0.39868212939298053, "learning_rate": 5.455363400043223e-07, "loss": 0.3682, "step": 7180 }, { "epoch": 1.7092282977330875, "grad_norm": 0.37266869092332056, "learning_rate": 5.446611313419026e-07, "loss": 0.3462, "step": 7181 }, { "epoch": 1.7094662938061522, "grad_norm": 0.3622657497062413, "learning_rate": 5.437865848530588e-07, "loss": 0.2699, "step": 7182 }, { "epoch": 1.709704289879217, "grad_norm": 0.37818014208202183, "learning_rate": 5.429127006677681e-07, "loss": 0.3102, "step": 7183 }, { "epoch": 1.7099422859522817, "grad_norm": 0.4190339066632442, "learning_rate": 5.420394789159151e-07, "loss": 0.3571, "step": 7184 }, { "epoch": 1.7101802820253464, "grad_norm": 0.4294561415916188, "learning_rate": 5.411669197272795e-07, "loss": 0.2779, "step": 7185 }, { "epoch": 1.7104182780984114, "grad_norm": 0.3737799198142464, "learning_rate": 5.402950232315457e-07, "loss": 0.2662, "step": 7186 }, { "epoch": 1.7106562741714761, "grad_norm": 0.3802806165041149, "learning_rate": 5.394237895582999e-07, "loss": 0.359, "step": 7187 }, { "epoch": 1.710894270244541, "grad_norm": 0.3824886968859595, "learning_rate": 5.385532188370279e-07, "loss": 0.401, "step": 7188 }, { "epoch": 1.7111322663176058, "grad_norm": 0.3682750615164672, "learning_rate": 5.376833111971175e-07, "loss": 0.2592, "step": 7189 }, { "epoch": 1.7113702623906706, "grad_norm": 0.4080980675820481, "learning_rate": 5.368140667678607e-07, "loss": 0.3052, "step": 7190 }, { "epoch": 1.7116082584637353, "grad_norm": 0.4315797792499158, "learning_rate": 5.359454856784469e-07, "loss": 0.3828, "step": 7191 }, { "epoch": 1.7118462545368, "grad_norm": 0.3873206456391449, "learning_rate": 5.350775680579695e-07, "loss": 0.2887, "step": 7192 }, { "epoch": 1.7120842506098648, "grad_norm": 0.39557005651691873, "learning_rate": 5.342103140354226e-07, "loss": 0.2943, "step": 7193 }, { "epoch": 1.7123222466829298, "grad_norm": 0.3875924022674923, "learning_rate": 5.333437237397015e-07, "loss": 0.3258, "step": 7194 }, { "epoch": 1.7125602427559945, "grad_norm": 0.39590844421878507, "learning_rate": 5.324777972996026e-07, "loss": 0.3854, "step": 7195 }, { "epoch": 1.7127982388290595, "grad_norm": 0.37400174892709304, "learning_rate": 5.316125348438239e-07, "loss": 0.2619, "step": 7196 }, { "epoch": 1.7130362349021242, "grad_norm": 0.40507111453694483, "learning_rate": 5.307479365009644e-07, "loss": 0.2943, "step": 7197 }, { "epoch": 1.713274230975189, "grad_norm": 0.41809861423219297, "learning_rate": 5.298840023995267e-07, "loss": 0.3762, "step": 7198 }, { "epoch": 1.7135122270482537, "grad_norm": 0.3901582679717628, "learning_rate": 5.290207326679109e-07, "loss": 0.3525, "step": 7199 }, { "epoch": 1.7137502231213184, "grad_norm": 0.37330950872565466, "learning_rate": 5.281581274344216e-07, "loss": 0.2854, "step": 7200 }, { "epoch": 1.7139882191943832, "grad_norm": 0.3781668431340482, "learning_rate": 5.272961868272625e-07, "loss": 0.3256, "step": 7201 }, { "epoch": 1.7142262152674481, "grad_norm": 0.38276965275384967, "learning_rate": 5.264349109745392e-07, "loss": 0.3804, "step": 7202 }, { "epoch": 1.7144642113405129, "grad_norm": 0.37802592125250717, "learning_rate": 5.25574300004259e-07, "loss": 0.2817, "step": 7203 }, { "epoch": 1.7147022074135778, "grad_norm": 0.39422912395070164, "learning_rate": 5.247143540443295e-07, "loss": 0.3247, "step": 7204 }, { "epoch": 1.7149402034866426, "grad_norm": 0.35708805870299815, "learning_rate": 5.2385507322256e-07, "loss": 0.3755, "step": 7205 }, { "epoch": 1.7151781995597073, "grad_norm": 0.3733848792164133, "learning_rate": 5.229964576666618e-07, "loss": 0.3567, "step": 7206 }, { "epoch": 1.715416195632772, "grad_norm": 0.4043891988399861, "learning_rate": 5.221385075042451e-07, "loss": 0.2927, "step": 7207 }, { "epoch": 1.7156541917058368, "grad_norm": 0.36868782474294576, "learning_rate": 5.212812228628234e-07, "loss": 0.3259, "step": 7208 }, { "epoch": 1.7158921877789015, "grad_norm": 0.38185612816752407, "learning_rate": 5.204246038698102e-07, "loss": 0.39, "step": 7209 }, { "epoch": 1.7161301838519665, "grad_norm": 0.3691193320372495, "learning_rate": 5.195686506525205e-07, "loss": 0.306, "step": 7210 }, { "epoch": 1.7163681799250312, "grad_norm": 0.4010679948783104, "learning_rate": 5.187133633381686e-07, "loss": 0.2615, "step": 7211 }, { "epoch": 1.7166061759980962, "grad_norm": 0.3691780944853636, "learning_rate": 5.178587420538733e-07, "loss": 0.3175, "step": 7212 }, { "epoch": 1.716844172071161, "grad_norm": 0.3869003239326718, "learning_rate": 5.17004786926652e-07, "loss": 0.3637, "step": 7213 }, { "epoch": 1.7170821681442257, "grad_norm": 0.39395271592154035, "learning_rate": 5.161514980834232e-07, "loss": 0.2891, "step": 7214 }, { "epoch": 1.7173201642172904, "grad_norm": 0.3683495140171302, "learning_rate": 5.152988756510063e-07, "loss": 0.3021, "step": 7215 }, { "epoch": 1.7175581602903551, "grad_norm": 0.37064038864613685, "learning_rate": 5.144469197561231e-07, "loss": 0.3637, "step": 7216 }, { "epoch": 1.7177961563634199, "grad_norm": 0.387245884205873, "learning_rate": 5.135956305253953e-07, "loss": 0.3318, "step": 7217 }, { "epoch": 1.7180341524364848, "grad_norm": 0.3648749289239642, "learning_rate": 5.127450080853447e-07, "loss": 0.2687, "step": 7218 }, { "epoch": 1.7182721485095496, "grad_norm": 0.3709832312749167, "learning_rate": 5.118950525623955e-07, "loss": 0.315, "step": 7219 }, { "epoch": 1.7185101445826145, "grad_norm": 0.3771433733813478, "learning_rate": 5.110457640828714e-07, "loss": 0.3693, "step": 7220 }, { "epoch": 1.7187481406556793, "grad_norm": 0.37194166595478395, "learning_rate": 5.101971427729985e-07, "loss": 0.306, "step": 7221 }, { "epoch": 1.718986136728744, "grad_norm": 0.4198291358677723, "learning_rate": 5.09349188758903e-07, "loss": 0.2858, "step": 7222 }, { "epoch": 1.7192241328018087, "grad_norm": 0.4062451136719214, "learning_rate": 5.085019021666104e-07, "loss": 0.3829, "step": 7223 }, { "epoch": 1.7194621288748735, "grad_norm": 0.38423840200623954, "learning_rate": 5.076552831220505e-07, "loss": 0.3337, "step": 7224 }, { "epoch": 1.7197001249479382, "grad_norm": 0.3799743269319547, "learning_rate": 5.068093317510492e-07, "loss": 0.2786, "step": 7225 }, { "epoch": 1.7199381210210032, "grad_norm": 0.378826385850787, "learning_rate": 5.059640481793382e-07, "loss": 0.3434, "step": 7226 }, { "epoch": 1.720176117094068, "grad_norm": 0.38264535191194776, "learning_rate": 5.05119432532547e-07, "loss": 0.3617, "step": 7227 }, { "epoch": 1.7204141131671329, "grad_norm": 0.4302946941716753, "learning_rate": 5.042754849362063e-07, "loss": 0.2898, "step": 7228 }, { "epoch": 1.7206521092401976, "grad_norm": 0.35178529312525003, "learning_rate": 5.03432205515747e-07, "loss": 0.2742, "step": 7229 }, { "epoch": 1.7208901053132624, "grad_norm": 0.3644446480635659, "learning_rate": 5.025895943965021e-07, "loss": 0.3651, "step": 7230 }, { "epoch": 1.721128101386327, "grad_norm": 0.35818553869400055, "learning_rate": 5.01747651703704e-07, "loss": 0.3395, "step": 7231 }, { "epoch": 1.7213660974593918, "grad_norm": 0.3840129788680463, "learning_rate": 5.009063775624857e-07, "loss": 0.2691, "step": 7232 }, { "epoch": 1.7216040935324566, "grad_norm": 0.3982821339866859, "learning_rate": 5.000657720978824e-07, "loss": 0.3457, "step": 7233 }, { "epoch": 1.7218420896055215, "grad_norm": 0.40131118490637396, "learning_rate": 4.992258354348284e-07, "loss": 0.3598, "step": 7234 }, { "epoch": 1.7220800856785863, "grad_norm": 0.38055567769870746, "learning_rate": 4.983865676981586e-07, "loss": 0.2975, "step": 7235 }, { "epoch": 1.7223180817516512, "grad_norm": 0.41080399456136174, "learning_rate": 4.9754796901261e-07, "loss": 0.2739, "step": 7236 }, { "epoch": 1.722556077824716, "grad_norm": 0.3662126709880048, "learning_rate": 4.96710039502818e-07, "loss": 0.3467, "step": 7237 }, { "epoch": 1.7227940738977807, "grad_norm": 0.3938043911249227, "learning_rate": 4.958727792933194e-07, "loss": 0.3637, "step": 7238 }, { "epoch": 1.7230320699708455, "grad_norm": 0.4031184138478153, "learning_rate": 4.950361885085536e-07, "loss": 0.2873, "step": 7239 }, { "epoch": 1.7232700660439102, "grad_norm": 0.4025403942805333, "learning_rate": 4.942002672728575e-07, "loss": 0.2969, "step": 7240 }, { "epoch": 1.723508062116975, "grad_norm": 0.3959523479792395, "learning_rate": 4.933650157104697e-07, "loss": 0.3719, "step": 7241 }, { "epoch": 1.72374605819004, "grad_norm": 0.36786887087342524, "learning_rate": 4.925304339455289e-07, "loss": 0.3423, "step": 7242 }, { "epoch": 1.7239840542631046, "grad_norm": 0.3874665309151205, "learning_rate": 4.916965221020753e-07, "loss": 0.2973, "step": 7243 }, { "epoch": 1.7242220503361696, "grad_norm": 0.3880295711150305, "learning_rate": 4.908632803040492e-07, "loss": 0.3129, "step": 7244 }, { "epoch": 1.7244600464092343, "grad_norm": 0.3748397703051552, "learning_rate": 4.900307086752898e-07, "loss": 0.3819, "step": 7245 }, { "epoch": 1.724698042482299, "grad_norm": 0.359732975677239, "learning_rate": 4.891988073395382e-07, "loss": 0.2676, "step": 7246 }, { "epoch": 1.7249360385553638, "grad_norm": 0.35344522976400145, "learning_rate": 4.88367576420436e-07, "loss": 0.286, "step": 7247 }, { "epoch": 1.7251740346284286, "grad_norm": 0.3568660899520547, "learning_rate": 4.875370160415243e-07, "loss": 0.3399, "step": 7248 }, { "epoch": 1.7254120307014933, "grad_norm": 0.371131074798834, "learning_rate": 4.867071263262452e-07, "loss": 0.3362, "step": 7249 }, { "epoch": 1.7256500267745583, "grad_norm": 0.3910514202327229, "learning_rate": 4.858779073979408e-07, "loss": 0.2673, "step": 7250 }, { "epoch": 1.725888022847623, "grad_norm": 0.3545398248925455, "learning_rate": 4.850493593798528e-07, "loss": 0.3272, "step": 7251 }, { "epoch": 1.726126018920688, "grad_norm": 0.3979049249797813, "learning_rate": 4.842214823951236e-07, "loss": 0.359, "step": 7252 }, { "epoch": 1.7263640149937527, "grad_norm": 0.3517488882321978, "learning_rate": 4.833942765667981e-07, "loss": 0.2883, "step": 7253 }, { "epoch": 1.7266020110668174, "grad_norm": 0.37339923080107096, "learning_rate": 4.825677420178187e-07, "loss": 0.2837, "step": 7254 }, { "epoch": 1.7268400071398822, "grad_norm": 0.3888091649358466, "learning_rate": 4.817418788710287e-07, "loss": 0.3544, "step": 7255 }, { "epoch": 1.727078003212947, "grad_norm": 0.37811647136667986, "learning_rate": 4.809166872491716e-07, "loss": 0.3118, "step": 7256 }, { "epoch": 1.7273159992860116, "grad_norm": 0.3737143324650708, "learning_rate": 4.800921672748921e-07, "loss": 0.267, "step": 7257 }, { "epoch": 1.7275539953590766, "grad_norm": 0.48902821392635987, "learning_rate": 4.792683190707331e-07, "loss": 0.3132, "step": 7258 }, { "epoch": 1.7277919914321413, "grad_norm": 0.3593715757632593, "learning_rate": 4.784451427591396e-07, "loss": 0.3794, "step": 7259 }, { "epoch": 1.7280299875052063, "grad_norm": 0.37488004076926507, "learning_rate": 4.776226384624555e-07, "loss": 0.2982, "step": 7260 }, { "epoch": 1.728267983578271, "grad_norm": 0.4022945491362904, "learning_rate": 4.7680080630292613e-07, "loss": 0.3157, "step": 7261 }, { "epoch": 1.7285059796513358, "grad_norm": 0.39397197065419187, "learning_rate": 4.75979646402695e-07, "loss": 0.3123, "step": 7262 }, { "epoch": 1.7287439757244005, "grad_norm": 0.40175779262681727, "learning_rate": 4.7515915888380724e-07, "loss": 0.3971, "step": 7263 }, { "epoch": 1.7289819717974653, "grad_norm": 0.4001343316717571, "learning_rate": 4.7433934386820813e-07, "loss": 0.2903, "step": 7264 }, { "epoch": 1.72921996787053, "grad_norm": 0.39178481360532014, "learning_rate": 4.7352020147774067e-07, "loss": 0.3215, "step": 7265 }, { "epoch": 1.729457963943595, "grad_norm": 0.3668638438326148, "learning_rate": 4.7270173183415203e-07, "loss": 0.3626, "step": 7266 }, { "epoch": 1.7296959600166597, "grad_norm": 0.381515302995729, "learning_rate": 4.7188393505908594e-07, "loss": 0.3434, "step": 7267 }, { "epoch": 1.7299339560897247, "grad_norm": 0.38224529914825683, "learning_rate": 4.710668112740874e-07, "loss": 0.2686, "step": 7268 }, { "epoch": 1.7301719521627894, "grad_norm": 0.39868968626428386, "learning_rate": 4.7025036060059983e-07, "loss": 0.3514, "step": 7269 }, { "epoch": 1.7304099482358541, "grad_norm": 0.3877292058096328, "learning_rate": 4.694345831599706e-07, "loss": 0.373, "step": 7270 }, { "epoch": 1.7306479443089189, "grad_norm": 0.364088046457619, "learning_rate": 4.686194790734427e-07, "loss": 0.2926, "step": 7271 }, { "epoch": 1.7308859403819836, "grad_norm": 0.3655121541245187, "learning_rate": 4.6780504846216155e-07, "loss": 0.3324, "step": 7272 }, { "epoch": 1.7311239364550484, "grad_norm": 0.37380101505935226, "learning_rate": 4.6699129144717135e-07, "loss": 0.3662, "step": 7273 }, { "epoch": 1.7313619325281133, "grad_norm": 0.35389840940426953, "learning_rate": 4.6617820814941594e-07, "loss": 0.3308, "step": 7274 }, { "epoch": 1.731599928601178, "grad_norm": 0.37982947460360994, "learning_rate": 4.6536579868974083e-07, "loss": 0.2662, "step": 7275 }, { "epoch": 1.731837924674243, "grad_norm": 0.38660456006967564, "learning_rate": 4.6455406318888896e-07, "loss": 0.3559, "step": 7276 }, { "epoch": 1.7320759207473078, "grad_norm": 0.42338649966136893, "learning_rate": 4.6374300176750484e-07, "loss": 0.3795, "step": 7277 }, { "epoch": 1.7323139168203725, "grad_norm": 0.364628505385949, "learning_rate": 4.629326145461327e-07, "loss": 0.2946, "step": 7278 }, { "epoch": 1.7325519128934372, "grad_norm": 0.36740380393685235, "learning_rate": 4.6212290164521554e-07, "loss": 0.3445, "step": 7279 }, { "epoch": 1.732789908966502, "grad_norm": 0.40395669425071407, "learning_rate": 4.613138631850955e-07, "loss": 0.3394, "step": 7280 }, { "epoch": 1.7330279050395667, "grad_norm": 0.3954298208790007, "learning_rate": 4.6050549928601864e-07, "loss": 0.3488, "step": 7281 }, { "epoch": 1.7332659011126317, "grad_norm": 0.3667594695117182, "learning_rate": 4.59697810068126e-07, "loss": 0.3099, "step": 7282 }, { "epoch": 1.7335038971856964, "grad_norm": 0.37536423714685246, "learning_rate": 4.588907956514599e-07, "loss": 0.3318, "step": 7283 }, { "epoch": 1.7337418932587614, "grad_norm": 0.4090601125824329, "learning_rate": 4.5808445615596386e-07, "loss": 0.3491, "step": 7284 }, { "epoch": 1.7339798893318261, "grad_norm": 0.40724673701790437, "learning_rate": 4.5727879170147927e-07, "loss": 0.2832, "step": 7285 }, { "epoch": 1.7342178854048909, "grad_norm": 0.4192280684140144, "learning_rate": 4.564738024077475e-07, "loss": 0.2829, "step": 7286 }, { "epoch": 1.7344558814779556, "grad_norm": 0.3683379060762051, "learning_rate": 4.5566948839441014e-07, "loss": 0.3196, "step": 7287 }, { "epoch": 1.7346938775510203, "grad_norm": 0.37268180835647635, "learning_rate": 4.5486584978100766e-07, "loss": 0.3712, "step": 7288 }, { "epoch": 1.734931873624085, "grad_norm": 0.3896434055889146, "learning_rate": 4.5406288668698175e-07, "loss": 0.2722, "step": 7289 }, { "epoch": 1.73516986969715, "grad_norm": 0.37283077254852875, "learning_rate": 4.5326059923167185e-07, "loss": 0.2979, "step": 7290 }, { "epoch": 1.7354078657702148, "grad_norm": 0.40147117472261845, "learning_rate": 4.524589875343177e-07, "loss": 0.3637, "step": 7291 }, { "epoch": 1.7356458618432797, "grad_norm": 0.3635087671699396, "learning_rate": 4.5165805171405786e-07, "loss": 0.3019, "step": 7292 }, { "epoch": 1.7358838579163445, "grad_norm": 0.359877509083195, "learning_rate": 4.508577918899326e-07, "loss": 0.2844, "step": 7293 }, { "epoch": 1.7361218539894092, "grad_norm": 0.3989712129889001, "learning_rate": 4.500582081808802e-07, "loss": 0.3003, "step": 7294 }, { "epoch": 1.736359850062474, "grad_norm": 0.43670169269521797, "learning_rate": 4.492593007057383e-07, "loss": 0.3806, "step": 7295 }, { "epoch": 1.7365978461355387, "grad_norm": 0.4151227878379709, "learning_rate": 4.484610695832431e-07, "loss": 0.2829, "step": 7296 }, { "epoch": 1.7368358422086034, "grad_norm": 0.39427740496318525, "learning_rate": 4.476635149320341e-07, "loss": 0.2821, "step": 7297 }, { "epoch": 1.7370738382816684, "grad_norm": 0.39483412534077605, "learning_rate": 4.4686663687064537e-07, "loss": 0.3707, "step": 7298 }, { "epoch": 1.7373118343547331, "grad_norm": 0.3811903403162823, "learning_rate": 4.4607043551751385e-07, "loss": 0.3131, "step": 7299 }, { "epoch": 1.737549830427798, "grad_norm": 0.3787340344910432, "learning_rate": 4.452749109909743e-07, "loss": 0.2882, "step": 7300 }, { "epoch": 1.7377878265008628, "grad_norm": 0.3977977827225253, "learning_rate": 4.4448006340926163e-07, "loss": 0.2914, "step": 7301 }, { "epoch": 1.7380258225739276, "grad_norm": 0.3471686985062694, "learning_rate": 4.4368589289050966e-07, "loss": 0.3736, "step": 7302 }, { "epoch": 1.7382638186469923, "grad_norm": 0.3567461740090027, "learning_rate": 4.428923995527512e-07, "loss": 0.2677, "step": 7303 }, { "epoch": 1.738501814720057, "grad_norm": 0.3895718708014465, "learning_rate": 4.4209958351392024e-07, "loss": 0.2761, "step": 7304 }, { "epoch": 1.7387398107931218, "grad_norm": 0.4146289851718209, "learning_rate": 4.4130744489184805e-07, "loss": 0.336, "step": 7305 }, { "epoch": 1.7389778068661867, "grad_norm": 0.36115931059450657, "learning_rate": 4.4051598380426606e-07, "loss": 0.3318, "step": 7306 }, { "epoch": 1.7392158029392515, "grad_norm": 0.3901592985931496, "learning_rate": 4.3972520036880406e-07, "loss": 0.2939, "step": 7307 }, { "epoch": 1.7394537990123164, "grad_norm": 0.3861549670050002, "learning_rate": 4.389350947029941e-07, "loss": 0.3264, "step": 7308 }, { "epoch": 1.7396917950853812, "grad_norm": 0.39366268242787633, "learning_rate": 4.381456669242645e-07, "loss": 0.3827, "step": 7309 }, { "epoch": 1.739929791158446, "grad_norm": 0.4147331435398097, "learning_rate": 4.3735691714994366e-07, "loss": 0.3196, "step": 7310 }, { "epoch": 1.7401677872315107, "grad_norm": 0.4033560437583594, "learning_rate": 4.365688454972589e-07, "loss": 0.284, "step": 7311 }, { "epoch": 1.7404057833045754, "grad_norm": 0.36568041838421067, "learning_rate": 4.357814520833381e-07, "loss": 0.3219, "step": 7312 }, { "epoch": 1.7406437793776401, "grad_norm": 0.33395093697829437, "learning_rate": 4.349947370252067e-07, "loss": 0.3806, "step": 7313 }, { "epoch": 1.740881775450705, "grad_norm": 0.37826064768858925, "learning_rate": 4.342087004397899e-07, "loss": 0.3458, "step": 7314 }, { "epoch": 1.7411197715237698, "grad_norm": 0.36042290741386074, "learning_rate": 4.3342334244391215e-07, "loss": 0.2922, "step": 7315 }, { "epoch": 1.7413577675968348, "grad_norm": 0.3742173084173869, "learning_rate": 4.326386631542978e-07, "loss": 0.3492, "step": 7316 }, { "epoch": 1.7415957636698995, "grad_norm": 0.3611783561237634, "learning_rate": 4.3185466268756916e-07, "loss": 0.3284, "step": 7317 }, { "epoch": 1.7418337597429643, "grad_norm": 0.36444937618317436, "learning_rate": 4.310713411602485e-07, "loss": 0.284, "step": 7318 }, { "epoch": 1.742071755816029, "grad_norm": 0.3619558848348502, "learning_rate": 4.30288698688755e-07, "loss": 0.3108, "step": 7319 }, { "epoch": 1.7423097518890938, "grad_norm": 0.40390658897210363, "learning_rate": 4.295067353894111e-07, "loss": 0.3621, "step": 7320 }, { "epoch": 1.7425477479621585, "grad_norm": 0.40363738867672744, "learning_rate": 4.28725451378435e-07, "loss": 0.2972, "step": 7321 }, { "epoch": 1.7427857440352235, "grad_norm": 0.3606197822662685, "learning_rate": 4.279448467719444e-07, "loss": 0.2996, "step": 7322 }, { "epoch": 1.7430237401082882, "grad_norm": 0.37968170265939494, "learning_rate": 4.271649216859558e-07, "loss": 0.3488, "step": 7323 }, { "epoch": 1.7432617361813532, "grad_norm": 0.3581004782699803, "learning_rate": 4.263856762363877e-07, "loss": 0.3275, "step": 7324 }, { "epoch": 1.743499732254418, "grad_norm": 0.36888664733014503, "learning_rate": 4.256071105390536e-07, "loss": 0.2782, "step": 7325 }, { "epoch": 1.7437377283274826, "grad_norm": 0.3736404487957657, "learning_rate": 4.2482922470966804e-07, "loss": 0.3517, "step": 7326 }, { "epoch": 1.7439757244005474, "grad_norm": 0.3838130180426928, "learning_rate": 4.2405201886384364e-07, "loss": 0.3888, "step": 7327 }, { "epoch": 1.7442137204736121, "grad_norm": 0.39228239050902514, "learning_rate": 4.232754931170929e-07, "loss": 0.2859, "step": 7328 }, { "epoch": 1.7444517165466769, "grad_norm": 0.38813077534276835, "learning_rate": 4.22499647584827e-07, "loss": 0.2981, "step": 7329 }, { "epoch": 1.7446897126197418, "grad_norm": 0.37696497911373844, "learning_rate": 4.2172448238235464e-07, "loss": 0.3654, "step": 7330 }, { "epoch": 1.7449277086928066, "grad_norm": 0.39042415238351036, "learning_rate": 4.2094999762488597e-07, "loss": 0.3358, "step": 7331 }, { "epoch": 1.7451657047658715, "grad_norm": 0.3811058522780075, "learning_rate": 4.2017619342752723e-07, "loss": 0.2515, "step": 7332 }, { "epoch": 1.7454037008389363, "grad_norm": 0.3830369971256063, "learning_rate": 4.194030699052859e-07, "loss": 0.3463, "step": 7333 }, { "epoch": 1.745641696912001, "grad_norm": 0.3863649827385546, "learning_rate": 4.1863062717306724e-07, "loss": 0.3765, "step": 7334 }, { "epoch": 1.7458796929850657, "grad_norm": 0.3740836674286429, "learning_rate": 4.178588653456733e-07, "loss": 0.2836, "step": 7335 }, { "epoch": 1.7461176890581305, "grad_norm": 0.40533187846971, "learning_rate": 4.1708778453781017e-07, "loss": 0.285, "step": 7336 }, { "epoch": 1.7463556851311952, "grad_norm": 0.37887619317785987, "learning_rate": 4.163173848640778e-07, "loss": 0.356, "step": 7337 }, { "epoch": 1.7465936812042602, "grad_norm": 0.3617723264544318, "learning_rate": 4.155476664389768e-07, "loss": 0.3547, "step": 7338 }, { "epoch": 1.746831677277325, "grad_norm": 0.37992103116699155, "learning_rate": 4.147786293769068e-07, "loss": 0.3065, "step": 7339 }, { "epoch": 1.7470696733503899, "grad_norm": 0.3848821730711763, "learning_rate": 4.140102737921653e-07, "loss": 0.2986, "step": 7340 }, { "epoch": 1.7473076694234546, "grad_norm": 0.37504711339234986, "learning_rate": 4.1324259979894865e-07, "loss": 0.3887, "step": 7341 }, { "epoch": 1.7475456654965194, "grad_norm": 0.38275904514715237, "learning_rate": 4.1247560751135283e-07, "loss": 0.3301, "step": 7342 }, { "epoch": 1.747783661569584, "grad_norm": 0.38728504743609043, "learning_rate": 4.117092970433717e-07, "loss": 0.2825, "step": 7343 }, { "epoch": 1.7480216576426488, "grad_norm": 0.3869386086927362, "learning_rate": 4.10943668508898e-07, "loss": 0.3368, "step": 7344 }, { "epoch": 1.7482596537157136, "grad_norm": 0.41661324253240994, "learning_rate": 4.101787220217229e-07, "loss": 0.3815, "step": 7345 }, { "epoch": 1.7484976497887785, "grad_norm": 0.34535425484790955, "learning_rate": 4.09414457695535e-07, "loss": 0.2993, "step": 7346 }, { "epoch": 1.7487356458618433, "grad_norm": 0.3806804219160961, "learning_rate": 4.0865087564392556e-07, "loss": 0.3206, "step": 7347 }, { "epoch": 1.7489736419349082, "grad_norm": 0.39462354482109935, "learning_rate": 4.0788797598038054e-07, "loss": 0.3489, "step": 7348 }, { "epoch": 1.749211638007973, "grad_norm": 0.3527473981630956, "learning_rate": 4.0712575881828585e-07, "loss": 0.3032, "step": 7349 }, { "epoch": 1.7494496340810377, "grad_norm": 0.37882523971462584, "learning_rate": 4.0636422427092483e-07, "loss": 0.2507, "step": 7350 }, { "epoch": 1.7496876301541024, "grad_norm": 0.4076311890215, "learning_rate": 4.056033724514813e-07, "loss": 0.3176, "step": 7351 }, { "epoch": 1.7499256262271672, "grad_norm": 0.38737649623817405, "learning_rate": 4.048432034730371e-07, "loss": 0.3973, "step": 7352 }, { "epoch": 1.750163622300232, "grad_norm": 0.36019762556095697, "learning_rate": 4.040837174485718e-07, "loss": 0.2926, "step": 7353 }, { "epoch": 1.7504016183732969, "grad_norm": 0.3757932232128984, "learning_rate": 4.033249144909629e-07, "loss": 0.288, "step": 7354 }, { "epoch": 1.7506396144463616, "grad_norm": 0.3784418578541022, "learning_rate": 4.0256679471298856e-07, "loss": 0.3387, "step": 7355 }, { "epoch": 1.7508776105194266, "grad_norm": 0.36293048107545833, "learning_rate": 4.01809358227323e-07, "loss": 0.3649, "step": 7356 }, { "epoch": 1.7511156065924913, "grad_norm": 0.37609039329825666, "learning_rate": 4.0105260514654076e-07, "loss": 0.3159, "step": 7357 }, { "epoch": 1.751353602665556, "grad_norm": 0.4037114023056679, "learning_rate": 4.00296535583114e-07, "loss": 0.3318, "step": 7358 }, { "epoch": 1.7515915987386208, "grad_norm": 0.42110395329000605, "learning_rate": 3.9954114964941336e-07, "loss": 0.3875, "step": 7359 }, { "epoch": 1.7518295948116855, "grad_norm": 0.39591006197133266, "learning_rate": 3.9878644745770745e-07, "loss": 0.3029, "step": 7360 }, { "epoch": 1.7520675908847503, "grad_norm": 0.395799256886029, "learning_rate": 3.9803242912016427e-07, "loss": 0.288, "step": 7361 }, { "epoch": 1.7523055869578152, "grad_norm": 0.397500164340299, "learning_rate": 3.972790947488481e-07, "loss": 0.3269, "step": 7362 }, { "epoch": 1.75254358303088, "grad_norm": 0.40196952358276516, "learning_rate": 3.965264444557254e-07, "loss": 0.3584, "step": 7363 }, { "epoch": 1.752781579103945, "grad_norm": 0.37722120669417086, "learning_rate": 3.9577447835265734e-07, "loss": 0.2634, "step": 7364 }, { "epoch": 1.7530195751770097, "grad_norm": 0.40416706874239217, "learning_rate": 3.950231965514051e-07, "loss": 0.3041, "step": 7365 }, { "epoch": 1.7532575712500744, "grad_norm": 0.3524829672281888, "learning_rate": 3.94272599163627e-07, "loss": 0.3895, "step": 7366 }, { "epoch": 1.7534955673231392, "grad_norm": 0.40869397302371735, "learning_rate": 3.935226863008812e-07, "loss": 0.2801, "step": 7367 }, { "epoch": 1.753733563396204, "grad_norm": 0.3844052458895688, "learning_rate": 3.9277345807462285e-07, "loss": 0.2741, "step": 7368 }, { "epoch": 1.7539715594692686, "grad_norm": 0.3940308898609167, "learning_rate": 3.920249145962063e-07, "loss": 0.3044, "step": 7369 }, { "epoch": 1.7542095555423336, "grad_norm": 0.37516037241099764, "learning_rate": 3.9127705597688305e-07, "loss": 0.3628, "step": 7370 }, { "epoch": 1.7544475516153983, "grad_norm": 0.36404405464445827, "learning_rate": 3.9052988232780364e-07, "loss": 0.2845, "step": 7371 }, { "epoch": 1.7546855476884633, "grad_norm": 0.37799405975146255, "learning_rate": 3.8978339376001594e-07, "loss": 0.2724, "step": 7372 }, { "epoch": 1.754923543761528, "grad_norm": 0.3851431947005174, "learning_rate": 3.8903759038446676e-07, "loss": 0.3726, "step": 7373 }, { "epoch": 1.7551615398345928, "grad_norm": 0.35648046802246963, "learning_rate": 3.882924723120024e-07, "loss": 0.3407, "step": 7374 }, { "epoch": 1.7553995359076575, "grad_norm": 0.3805853248880385, "learning_rate": 3.875480396533648e-07, "loss": 0.276, "step": 7375 }, { "epoch": 1.7556375319807223, "grad_norm": 0.43047194201246963, "learning_rate": 3.8680429251919504e-07, "loss": 0.3141, "step": 7376 }, { "epoch": 1.755875528053787, "grad_norm": 0.3672797811079228, "learning_rate": 3.8606123102003124e-07, "loss": 0.3954, "step": 7377 }, { "epoch": 1.756113524126852, "grad_norm": 0.3655093853884352, "learning_rate": 3.8531885526631286e-07, "loss": 0.266, "step": 7378 }, { "epoch": 1.7563515201999167, "grad_norm": 0.4008961738434252, "learning_rate": 3.845771653683744e-07, "loss": 0.2754, "step": 7379 }, { "epoch": 1.7565895162729817, "grad_norm": 0.3754989479985082, "learning_rate": 3.8383616143644884e-07, "loss": 0.3759, "step": 7380 }, { "epoch": 1.7568275123460464, "grad_norm": 0.3998697477070844, "learning_rate": 3.8309584358066866e-07, "loss": 0.3457, "step": 7381 }, { "epoch": 1.7570655084191111, "grad_norm": 0.3805072843335442, "learning_rate": 3.823562119110624e-07, "loss": 0.305, "step": 7382 }, { "epoch": 1.7573035044921759, "grad_norm": 0.38537738584226183, "learning_rate": 3.816172665375584e-07, "loss": 0.3079, "step": 7383 }, { "epoch": 1.7575415005652406, "grad_norm": 0.37422390675327777, "learning_rate": 3.808790075699814e-07, "loss": 0.3951, "step": 7384 }, { "epoch": 1.7577794966383053, "grad_norm": 0.3857626430504062, "learning_rate": 3.80141435118056e-07, "loss": 0.2866, "step": 7385 }, { "epoch": 1.7580174927113703, "grad_norm": 0.3848693921935248, "learning_rate": 3.794045492914028e-07, "loss": 0.2799, "step": 7386 }, { "epoch": 1.758255488784435, "grad_norm": 0.37773875751263675, "learning_rate": 3.786683501995414e-07, "loss": 0.3419, "step": 7387 }, { "epoch": 1.7584934848575, "grad_norm": 0.39284310713498916, "learning_rate": 3.779328379518898e-07, "loss": 0.3779, "step": 7388 }, { "epoch": 1.7587314809305648, "grad_norm": 0.38708082552128015, "learning_rate": 3.771980126577629e-07, "loss": 0.2952, "step": 7389 }, { "epoch": 1.7589694770036295, "grad_norm": 0.3840167669470753, "learning_rate": 3.7646387442637266e-07, "loss": 0.3068, "step": 7390 }, { "epoch": 1.7592074730766942, "grad_norm": 0.37577064742826666, "learning_rate": 3.757304233668324e-07, "loss": 0.3682, "step": 7391 }, { "epoch": 1.759445469149759, "grad_norm": 0.37383506429697316, "learning_rate": 3.749976595881505e-07, "loss": 0.2946, "step": 7392 }, { "epoch": 1.7596834652228237, "grad_norm": 0.3938798316973006, "learning_rate": 3.742655831992331e-07, "loss": 0.2602, "step": 7393 }, { "epoch": 1.7599214612958887, "grad_norm": 0.382277877373375, "learning_rate": 3.735341943088855e-07, "loss": 0.3399, "step": 7394 }, { "epoch": 1.7601594573689534, "grad_norm": 0.4438405373662082, "learning_rate": 3.7280349302580954e-07, "loss": 0.4037, "step": 7395 }, { "epoch": 1.7603974534420184, "grad_norm": 0.38449221017426694, "learning_rate": 3.720734794586062e-07, "loss": 0.303, "step": 7396 }, { "epoch": 1.760635449515083, "grad_norm": 0.37868139806924034, "learning_rate": 3.7134415371577303e-07, "loss": 0.2986, "step": 7397 }, { "epoch": 1.7608734455881478, "grad_norm": 0.3943383369047064, "learning_rate": 3.7061551590570565e-07, "loss": 0.3536, "step": 7398 }, { "epoch": 1.7611114416612126, "grad_norm": 0.36959772575231714, "learning_rate": 3.698875661366985e-07, "loss": 0.2997, "step": 7399 }, { "epoch": 1.7613494377342773, "grad_norm": 0.40020141349107846, "learning_rate": 3.691603045169417e-07, "loss": 0.2694, "step": 7400 }, { "epoch": 1.761587433807342, "grad_norm": 0.37634863654754686, "learning_rate": 3.684337311545261e-07, "loss": 0.3371, "step": 7401 }, { "epoch": 1.761825429880407, "grad_norm": 0.38956164399358506, "learning_rate": 3.677078461574368e-07, "loss": 0.3591, "step": 7402 }, { "epoch": 1.7620634259534718, "grad_norm": 0.3602522289158944, "learning_rate": 3.6698264963355936e-07, "loss": 0.2936, "step": 7403 }, { "epoch": 1.7623014220265367, "grad_norm": 0.3761002128092343, "learning_rate": 3.662581416906746e-07, "loss": 0.3009, "step": 7404 }, { "epoch": 1.7625394180996015, "grad_norm": 0.4007846365643332, "learning_rate": 3.6553432243646435e-07, "loss": 0.3523, "step": 7405 }, { "epoch": 1.7627774141726662, "grad_norm": 0.35494662132278193, "learning_rate": 3.6481119197850466e-07, "loss": 0.3274, "step": 7406 }, { "epoch": 1.763015410245731, "grad_norm": 0.3790695232145245, "learning_rate": 3.640887504242707e-07, "loss": 0.2875, "step": 7407 }, { "epoch": 1.7632534063187957, "grad_norm": 0.4084614475813453, "learning_rate": 3.6336699788113605e-07, "loss": 0.3147, "step": 7408 }, { "epoch": 1.7634914023918604, "grad_norm": 0.37225641815520183, "learning_rate": 3.626459344563693e-07, "loss": 0.384, "step": 7409 }, { "epoch": 1.7637293984649254, "grad_norm": 0.36150847703005184, "learning_rate": 3.619255602571403e-07, "loss": 0.3092, "step": 7410 }, { "epoch": 1.7639673945379901, "grad_norm": 0.3522846334053475, "learning_rate": 3.612058753905129e-07, "loss": 0.2549, "step": 7411 }, { "epoch": 1.764205390611055, "grad_norm": 0.36618028447488987, "learning_rate": 3.604868799634509e-07, "loss": 0.3265, "step": 7412 }, { "epoch": 1.7644433866841198, "grad_norm": 0.3566376234452962, "learning_rate": 3.597685740828144e-07, "loss": 0.3793, "step": 7413 }, { "epoch": 1.7646813827571846, "grad_norm": 0.36692847158051417, "learning_rate": 3.5905095785536135e-07, "loss": 0.2872, "step": 7414 }, { "epoch": 1.7649193788302493, "grad_norm": 0.37257656439382925, "learning_rate": 3.5833403138774756e-07, "loss": 0.3225, "step": 7415 }, { "epoch": 1.765157374903314, "grad_norm": 0.3749686078408963, "learning_rate": 3.5761779478652614e-07, "loss": 0.3883, "step": 7416 }, { "epoch": 1.7653953709763788, "grad_norm": 0.3583625843405949, "learning_rate": 3.56902248158148e-07, "loss": 0.2977, "step": 7417 }, { "epoch": 1.7656333670494437, "grad_norm": 0.38524990025382316, "learning_rate": 3.5618739160895864e-07, "loss": 0.2877, "step": 7418 }, { "epoch": 1.7658713631225085, "grad_norm": 0.37604720661475144, "learning_rate": 3.55473225245207e-07, "loss": 0.3219, "step": 7419 }, { "epoch": 1.7661093591955734, "grad_norm": 0.3846604314430845, "learning_rate": 3.5475974917303366e-07, "loss": 0.3566, "step": 7420 }, { "epoch": 1.7663473552686382, "grad_norm": 0.3680527964727019, "learning_rate": 3.5404696349847944e-07, "loss": 0.2627, "step": 7421 }, { "epoch": 1.766585351341703, "grad_norm": 0.3991689202195992, "learning_rate": 3.5333486832748176e-07, "loss": 0.2867, "step": 7422 }, { "epoch": 1.7668233474147677, "grad_norm": 0.3999787206661557, "learning_rate": 3.5262346376587544e-07, "loss": 0.3645, "step": 7423 }, { "epoch": 1.7670613434878324, "grad_norm": 0.3757038581644989, "learning_rate": 3.5191274991939306e-07, "loss": 0.3368, "step": 7424 }, { "epoch": 1.7672993395608971, "grad_norm": 0.382050405949238, "learning_rate": 3.512027268936641e-07, "loss": 0.2718, "step": 7425 }, { "epoch": 1.767537335633962, "grad_norm": 0.38279994990044025, "learning_rate": 3.504933947942157e-07, "loss": 0.3359, "step": 7426 }, { "epoch": 1.7677753317070268, "grad_norm": 0.38419625146613884, "learning_rate": 3.4978475372647145e-07, "loss": 0.3792, "step": 7427 }, { "epoch": 1.7680133277800918, "grad_norm": 0.3984627542168908, "learning_rate": 3.4907680379575426e-07, "loss": 0.2835, "step": 7428 }, { "epoch": 1.7682513238531565, "grad_norm": 0.37364849406584333, "learning_rate": 3.4836954510728215e-07, "loss": 0.2711, "step": 7429 }, { "epoch": 1.7684893199262213, "grad_norm": 0.42900751703465184, "learning_rate": 3.476629777661716e-07, "loss": 0.3473, "step": 7430 }, { "epoch": 1.768727315999286, "grad_norm": 0.3800418075868142, "learning_rate": 3.469571018774348e-07, "loss": 0.3456, "step": 7431 }, { "epoch": 1.7689653120723507, "grad_norm": 0.43203134753145855, "learning_rate": 3.462519175459844e-07, "loss": 0.2851, "step": 7432 }, { "epoch": 1.7692033081454155, "grad_norm": 0.40985553269034813, "learning_rate": 3.4554742487662716e-07, "loss": 0.3091, "step": 7433 }, { "epoch": 1.7694413042184804, "grad_norm": 0.38887010959356444, "learning_rate": 3.448436239740682e-07, "loss": 0.3986, "step": 7434 }, { "epoch": 1.7696793002915452, "grad_norm": 0.36915654756950994, "learning_rate": 3.4414051494291e-07, "loss": 0.2855, "step": 7435 }, { "epoch": 1.7699172963646101, "grad_norm": 0.386681678539916, "learning_rate": 3.434380978876517e-07, "loss": 0.3074, "step": 7436 }, { "epoch": 1.7701552924376749, "grad_norm": 0.37941576584294967, "learning_rate": 3.4273637291268926e-07, "loss": 0.3312, "step": 7437 }, { "epoch": 1.7703932885107396, "grad_norm": 0.3603145085548946, "learning_rate": 3.4203534012231753e-07, "loss": 0.3694, "step": 7438 }, { "epoch": 1.7706312845838044, "grad_norm": 0.38237676060980935, "learning_rate": 3.413349996207266e-07, "loss": 0.2842, "step": 7439 }, { "epoch": 1.770869280656869, "grad_norm": 0.39329602306550704, "learning_rate": 3.4063535151200424e-07, "loss": 0.3089, "step": 7440 }, { "epoch": 1.7711072767299338, "grad_norm": 0.3520088960926671, "learning_rate": 3.3993639590013615e-07, "loss": 0.367, "step": 7441 }, { "epoch": 1.7713452728029988, "grad_norm": 0.3549709470925406, "learning_rate": 3.3923813288900376e-07, "loss": 0.303, "step": 7442 }, { "epoch": 1.7715832688760635, "grad_norm": 0.3698693474185596, "learning_rate": 3.3854056258238675e-07, "loss": 0.2703, "step": 7443 }, { "epoch": 1.7718212649491285, "grad_norm": 0.3744847290334765, "learning_rate": 3.378436850839612e-07, "loss": 0.3261, "step": 7444 }, { "epoch": 1.7720592610221932, "grad_norm": 0.3841273561075149, "learning_rate": 3.3714750049729903e-07, "loss": 0.3532, "step": 7445 }, { "epoch": 1.772297257095258, "grad_norm": 0.3673423324794477, "learning_rate": 3.364520089258727e-07, "loss": 0.2856, "step": 7446 }, { "epoch": 1.7725352531683227, "grad_norm": 0.39543112396129604, "learning_rate": 3.3575721047304887e-07, "loss": 0.3264, "step": 7447 }, { "epoch": 1.7727732492413875, "grad_norm": 0.38069356140983673, "learning_rate": 3.350631052420911e-07, "loss": 0.3384, "step": 7448 }, { "epoch": 1.7730112453144522, "grad_norm": 0.39721194025902407, "learning_rate": 3.3436969333616064e-07, "loss": 0.3504, "step": 7449 }, { "epoch": 1.7732492413875172, "grad_norm": 0.41294146060598996, "learning_rate": 3.3367697485831573e-07, "loss": 0.309, "step": 7450 }, { "epoch": 1.773487237460582, "grad_norm": 0.4004419253097933, "learning_rate": 3.3298494991151234e-07, "loss": 0.3409, "step": 7451 }, { "epoch": 1.7737252335336469, "grad_norm": 0.3787521305790956, "learning_rate": 3.322936185986009e-07, "loss": 0.4034, "step": 7452 }, { "epoch": 1.7739632296067116, "grad_norm": 0.36357233108658643, "learning_rate": 3.31602981022332e-07, "loss": 0.2957, "step": 7453 }, { "epoch": 1.7742012256797763, "grad_norm": 0.3702141009628778, "learning_rate": 3.309130372853492e-07, "loss": 0.2788, "step": 7454 }, { "epoch": 1.774439221752841, "grad_norm": 0.41114542834457846, "learning_rate": 3.302237874901981e-07, "loss": 0.3846, "step": 7455 }, { "epoch": 1.7746772178259058, "grad_norm": 0.35691967017327136, "learning_rate": 3.2953523173931633e-07, "loss": 0.353, "step": 7456 }, { "epoch": 1.7749152138989706, "grad_norm": 0.3949364534978649, "learning_rate": 3.2884737013504143e-07, "loss": 0.2854, "step": 7457 }, { "epoch": 1.7751532099720353, "grad_norm": 0.38350872066977443, "learning_rate": 3.2816020277960604e-07, "loss": 0.3232, "step": 7458 }, { "epoch": 1.7753912060451003, "grad_norm": 0.4021154125147116, "learning_rate": 3.2747372977513905e-07, "loss": 0.3592, "step": 7459 }, { "epoch": 1.775629202118165, "grad_norm": 0.3879804363704007, "learning_rate": 3.2678795122366933e-07, "loss": 0.2945, "step": 7460 }, { "epoch": 1.77586719819123, "grad_norm": 0.4237627394310612, "learning_rate": 3.2610286722711993e-07, "loss": 0.2732, "step": 7461 }, { "epoch": 1.7761051942642947, "grad_norm": 0.3924917993001975, "learning_rate": 3.2541847788731153e-07, "loss": 0.3461, "step": 7462 }, { "epoch": 1.7763431903373594, "grad_norm": 0.3627340154994653, "learning_rate": 3.2473478330595996e-07, "loss": 0.3414, "step": 7463 }, { "epoch": 1.7765811864104242, "grad_norm": 0.3879803415205633, "learning_rate": 3.240517835846807e-07, "loss": 0.2865, "step": 7464 }, { "epoch": 1.776819182483489, "grad_norm": 0.36721296027414413, "learning_rate": 3.233694788249836e-07, "loss": 0.3411, "step": 7465 }, { "epoch": 1.7770571785565537, "grad_norm": 0.37560707117027603, "learning_rate": 3.2268786912827645e-07, "loss": 0.3818, "step": 7466 }, { "epoch": 1.7772951746296186, "grad_norm": 0.37942928089496036, "learning_rate": 3.220069545958632e-07, "loss": 0.3262, "step": 7467 }, { "epoch": 1.7775331707026834, "grad_norm": 0.35866524083415724, "learning_rate": 3.2132673532894397e-07, "loss": 0.2738, "step": 7468 }, { "epoch": 1.7777711667757483, "grad_norm": 0.4579563149174641, "learning_rate": 3.206472114286169e-07, "loss": 0.3463, "step": 7469 }, { "epoch": 1.778009162848813, "grad_norm": 0.374846770314416, "learning_rate": 3.1996838299587604e-07, "loss": 0.369, "step": 7470 }, { "epoch": 1.7782471589218778, "grad_norm": 0.3575595417175333, "learning_rate": 3.192902501316114e-07, "loss": 0.322, "step": 7471 }, { "epoch": 1.7784851549949425, "grad_norm": 0.3793283868853329, "learning_rate": 3.186128129366112e-07, "loss": 0.2936, "step": 7472 }, { "epoch": 1.7787231510680073, "grad_norm": 0.4086026355069854, "learning_rate": 3.179360715115576e-07, "loss": 0.355, "step": 7473 }, { "epoch": 1.778961147141072, "grad_norm": 0.35603960063048445, "learning_rate": 3.172600259570335e-07, "loss": 0.3446, "step": 7474 }, { "epoch": 1.779199143214137, "grad_norm": 0.3594124787406389, "learning_rate": 3.165846763735153e-07, "loss": 0.2763, "step": 7475 }, { "epoch": 1.7794371392872017, "grad_norm": 0.35661873853503745, "learning_rate": 3.1591002286137597e-07, "loss": 0.3205, "step": 7476 }, { "epoch": 1.7796751353602667, "grad_norm": 0.37799283767267144, "learning_rate": 3.152360655208864e-07, "loss": 0.3899, "step": 7477 }, { "epoch": 1.7799131314333314, "grad_norm": 0.3943463609437691, "learning_rate": 3.1456280445221256e-07, "loss": 0.3142, "step": 7478 }, { "epoch": 1.7801511275063961, "grad_norm": 0.3930479973881174, "learning_rate": 3.138902397554183e-07, "loss": 0.2794, "step": 7479 }, { "epoch": 1.7803891235794609, "grad_norm": 0.37084479307111856, "learning_rate": 3.132183715304632e-07, "loss": 0.3584, "step": 7480 }, { "epoch": 1.7806271196525256, "grad_norm": 0.3640689537300512, "learning_rate": 3.125471998772023e-07, "loss": 0.3562, "step": 7481 }, { "epoch": 1.7808651157255904, "grad_norm": 0.3798227659828128, "learning_rate": 3.118767248953908e-07, "loss": 0.2747, "step": 7482 }, { "epoch": 1.7811031117986553, "grad_norm": 0.39461973353159785, "learning_rate": 3.112069466846762e-07, "loss": 0.3147, "step": 7483 }, { "epoch": 1.78134110787172, "grad_norm": 0.39876904022229337, "learning_rate": 3.105378653446045e-07, "loss": 0.4053, "step": 7484 }, { "epoch": 1.781579103944785, "grad_norm": 0.363220255107949, "learning_rate": 3.098694809746183e-07, "loss": 0.3208, "step": 7485 }, { "epoch": 1.7818171000178498, "grad_norm": 0.4052186140806434, "learning_rate": 3.092017936740549e-07, "loss": 0.2928, "step": 7486 }, { "epoch": 1.7820550960909145, "grad_norm": 0.3622874186549825, "learning_rate": 3.085348035421487e-07, "loss": 0.3231, "step": 7487 }, { "epoch": 1.7822930921639792, "grad_norm": 0.39892992338775146, "learning_rate": 3.0786851067803326e-07, "loss": 0.3581, "step": 7488 }, { "epoch": 1.782531088237044, "grad_norm": 0.4289428994215822, "learning_rate": 3.0720291518073485e-07, "loss": 0.2747, "step": 7489 }, { "epoch": 1.7827690843101087, "grad_norm": 0.4117645952035017, "learning_rate": 3.065380171491772e-07, "loss": 0.3095, "step": 7490 }, { "epoch": 1.7830070803831737, "grad_norm": 0.3635531180905779, "learning_rate": 3.0587381668218117e-07, "loss": 0.3608, "step": 7491 }, { "epoch": 1.7832450764562384, "grad_norm": 0.3678683816926744, "learning_rate": 3.052103138784629e-07, "loss": 0.3101, "step": 7492 }, { "epoch": 1.7834830725293034, "grad_norm": 0.3893055958544627, "learning_rate": 3.045475088366351e-07, "loss": 0.2619, "step": 7493 }, { "epoch": 1.7837210686023681, "grad_norm": 0.3745763428587718, "learning_rate": 3.038854016552079e-07, "loss": 0.3315, "step": 7494 }, { "epoch": 1.7839590646754329, "grad_norm": 0.3797581457610855, "learning_rate": 3.0322399243258583e-07, "loss": 0.3711, "step": 7495 }, { "epoch": 1.7841970607484976, "grad_norm": 0.39193763190558745, "learning_rate": 3.0256328126707147e-07, "loss": 0.3174, "step": 7496 }, { "epoch": 1.7844350568215623, "grad_norm": 0.3855942012320072, "learning_rate": 3.0190326825686234e-07, "loss": 0.3247, "step": 7497 }, { "epoch": 1.784673052894627, "grad_norm": 0.3719708597803907, "learning_rate": 3.012439535000533e-07, "loss": 0.3487, "step": 7498 }, { "epoch": 1.784911048967692, "grad_norm": 0.3389285568386847, "learning_rate": 3.005853370946338e-07, "loss": 0.3018, "step": 7499 }, { "epoch": 1.7851490450407568, "grad_norm": 0.3686403079627146, "learning_rate": 2.9992741913849044e-07, "loss": 0.2694, "step": 7500 }, { "epoch": 1.7853870411138217, "grad_norm": 0.3739646758479323, "learning_rate": 2.9927019972940785e-07, "loss": 0.3262, "step": 7501 }, { "epoch": 1.7856250371868865, "grad_norm": 0.3733801667577494, "learning_rate": 2.9861367896506397e-07, "loss": 0.3706, "step": 7502 }, { "epoch": 1.7858630332599512, "grad_norm": 0.3896825661654586, "learning_rate": 2.9795785694303413e-07, "loss": 0.3129, "step": 7503 }, { "epoch": 1.786101029333016, "grad_norm": 0.41296329329174347, "learning_rate": 2.9730273376078923e-07, "loss": 0.2796, "step": 7504 }, { "epoch": 1.7863390254060807, "grad_norm": 0.3719130039198455, "learning_rate": 2.9664830951569743e-07, "loss": 0.3465, "step": 7505 }, { "epoch": 1.7865770214791454, "grad_norm": 0.3562312311470605, "learning_rate": 2.959945843050227e-07, "loss": 0.3219, "step": 7506 }, { "epoch": 1.7868150175522104, "grad_norm": 0.38540030460210417, "learning_rate": 2.9534155822592336e-07, "loss": 0.2877, "step": 7507 }, { "epoch": 1.7870530136252751, "grad_norm": 0.378036461217732, "learning_rate": 2.9468923137545626e-07, "loss": 0.3406, "step": 7508 }, { "epoch": 1.78729100969834, "grad_norm": 0.402519853904251, "learning_rate": 2.940376038505732e-07, "loss": 0.3822, "step": 7509 }, { "epoch": 1.7875290057714048, "grad_norm": 0.37522413264398013, "learning_rate": 2.933866757481224e-07, "loss": 0.3143, "step": 7510 }, { "epoch": 1.7877670018444696, "grad_norm": 0.3791804024356112, "learning_rate": 2.9273644716484753e-07, "loss": 0.2764, "step": 7511 }, { "epoch": 1.7880049979175343, "grad_norm": 0.40095271597833637, "learning_rate": 2.9208691819738844e-07, "loss": 0.3297, "step": 7512 }, { "epoch": 1.788242993990599, "grad_norm": 0.39865047915199964, "learning_rate": 2.914380889422819e-07, "loss": 0.3583, "step": 7513 }, { "epoch": 1.7884809900636638, "grad_norm": 0.35173957915577636, "learning_rate": 2.9078995949595847e-07, "loss": 0.289, "step": 7514 }, { "epoch": 1.7887189861367288, "grad_norm": 0.3817357829174564, "learning_rate": 2.901425299547483e-07, "loss": 0.3104, "step": 7515 }, { "epoch": 1.7889569822097935, "grad_norm": 0.37478155473085156, "learning_rate": 2.8949580041487457e-07, "loss": 0.391, "step": 7516 }, { "epoch": 1.7891949782828585, "grad_norm": 0.3725958003240469, "learning_rate": 2.8884977097245694e-07, "loss": 0.3156, "step": 7517 }, { "epoch": 1.7894329743559232, "grad_norm": 0.3801605189515548, "learning_rate": 2.8820444172351137e-07, "loss": 0.281, "step": 7518 }, { "epoch": 1.789670970428988, "grad_norm": 0.3623288254644941, "learning_rate": 2.8755981276395005e-07, "loss": 0.3224, "step": 7519 }, { "epoch": 1.7899089665020527, "grad_norm": 0.3776619119078162, "learning_rate": 2.869158841895808e-07, "loss": 0.3689, "step": 7520 }, { "epoch": 1.7901469625751174, "grad_norm": 0.35832837886741215, "learning_rate": 2.862726560961071e-07, "loss": 0.2744, "step": 7521 }, { "epoch": 1.7903849586481821, "grad_norm": 0.38760652888202674, "learning_rate": 2.8563012857912906e-07, "loss": 0.303, "step": 7522 }, { "epoch": 1.790622954721247, "grad_norm": 0.3754292098719095, "learning_rate": 2.849883017341415e-07, "loss": 0.3525, "step": 7523 }, { "epoch": 1.7908609507943118, "grad_norm": 0.3492865594689972, "learning_rate": 2.8434717565653635e-07, "loss": 0.337, "step": 7524 }, { "epoch": 1.7910989468673768, "grad_norm": 0.3666136541523488, "learning_rate": 2.837067504416002e-07, "loss": 0.3009, "step": 7525 }, { "epoch": 1.7913369429404415, "grad_norm": 0.37743641568773106, "learning_rate": 2.830670261845164e-07, "loss": 0.3058, "step": 7526 }, { "epoch": 1.7915749390135063, "grad_norm": 0.4041150603093485, "learning_rate": 2.8242800298036443e-07, "loss": 0.3588, "step": 7527 }, { "epoch": 1.791812935086571, "grad_norm": 0.3678697803590038, "learning_rate": 2.8178968092411717e-07, "loss": 0.2898, "step": 7528 }, { "epoch": 1.7920509311596358, "grad_norm": 0.40176844938042455, "learning_rate": 2.8115206011064655e-07, "loss": 0.2558, "step": 7529 }, { "epoch": 1.7922889272327005, "grad_norm": 0.3917439000088701, "learning_rate": 2.80515140634719e-07, "loss": 0.3533, "step": 7530 }, { "epoch": 1.7925269233057655, "grad_norm": 0.36028134434707115, "learning_rate": 2.798789225909959e-07, "loss": 0.3531, "step": 7531 }, { "epoch": 1.7927649193788302, "grad_norm": 0.37200867528731646, "learning_rate": 2.792434060740351e-07, "loss": 0.2834, "step": 7532 }, { "epoch": 1.7930029154518952, "grad_norm": 0.40052191062969406, "learning_rate": 2.7860859117828985e-07, "loss": 0.3288, "step": 7533 }, { "epoch": 1.79324091152496, "grad_norm": 0.36692664516075085, "learning_rate": 2.779744779981097e-07, "loss": 0.4008, "step": 7534 }, { "epoch": 1.7934789075980246, "grad_norm": 0.37862040939536806, "learning_rate": 2.773410666277382e-07, "loss": 0.3141, "step": 7535 }, { "epoch": 1.7937169036710894, "grad_norm": 0.4291885543813882, "learning_rate": 2.767083571613183e-07, "loss": 0.2753, "step": 7536 }, { "epoch": 1.7939548997441541, "grad_norm": 0.3603867639177607, "learning_rate": 2.7607634969288535e-07, "loss": 0.3465, "step": 7537 }, { "epoch": 1.7941928958172189, "grad_norm": 0.3909700151084424, "learning_rate": 2.7544504431637085e-07, "loss": 0.4081, "step": 7538 }, { "epoch": 1.7944308918902838, "grad_norm": 0.37728790668800677, "learning_rate": 2.748144411256026e-07, "loss": 0.2785, "step": 7539 }, { "epoch": 1.7946688879633486, "grad_norm": 0.36294912722499145, "learning_rate": 2.74184540214304e-07, "loss": 0.3051, "step": 7540 }, { "epoch": 1.7949068840364135, "grad_norm": 0.35721883529172505, "learning_rate": 2.7355534167609334e-07, "loss": 0.3684, "step": 7541 }, { "epoch": 1.7951448801094783, "grad_norm": 0.3554032641789872, "learning_rate": 2.7292684560448537e-07, "loss": 0.2988, "step": 7542 }, { "epoch": 1.795382876182543, "grad_norm": 0.43060025594932566, "learning_rate": 2.722990520928903e-07, "loss": 0.3254, "step": 7543 }, { "epoch": 1.7956208722556077, "grad_norm": 0.380455936842252, "learning_rate": 2.716719612346147e-07, "loss": 0.3326, "step": 7544 }, { "epoch": 1.7958588683286725, "grad_norm": 0.4177701326166782, "learning_rate": 2.7104557312285786e-07, "loss": 0.3889, "step": 7545 }, { "epoch": 1.7960968644017372, "grad_norm": 0.3649041520875534, "learning_rate": 2.7041988785071804e-07, "loss": 0.2986, "step": 7546 }, { "epoch": 1.7963348604748022, "grad_norm": 0.39864708497247475, "learning_rate": 2.697949055111876e-07, "loss": 0.2909, "step": 7547 }, { "epoch": 1.796572856547867, "grad_norm": 0.380155366498838, "learning_rate": 2.691706261971533e-07, "loss": 0.363, "step": 7548 }, { "epoch": 1.7968108526209319, "grad_norm": 0.3973304332381195, "learning_rate": 2.685470500013987e-07, "loss": 0.3397, "step": 7549 }, { "epoch": 1.7970488486939966, "grad_norm": 0.3718159670234423, "learning_rate": 2.679241770166036e-07, "loss": 0.2915, "step": 7550 }, { "epoch": 1.7972868447670614, "grad_norm": 0.43325048967411206, "learning_rate": 2.673020073353411e-07, "loss": 0.3344, "step": 7551 }, { "epoch": 1.797524840840126, "grad_norm": 0.4179971224778607, "learning_rate": 2.666805410500822e-07, "loss": 0.4318, "step": 7552 }, { "epoch": 1.7977628369131908, "grad_norm": 0.37436449309549835, "learning_rate": 2.6605977825319094e-07, "loss": 0.301, "step": 7553 }, { "epoch": 1.7980008329862556, "grad_norm": 0.407183373159236, "learning_rate": 2.6543971903692954e-07, "loss": 0.2842, "step": 7554 }, { "epoch": 1.7982388290593205, "grad_norm": 0.47155428570797986, "learning_rate": 2.6482036349345265e-07, "loss": 0.3289, "step": 7555 }, { "epoch": 1.7984768251323853, "grad_norm": 0.3675704624500037, "learning_rate": 2.642017117148116e-07, "loss": 0.3144, "step": 7556 }, { "epoch": 1.7987148212054502, "grad_norm": 0.3654773621312083, "learning_rate": 2.63583763792955e-07, "loss": 0.2829, "step": 7557 }, { "epoch": 1.798952817278515, "grad_norm": 0.38467306352137903, "learning_rate": 2.629665198197251e-07, "loss": 0.3096, "step": 7558 }, { "epoch": 1.7991908133515797, "grad_norm": 0.3771099539638736, "learning_rate": 2.623499798868584e-07, "loss": 0.3867, "step": 7559 }, { "epoch": 1.7994288094246444, "grad_norm": 0.38421855757957385, "learning_rate": 2.617341440859883e-07, "loss": 0.2757, "step": 7560 }, { "epoch": 1.7996668054977092, "grad_norm": 0.36935340044670584, "learning_rate": 2.6111901250864325e-07, "loss": 0.267, "step": 7561 }, { "epoch": 1.799904801570774, "grad_norm": 0.3520631923142841, "learning_rate": 2.6050458524624735e-07, "loss": 0.3649, "step": 7562 }, { "epoch": 1.8001427976438389, "grad_norm": 0.3642419618021161, "learning_rate": 2.5989086239011975e-07, "loss": 0.3463, "step": 7563 }, { "epoch": 1.8003807937169036, "grad_norm": 0.3716992913649046, "learning_rate": 2.5927784403147473e-07, "loss": 0.2595, "step": 7564 }, { "epoch": 1.8006187897899686, "grad_norm": 0.37707110768373664, "learning_rate": 2.586655302614216e-07, "loss": 0.3036, "step": 7565 }, { "epoch": 1.8008567858630333, "grad_norm": 0.3722945299659591, "learning_rate": 2.5805392117096597e-07, "loss": 0.3856, "step": 7566 }, { "epoch": 1.801094781936098, "grad_norm": 0.37881870157045106, "learning_rate": 2.5744301685100727e-07, "loss": 0.3017, "step": 7567 }, { "epoch": 1.8013327780091628, "grad_norm": 0.3840903049457169, "learning_rate": 2.5683281739234233e-07, "loss": 0.2878, "step": 7568 }, { "epoch": 1.8015707740822275, "grad_norm": 0.3737271689190538, "learning_rate": 2.5622332288565975e-07, "loss": 0.3099, "step": 7569 }, { "epoch": 1.8018087701552923, "grad_norm": 0.410673409572434, "learning_rate": 2.5561453342154763e-07, "loss": 0.3553, "step": 7570 }, { "epoch": 1.8020467662283572, "grad_norm": 0.3897577800417089, "learning_rate": 2.5500644909048577e-07, "loss": 0.2892, "step": 7571 }, { "epoch": 1.802284762301422, "grad_norm": 0.3795777307796096, "learning_rate": 2.543990699828519e-07, "loss": 0.304, "step": 7572 }, { "epoch": 1.802522758374487, "grad_norm": 0.3926970531258347, "learning_rate": 2.5379239618891604e-07, "loss": 0.3838, "step": 7573 }, { "epoch": 1.8027607544475517, "grad_norm": 0.35843032279557285, "learning_rate": 2.5318642779884605e-07, "loss": 0.3366, "step": 7574 }, { "epoch": 1.8029987505206164, "grad_norm": 0.3687701099994784, "learning_rate": 2.525811649027032e-07, "loss": 0.3038, "step": 7575 }, { "epoch": 1.8032367465936812, "grad_norm": 0.3906979385647688, "learning_rate": 2.5197660759044505e-07, "loss": 0.3117, "step": 7576 }, { "epoch": 1.803474742666746, "grad_norm": 0.434483976268019, "learning_rate": 2.51372755951923e-07, "loss": 0.382, "step": 7577 }, { "epoch": 1.8037127387398106, "grad_norm": 0.3842828684371924, "learning_rate": 2.5076961007688526e-07, "loss": 0.2864, "step": 7578 }, { "epoch": 1.8039507348128756, "grad_norm": 0.42535367755859443, "learning_rate": 2.5016717005497347e-07, "loss": 0.2755, "step": 7579 }, { "epoch": 1.8041887308859403, "grad_norm": 0.376588372683319, "learning_rate": 2.4956543597572546e-07, "loss": 0.3643, "step": 7580 }, { "epoch": 1.8044267269590053, "grad_norm": 0.3587861059882926, "learning_rate": 2.4896440792857355e-07, "loss": 0.3672, "step": 7581 }, { "epoch": 1.80466472303207, "grad_norm": 0.40501079901398007, "learning_rate": 2.483640860028458e-07, "loss": 0.2751, "step": 7582 }, { "epoch": 1.8049027191051348, "grad_norm": 0.3908238593281349, "learning_rate": 2.4776447028776404e-07, "loss": 0.3438, "step": 7583 }, { "epoch": 1.8051407151781995, "grad_norm": 0.4008384905708165, "learning_rate": 2.4716556087244716e-07, "loss": 0.3841, "step": 7584 }, { "epoch": 1.8053787112512643, "grad_norm": 0.363703944202256, "learning_rate": 2.465673578459077e-07, "loss": 0.2752, "step": 7585 }, { "epoch": 1.805616707324329, "grad_norm": 0.40343185473148424, "learning_rate": 2.459698612970529e-07, "loss": 0.2837, "step": 7586 }, { "epoch": 1.805854703397394, "grad_norm": 0.35789641416139295, "learning_rate": 2.4537307131468566e-07, "loss": 0.3523, "step": 7587 }, { "epoch": 1.8060926994704587, "grad_norm": 0.3926977742362301, "learning_rate": 2.447769879875039e-07, "loss": 0.4072, "step": 7588 }, { "epoch": 1.8063306955435237, "grad_norm": 0.3757795347751587, "learning_rate": 2.441816114040996e-07, "loss": 0.2978, "step": 7589 }, { "epoch": 1.8065686916165884, "grad_norm": 0.3829503186037233, "learning_rate": 2.435869416529618e-07, "loss": 0.3131, "step": 7590 }, { "epoch": 1.8068066876896531, "grad_norm": 0.41211846502557, "learning_rate": 2.429929788224722e-07, "loss": 0.3494, "step": 7591 }, { "epoch": 1.8070446837627179, "grad_norm": 0.37268683629691657, "learning_rate": 2.4239972300090897e-07, "loss": 0.2897, "step": 7592 }, { "epoch": 1.8072826798357826, "grad_norm": 0.3841386899279831, "learning_rate": 2.418071742764444e-07, "loss": 0.2877, "step": 7593 }, { "epoch": 1.8075206759088474, "grad_norm": 0.36351385879987314, "learning_rate": 2.4121533273714524e-07, "loss": 0.3227, "step": 7594 }, { "epoch": 1.8077586719819123, "grad_norm": 0.37194110953788245, "learning_rate": 2.4062419847097507e-07, "loss": 0.3593, "step": 7595 }, { "epoch": 1.807996668054977, "grad_norm": 0.3905909596183342, "learning_rate": 2.4003377156578967e-07, "loss": 0.294, "step": 7596 }, { "epoch": 1.808234664128042, "grad_norm": 0.37292298295629817, "learning_rate": 2.3944405210934106e-07, "loss": 0.3196, "step": 7597 }, { "epoch": 1.8084726602011068, "grad_norm": 0.3933254447982632, "learning_rate": 2.388550401892775e-07, "loss": 0.3323, "step": 7598 }, { "epoch": 1.8087106562741715, "grad_norm": 0.3710230013969392, "learning_rate": 2.382667358931401e-07, "loss": 0.3092, "step": 7599 }, { "epoch": 1.8089486523472362, "grad_norm": 0.3914008246491944, "learning_rate": 2.3767913930836552e-07, "loss": 0.2787, "step": 7600 }, { "epoch": 1.809186648420301, "grad_norm": 0.402510376468686, "learning_rate": 2.37092250522285e-07, "loss": 0.3047, "step": 7601 }, { "epoch": 1.8094246444933657, "grad_norm": 0.3923964917222654, "learning_rate": 2.3650606962212442e-07, "loss": 0.3777, "step": 7602 }, { "epoch": 1.8096626405664307, "grad_norm": 0.390152017290336, "learning_rate": 2.3592059669500512e-07, "loss": 0.2928, "step": 7603 }, { "epoch": 1.8099006366394954, "grad_norm": 0.39619044275128396, "learning_rate": 2.353358318279425e-07, "loss": 0.2758, "step": 7604 }, { "epoch": 1.8101386327125604, "grad_norm": 0.3778971109266139, "learning_rate": 2.347517751078482e-07, "loss": 0.373, "step": 7605 }, { "epoch": 1.810376628785625, "grad_norm": 0.41970787322500813, "learning_rate": 2.3416842662152606e-07, "loss": 0.3298, "step": 7606 }, { "epoch": 1.8106146248586898, "grad_norm": 0.37056563319434105, "learning_rate": 2.3358578645567676e-07, "loss": 0.276, "step": 7607 }, { "epoch": 1.8108526209317546, "grad_norm": 0.4059835978022746, "learning_rate": 2.3300385469689491e-07, "loss": 0.32, "step": 7608 }, { "epoch": 1.8110906170048193, "grad_norm": 0.3951261910417104, "learning_rate": 2.324226314316702e-07, "loss": 0.3878, "step": 7609 }, { "epoch": 1.811328613077884, "grad_norm": 0.3525934295874246, "learning_rate": 2.318421167463869e-07, "loss": 0.2804, "step": 7610 }, { "epoch": 1.811566609150949, "grad_norm": 0.4037839992260597, "learning_rate": 2.3126231072732264e-07, "loss": 0.3023, "step": 7611 }, { "epoch": 1.8118046052240138, "grad_norm": 0.3608118803223138, "learning_rate": 2.3068321346065236e-07, "loss": 0.3508, "step": 7612 }, { "epoch": 1.8120426012970787, "grad_norm": 0.3707655208185674, "learning_rate": 2.3010482503244447e-07, "loss": 0.3564, "step": 7613 }, { "epoch": 1.8122805973701435, "grad_norm": 0.3741317976188311, "learning_rate": 2.2952714552866017e-07, "loss": 0.2794, "step": 7614 }, { "epoch": 1.8125185934432082, "grad_norm": 0.39603813001981003, "learning_rate": 2.2895017503515859e-07, "loss": 0.2905, "step": 7615 }, { "epoch": 1.812756589516273, "grad_norm": 0.3998027621001329, "learning_rate": 2.2837391363769e-07, "loss": 0.3547, "step": 7616 }, { "epoch": 1.8129945855893377, "grad_norm": 0.37531093259849246, "learning_rate": 2.2779836142190314e-07, "loss": 0.3318, "step": 7617 }, { "epoch": 1.8132325816624024, "grad_norm": 0.3743872139458294, "learning_rate": 2.2722351847333844e-07, "loss": 0.2864, "step": 7618 }, { "epoch": 1.8134705777354674, "grad_norm": 0.3736656227074288, "learning_rate": 2.266493848774315e-07, "loss": 0.3149, "step": 7619 }, { "epoch": 1.8137085738085321, "grad_norm": 0.3917461855932985, "learning_rate": 2.2607596071951288e-07, "loss": 0.3824, "step": 7620 }, { "epoch": 1.813946569881597, "grad_norm": 0.38346753454473853, "learning_rate": 2.255032460848078e-07, "loss": 0.2818, "step": 7621 }, { "epoch": 1.8141845659546618, "grad_norm": 0.38249166251681005, "learning_rate": 2.2493124105843534e-07, "loss": 0.2997, "step": 7622 }, { "epoch": 1.8144225620277266, "grad_norm": 0.39871403476731015, "learning_rate": 2.243599457254103e-07, "loss": 0.3529, "step": 7623 }, { "epoch": 1.8146605581007913, "grad_norm": 0.38237212611132454, "learning_rate": 2.2378936017064035e-07, "loss": 0.3541, "step": 7624 }, { "epoch": 1.814898554173856, "grad_norm": 0.4176523388972532, "learning_rate": 2.2321948447892984e-07, "loss": 0.2793, "step": 7625 }, { "epoch": 1.8151365502469208, "grad_norm": 0.3765532620043632, "learning_rate": 2.22650318734976e-07, "loss": 0.3217, "step": 7626 }, { "epoch": 1.8153745463199857, "grad_norm": 0.5502986454158951, "learning_rate": 2.2208186302337064e-07, "loss": 0.363, "step": 7627 }, { "epoch": 1.8156125423930505, "grad_norm": 0.3691080359982276, "learning_rate": 2.2151411742860008e-07, "loss": 0.3043, "step": 7628 }, { "epoch": 1.8158505384661154, "grad_norm": 0.4185986270949634, "learning_rate": 2.2094708203504623e-07, "loss": 0.3095, "step": 7629 }, { "epoch": 1.8160885345391802, "grad_norm": 0.39746628135216966, "learning_rate": 2.2038075692698392e-07, "loss": 0.3555, "step": 7630 }, { "epoch": 1.816326530612245, "grad_norm": 0.38678877865572725, "learning_rate": 2.1981514218858302e-07, "loss": 0.3517, "step": 7631 }, { "epoch": 1.8165645266853097, "grad_norm": 0.3839615523300972, "learning_rate": 2.1925023790390797e-07, "loss": 0.2845, "step": 7632 }, { "epoch": 1.8168025227583744, "grad_norm": 0.3969919578339475, "learning_rate": 2.1868604415691775e-07, "loss": 0.3163, "step": 7633 }, { "epoch": 1.8170405188314391, "grad_norm": 0.40042063644372505, "learning_rate": 2.1812256103146523e-07, "loss": 0.3821, "step": 7634 }, { "epoch": 1.817278514904504, "grad_norm": 0.3595786143418067, "learning_rate": 2.1755978861129846e-07, "loss": 0.2926, "step": 7635 }, { "epoch": 1.8175165109775688, "grad_norm": 0.4000391772571078, "learning_rate": 2.1699772698005884e-07, "loss": 0.2917, "step": 7636 }, { "epoch": 1.8177545070506338, "grad_norm": 0.39209491326042045, "learning_rate": 2.164363762212829e-07, "loss": 0.3798, "step": 7637 }, { "epoch": 1.8179925031236985, "grad_norm": 0.36009581505557847, "learning_rate": 2.1587573641839999e-07, "loss": 0.3549, "step": 7638 }, { "epoch": 1.8182304991967633, "grad_norm": 0.3891544552915791, "learning_rate": 2.1531580765473737e-07, "loss": 0.2979, "step": 7639 }, { "epoch": 1.818468495269828, "grad_norm": 0.34833762751724157, "learning_rate": 2.147565900135129e-07, "loss": 0.3132, "step": 7640 }, { "epoch": 1.8187064913428928, "grad_norm": 0.4102701100568015, "learning_rate": 2.141980835778401e-07, "loss": 0.3732, "step": 7641 }, { "epoch": 1.8189444874159575, "grad_norm": 0.374891996884048, "learning_rate": 2.136402884307276e-07, "loss": 0.3313, "step": 7642 }, { "epoch": 1.8191824834890225, "grad_norm": 0.36912237819842353, "learning_rate": 2.130832046550757e-07, "loss": 0.3006, "step": 7643 }, { "epoch": 1.8194204795620872, "grad_norm": 0.4056734928284316, "learning_rate": 2.1252683233368377e-07, "loss": 0.3093, "step": 7644 }, { "epoch": 1.8196584756351522, "grad_norm": 0.37038662624715685, "learning_rate": 2.1197117154924006e-07, "loss": 0.3657, "step": 7645 }, { "epoch": 1.819896471708217, "grad_norm": 0.3710751679952538, "learning_rate": 2.114162223843308e-07, "loss": 0.2794, "step": 7646 }, { "epoch": 1.8201344677812816, "grad_norm": 0.3778232887871572, "learning_rate": 2.1086198492143494e-07, "loss": 0.2793, "step": 7647 }, { "epoch": 1.8203724638543464, "grad_norm": 0.41311741726892776, "learning_rate": 2.1030845924292553e-07, "loss": 0.3549, "step": 7648 }, { "epoch": 1.820610459927411, "grad_norm": 0.3539492859309911, "learning_rate": 2.0975564543107007e-07, "loss": 0.3133, "step": 7649 }, { "epoch": 1.8208484560004758, "grad_norm": 0.3918727984124256, "learning_rate": 2.0920354356803118e-07, "loss": 0.282, "step": 7650 }, { "epoch": 1.8210864520735408, "grad_norm": 0.3829591506819489, "learning_rate": 2.0865215373586377e-07, "loss": 0.3235, "step": 7651 }, { "epoch": 1.8213244481466055, "grad_norm": 0.3848423040694204, "learning_rate": 2.081014760165184e-07, "loss": 0.3783, "step": 7652 }, { "epoch": 1.8215624442196705, "grad_norm": 0.3682491320220431, "learning_rate": 2.0755151049183963e-07, "loss": 0.2923, "step": 7653 }, { "epoch": 1.8218004402927352, "grad_norm": 0.3710419857253826, "learning_rate": 2.070022572435665e-07, "loss": 0.2838, "step": 7654 }, { "epoch": 1.8220384363658, "grad_norm": 0.3940817637494536, "learning_rate": 2.0645371635333032e-07, "loss": 0.3577, "step": 7655 }, { "epoch": 1.8222764324388647, "grad_norm": 0.34803178778167626, "learning_rate": 2.0590588790265874e-07, "loss": 0.3054, "step": 7656 }, { "epoch": 1.8225144285119295, "grad_norm": 0.3559915393103781, "learning_rate": 2.0535877197297271e-07, "loss": 0.285, "step": 7657 }, { "epoch": 1.8227524245849942, "grad_norm": 0.3952700916510172, "learning_rate": 2.0481236864558663e-07, "loss": 0.3013, "step": 7658 }, { "epoch": 1.8229904206580592, "grad_norm": 0.37451269015209115, "learning_rate": 2.0426667800170996e-07, "loss": 0.3538, "step": 7659 }, { "epoch": 1.823228416731124, "grad_norm": 0.37289422275429907, "learning_rate": 2.0372170012244563e-07, "loss": 0.3015, "step": 7660 }, { "epoch": 1.8234664128041889, "grad_norm": 0.3849495025851358, "learning_rate": 2.031774350887905e-07, "loss": 0.2954, "step": 7661 }, { "epoch": 1.8237044088772536, "grad_norm": 0.3772895053852847, "learning_rate": 2.0263388298163655e-07, "loss": 0.3223, "step": 7662 }, { "epoch": 1.8239424049503183, "grad_norm": 0.35964348698217047, "learning_rate": 2.0209104388176858e-07, "loss": 0.3607, "step": 7663 }, { "epoch": 1.824180401023383, "grad_norm": 0.37541695568154915, "learning_rate": 2.0154891786986595e-07, "loss": 0.2967, "step": 7664 }, { "epoch": 1.8244183970964478, "grad_norm": 0.3958066722757771, "learning_rate": 2.0100750502650258e-07, "loss": 0.3289, "step": 7665 }, { "epoch": 1.8246563931695126, "grad_norm": 0.4131768602497947, "learning_rate": 2.0046680543214403e-07, "loss": 0.3372, "step": 7666 }, { "epoch": 1.8248943892425775, "grad_norm": 0.42405294941130334, "learning_rate": 1.9992681916715385e-07, "loss": 0.3229, "step": 7667 }, { "epoch": 1.8251323853156423, "grad_norm": 0.40593441902537913, "learning_rate": 1.993875463117867e-07, "loss": 0.2836, "step": 7668 }, { "epoch": 1.8253703813887072, "grad_norm": 0.37653545873556477, "learning_rate": 1.9884898694619127e-07, "loss": 0.3173, "step": 7669 }, { "epoch": 1.825608377461772, "grad_norm": 0.41304477293098335, "learning_rate": 1.9831114115041017e-07, "loss": 0.3672, "step": 7670 }, { "epoch": 1.8258463735348367, "grad_norm": 0.33869784993849245, "learning_rate": 1.9777400900438283e-07, "loss": 0.2859, "step": 7671 }, { "epoch": 1.8260843696079014, "grad_norm": 0.3714603948136559, "learning_rate": 1.9723759058793868e-07, "loss": 0.2998, "step": 7672 }, { "epoch": 1.8263223656809662, "grad_norm": 0.40178937677479326, "learning_rate": 1.9670188598080342e-07, "loss": 0.3435, "step": 7673 }, { "epoch": 1.826560361754031, "grad_norm": 0.3755897088267994, "learning_rate": 1.9616689526259557e-07, "loss": 0.3184, "step": 7674 }, { "epoch": 1.8267983578270959, "grad_norm": 0.3412859979077663, "learning_rate": 1.9563261851282822e-07, "loss": 0.2572, "step": 7675 }, { "epoch": 1.8270363539001606, "grad_norm": 0.369227042248507, "learning_rate": 1.9509905581090837e-07, "loss": 0.3358, "step": 7676 }, { "epoch": 1.8272743499732256, "grad_norm": 0.37802690397166483, "learning_rate": 1.9456620723613596e-07, "loss": 0.4107, "step": 7677 }, { "epoch": 1.8275123460462903, "grad_norm": 1.2815424892667338, "learning_rate": 1.9403407286770592e-07, "loss": 0.2987, "step": 7678 }, { "epoch": 1.827750342119355, "grad_norm": 0.37575903236434194, "learning_rate": 1.935026527847067e-07, "loss": 0.2836, "step": 7679 }, { "epoch": 1.8279883381924198, "grad_norm": 0.37225786292995694, "learning_rate": 1.9297194706612012e-07, "loss": 0.3522, "step": 7680 }, { "epoch": 1.8282263342654845, "grad_norm": 0.4069520217778423, "learning_rate": 1.9244195579082193e-07, "loss": 0.3215, "step": 7681 }, { "epoch": 1.8284643303385493, "grad_norm": 0.3777178114060692, "learning_rate": 1.9191267903758304e-07, "loss": 0.2767, "step": 7682 }, { "epoch": 1.8287023264116142, "grad_norm": 0.3802098683973281, "learning_rate": 1.913841168850661e-07, "loss": 0.308, "step": 7683 }, { "epoch": 1.828940322484679, "grad_norm": 0.49271704172627867, "learning_rate": 1.9085626941182932e-07, "loss": 0.3787, "step": 7684 }, { "epoch": 1.829178318557744, "grad_norm": 0.3772677332419929, "learning_rate": 1.903291366963228e-07, "loss": 0.2788, "step": 7685 }, { "epoch": 1.8294163146308087, "grad_norm": 0.36065380388280177, "learning_rate": 1.8980271881689216e-07, "loss": 0.2706, "step": 7686 }, { "epoch": 1.8296543107038734, "grad_norm": 0.377496168662484, "learning_rate": 1.892770158517765e-07, "loss": 0.3517, "step": 7687 }, { "epoch": 1.8298923067769381, "grad_norm": 0.35698841434630135, "learning_rate": 1.8875202787910774e-07, "loss": 0.3576, "step": 7688 }, { "epoch": 1.8301303028500029, "grad_norm": 0.366279133501771, "learning_rate": 1.882277549769118e-07, "loss": 0.2936, "step": 7689 }, { "epoch": 1.8303682989230676, "grad_norm": 0.3754152778664585, "learning_rate": 1.8770419722310916e-07, "loss": 0.2969, "step": 7690 }, { "epoch": 1.8306062949961326, "grad_norm": 0.3887220576270878, "learning_rate": 1.8718135469551313e-07, "loss": 0.3747, "step": 7691 }, { "epoch": 1.8308442910691973, "grad_norm": 0.38990633714055073, "learning_rate": 1.866592274718315e-07, "loss": 0.3552, "step": 7692 }, { "epoch": 1.8310822871422623, "grad_norm": 0.37677144527387463, "learning_rate": 1.8613781562966392e-07, "loss": 0.2833, "step": 7693 }, { "epoch": 1.831320283215327, "grad_norm": 0.425287347061427, "learning_rate": 1.8561711924650728e-07, "loss": 0.3178, "step": 7694 }, { "epoch": 1.8315582792883918, "grad_norm": 0.38942860989951505, "learning_rate": 1.8509713839974852e-07, "loss": 0.3685, "step": 7695 }, { "epoch": 1.8317962753614565, "grad_norm": 0.3639559057337997, "learning_rate": 1.8457787316667032e-07, "loss": 0.2576, "step": 7696 }, { "epoch": 1.8320342714345212, "grad_norm": 0.3541221788742815, "learning_rate": 1.84059323624447e-07, "loss": 0.2868, "step": 7697 }, { "epoch": 1.832272267507586, "grad_norm": 0.3872637366190305, "learning_rate": 1.835414898501492e-07, "loss": 0.3573, "step": 7698 }, { "epoch": 1.832510263580651, "grad_norm": 0.3678827746800215, "learning_rate": 1.8302437192073975e-07, "loss": 0.3303, "step": 7699 }, { "epoch": 1.8327482596537157, "grad_norm": 0.36454295345870347, "learning_rate": 1.8250796991307494e-07, "loss": 0.296, "step": 7700 }, { "epoch": 1.8329862557267806, "grad_norm": 0.37266982455556436, "learning_rate": 1.8199228390390457e-07, "loss": 0.3295, "step": 7701 }, { "epoch": 1.8332242517998454, "grad_norm": 0.4376692678565518, "learning_rate": 1.814773139698728e-07, "loss": 0.3667, "step": 7702 }, { "epoch": 1.8334622478729101, "grad_norm": 0.3683739228990514, "learning_rate": 1.8096306018751675e-07, "loss": 0.2748, "step": 7703 }, { "epoch": 1.8337002439459749, "grad_norm": 0.4073240247250818, "learning_rate": 1.80449522633267e-07, "loss": 0.2995, "step": 7704 }, { "epoch": 1.8339382400190396, "grad_norm": 0.39393736737366614, "learning_rate": 1.7993670138344798e-07, "loss": 0.3601, "step": 7705 }, { "epoch": 1.8341762360921043, "grad_norm": 0.3767167773666181, "learning_rate": 1.7942459651427825e-07, "loss": 0.3498, "step": 7706 }, { "epoch": 1.8344142321651693, "grad_norm": 0.4089511356922392, "learning_rate": 1.789132081018674e-07, "loss": 0.3199, "step": 7707 }, { "epoch": 1.834652228238234, "grad_norm": 0.3812109106624504, "learning_rate": 1.7840253622222303e-07, "loss": 0.3192, "step": 7708 }, { "epoch": 1.834890224311299, "grad_norm": 0.36263457008589506, "learning_rate": 1.7789258095124217e-07, "loss": 0.3782, "step": 7709 }, { "epoch": 1.8351282203843637, "grad_norm": 0.3724803988209711, "learning_rate": 1.77383342364717e-07, "loss": 0.2963, "step": 7710 }, { "epoch": 1.8353662164574285, "grad_norm": 0.3765829889635161, "learning_rate": 1.7687482053833304e-07, "loss": 0.2866, "step": 7711 }, { "epoch": 1.8356042125304932, "grad_norm": 0.3644840472067164, "learning_rate": 1.7636701554766877e-07, "loss": 0.3348, "step": 7712 }, { "epoch": 1.835842208603558, "grad_norm": 0.4078076416765129, "learning_rate": 1.7585992746819713e-07, "loss": 0.3769, "step": 7713 }, { "epoch": 1.8360802046766227, "grad_norm": 0.37270746494698037, "learning_rate": 1.753535563752834e-07, "loss": 0.2779, "step": 7714 }, { "epoch": 1.8363182007496877, "grad_norm": 0.38980061433134444, "learning_rate": 1.7484790234418791e-07, "loss": 0.3094, "step": 7715 }, { "epoch": 1.8365561968227524, "grad_norm": 0.3952094967807275, "learning_rate": 1.7434296545006224e-07, "loss": 0.3964, "step": 7716 }, { "epoch": 1.8367941928958174, "grad_norm": 0.3729540471305891, "learning_rate": 1.73838745767953e-07, "loss": 0.3146, "step": 7717 }, { "epoch": 1.837032188968882, "grad_norm": 0.40875312184567286, "learning_rate": 1.7333524337279918e-07, "loss": 0.3322, "step": 7718 }, { "epoch": 1.8372701850419468, "grad_norm": 0.40437790963447295, "learning_rate": 1.7283245833943473e-07, "loss": 0.327, "step": 7719 }, { "epoch": 1.8375081811150116, "grad_norm": 0.37395892343895687, "learning_rate": 1.723303907425844e-07, "loss": 0.3752, "step": 7720 }, { "epoch": 1.8377461771880763, "grad_norm": 0.35037991077375436, "learning_rate": 1.7182904065686956e-07, "loss": 0.289, "step": 7721 }, { "epoch": 1.837984173261141, "grad_norm": 0.39297451358371965, "learning_rate": 1.713284081568023e-07, "loss": 0.3139, "step": 7722 }, { "epoch": 1.838222169334206, "grad_norm": 0.4465414484001565, "learning_rate": 1.708284933167892e-07, "loss": 0.3373, "step": 7723 }, { "epoch": 1.8384601654072708, "grad_norm": 0.3641160770057071, "learning_rate": 1.703292962111297e-07, "loss": 0.3339, "step": 7724 }, { "epoch": 1.8386981614803357, "grad_norm": 0.37173673768425497, "learning_rate": 1.6983081691401727e-07, "loss": 0.2737, "step": 7725 }, { "epoch": 1.8389361575534005, "grad_norm": 0.37865859937279667, "learning_rate": 1.6933305549953817e-07, "loss": 0.3234, "step": 7726 }, { "epoch": 1.8391741536264652, "grad_norm": 0.3858905080470261, "learning_rate": 1.688360120416721e-07, "loss": 0.3567, "step": 7727 }, { "epoch": 1.83941214969953, "grad_norm": 0.36393548918786084, "learning_rate": 1.6833968661429168e-07, "loss": 0.2794, "step": 7728 }, { "epoch": 1.8396501457725947, "grad_norm": 0.38271700713219603, "learning_rate": 1.6784407929116342e-07, "loss": 0.2761, "step": 7729 }, { "epoch": 1.8398881418456594, "grad_norm": 0.4049677071792201, "learning_rate": 1.6734919014594674e-07, "loss": 0.3441, "step": 7730 }, { "epoch": 1.8401261379187244, "grad_norm": 0.36928102997215106, "learning_rate": 1.668550192521945e-07, "loss": 0.3377, "step": 7731 }, { "epoch": 1.840364133991789, "grad_norm": 0.37686053081429005, "learning_rate": 1.6636156668335236e-07, "loss": 0.2798, "step": 7732 }, { "epoch": 1.840602130064854, "grad_norm": 0.37607937056638047, "learning_rate": 1.6586883251275998e-07, "loss": 0.3279, "step": 7733 }, { "epoch": 1.8408401261379188, "grad_norm": 0.4077890202353418, "learning_rate": 1.6537681681364993e-07, "loss": 0.3957, "step": 7734 }, { "epoch": 1.8410781222109835, "grad_norm": 0.3665125487760819, "learning_rate": 1.64885519659147e-07, "loss": 0.2984, "step": 7735 }, { "epoch": 1.8413161182840483, "grad_norm": 0.3856906973870745, "learning_rate": 1.6439494112227173e-07, "loss": 0.2753, "step": 7736 }, { "epoch": 1.841554114357113, "grad_norm": 0.370985815282774, "learning_rate": 1.6390508127593463e-07, "loss": 0.3377, "step": 7737 }, { "epoch": 1.8417921104301778, "grad_norm": 0.3622514504955658, "learning_rate": 1.634159401929425e-07, "loss": 0.3836, "step": 7738 }, { "epoch": 1.8420301065032427, "grad_norm": 0.3860158792125223, "learning_rate": 1.6292751794599216e-07, "loss": 0.2713, "step": 7739 }, { "epoch": 1.8422681025763075, "grad_norm": 0.3973498950561496, "learning_rate": 1.6243981460767666e-07, "loss": 0.3111, "step": 7740 }, { "epoch": 1.8425060986493724, "grad_norm": 0.3638871826673044, "learning_rate": 1.619528302504797e-07, "loss": 0.3788, "step": 7741 }, { "epoch": 1.8427440947224372, "grad_norm": 0.3705555624225468, "learning_rate": 1.6146656494678003e-07, "loss": 0.3012, "step": 7742 }, { "epoch": 1.842982090795502, "grad_norm": 0.3584047014227033, "learning_rate": 1.609810187688482e-07, "loss": 0.2983, "step": 7743 }, { "epoch": 1.8432200868685666, "grad_norm": 0.3877701919251873, "learning_rate": 1.6049619178884868e-07, "loss": 0.3207, "step": 7744 }, { "epoch": 1.8434580829416314, "grad_norm": 0.3813895710119214, "learning_rate": 1.6001208407883884e-07, "loss": 0.3819, "step": 7745 }, { "epoch": 1.8436960790146961, "grad_norm": 0.376270806834782, "learning_rate": 1.5952869571076835e-07, "loss": 0.2711, "step": 7746 }, { "epoch": 1.843934075087761, "grad_norm": 0.3843012907616639, "learning_rate": 1.5904602675648083e-07, "loss": 0.2944, "step": 7747 }, { "epoch": 1.8441720711608258, "grad_norm": 0.3918616144624181, "learning_rate": 1.5856407728771394e-07, "loss": 0.3551, "step": 7748 }, { "epoch": 1.8444100672338908, "grad_norm": 0.3541860195426503, "learning_rate": 1.5808284737609592e-07, "loss": 0.3476, "step": 7749 }, { "epoch": 1.8446480633069555, "grad_norm": 0.4021959688745201, "learning_rate": 1.5760233709315064e-07, "loss": 0.2617, "step": 7750 }, { "epoch": 1.8448860593800203, "grad_norm": 0.4117631142432679, "learning_rate": 1.571225465102927e-07, "loss": 0.3208, "step": 7751 }, { "epoch": 1.845124055453085, "grad_norm": 0.41626948663412294, "learning_rate": 1.566434756988311e-07, "loss": 0.3668, "step": 7752 }, { "epoch": 1.8453620515261497, "grad_norm": 0.3503906065492793, "learning_rate": 1.561651247299689e-07, "loss": 0.2722, "step": 7753 }, { "epoch": 1.8456000475992145, "grad_norm": 0.3878284628086832, "learning_rate": 1.556874936747993e-07, "loss": 0.3053, "step": 7754 }, { "epoch": 1.8458380436722794, "grad_norm": 0.3664751216986663, "learning_rate": 1.5521058260431043e-07, "loss": 0.3567, "step": 7755 }, { "epoch": 1.8460760397453442, "grad_norm": 0.38503978655873494, "learning_rate": 1.5473439158938398e-07, "loss": 0.3633, "step": 7756 }, { "epoch": 1.8463140358184091, "grad_norm": 0.37539732052773167, "learning_rate": 1.5425892070079274e-07, "loss": 0.2755, "step": 7757 }, { "epoch": 1.8465520318914739, "grad_norm": 0.3905485468486571, "learning_rate": 1.5378417000920355e-07, "loss": 0.3062, "step": 7758 }, { "epoch": 1.8467900279645386, "grad_norm": 0.3717300860222802, "learning_rate": 1.5331013958517604e-07, "loss": 0.3826, "step": 7759 }, { "epoch": 1.8470280240376034, "grad_norm": 0.3825543677637209, "learning_rate": 1.528368294991639e-07, "loss": 0.2877, "step": 7760 }, { "epoch": 1.847266020110668, "grad_norm": 0.38491103167235174, "learning_rate": 1.523642398215114e-07, "loss": 0.2753, "step": 7761 }, { "epoch": 1.8475040161837328, "grad_norm": 0.385893795694505, "learning_rate": 1.5189237062245732e-07, "loss": 0.3507, "step": 7762 }, { "epoch": 1.8477420122567978, "grad_norm": 0.36863061120979945, "learning_rate": 1.5142122197213338e-07, "loss": 0.3815, "step": 7763 }, { "epoch": 1.8479800083298625, "grad_norm": 0.371943151815035, "learning_rate": 1.5095079394056466e-07, "loss": 0.2779, "step": 7764 }, { "epoch": 1.8482180044029275, "grad_norm": 0.41031142975223894, "learning_rate": 1.5048108659766693e-07, "loss": 0.2951, "step": 7765 }, { "epoch": 1.8484560004759922, "grad_norm": 0.36678805969636596, "learning_rate": 1.500121000132515e-07, "loss": 0.3523, "step": 7766 }, { "epoch": 1.848693996549057, "grad_norm": 0.3656310119733287, "learning_rate": 1.4954383425702102e-07, "loss": 0.3059, "step": 7767 }, { "epoch": 1.8489319926221217, "grad_norm": 0.4742087761434599, "learning_rate": 1.4907628939857087e-07, "loss": 0.3005, "step": 7768 }, { "epoch": 1.8491699886951865, "grad_norm": 0.3832956933946555, "learning_rate": 1.4860946550739052e-07, "loss": 0.3152, "step": 7769 }, { "epoch": 1.8494079847682512, "grad_norm": 0.3707364198788508, "learning_rate": 1.4814336265286112e-07, "loss": 0.3801, "step": 7770 }, { "epoch": 1.8496459808413162, "grad_norm": 0.3794533583364384, "learning_rate": 1.476779809042572e-07, "loss": 0.2977, "step": 7771 }, { "epoch": 1.849883976914381, "grad_norm": 0.4142293643990621, "learning_rate": 1.4721332033074575e-07, "loss": 0.31, "step": 7772 }, { "epoch": 1.8501219729874459, "grad_norm": 0.364668075185516, "learning_rate": 1.467493810013876e-07, "loss": 0.3325, "step": 7773 }, { "epoch": 1.8503599690605106, "grad_norm": 0.36286815622733415, "learning_rate": 1.462861629851342e-07, "loss": 0.3554, "step": 7774 }, { "epoch": 1.8505979651335753, "grad_norm": 0.3942139877708106, "learning_rate": 1.4582366635083223e-07, "loss": 0.2767, "step": 7775 }, { "epoch": 1.85083596120664, "grad_norm": 0.37049438808023333, "learning_rate": 1.4536189116722056e-07, "loss": 0.2968, "step": 7776 }, { "epoch": 1.8510739572797048, "grad_norm": 0.42757648044300495, "learning_rate": 1.4490083750292984e-07, "loss": 0.3868, "step": 7777 }, { "epoch": 1.8513119533527695, "grad_norm": 0.3666344543082101, "learning_rate": 1.4444050542648302e-07, "loss": 0.289, "step": 7778 }, { "epoch": 1.8515499494258345, "grad_norm": 0.4136576161910318, "learning_rate": 1.4398089500629874e-07, "loss": 0.2883, "step": 7779 }, { "epoch": 1.8517879454988992, "grad_norm": 0.38875371128162894, "learning_rate": 1.4352200631068515e-07, "loss": 0.3883, "step": 7780 }, { "epoch": 1.8520259415719642, "grad_norm": 0.37524678870829536, "learning_rate": 1.430638394078454e-07, "loss": 0.3115, "step": 7781 }, { "epoch": 1.852263937645029, "grad_norm": 0.3915337777058191, "learning_rate": 1.4260639436587398e-07, "loss": 0.2911, "step": 7782 }, { "epoch": 1.8525019337180937, "grad_norm": 0.4065274951070914, "learning_rate": 1.4214967125275814e-07, "loss": 0.3267, "step": 7783 }, { "epoch": 1.8527399297911584, "grad_norm": 0.3753625153872455, "learning_rate": 1.4169367013637857e-07, "loss": 0.374, "step": 7784 }, { "epoch": 1.8529779258642232, "grad_norm": 0.39450510369704206, "learning_rate": 1.4123839108450832e-07, "loss": 0.3065, "step": 7785 }, { "epoch": 1.853215921937288, "grad_norm": 0.38706116936482216, "learning_rate": 1.4078383416481321e-07, "loss": 0.273, "step": 7786 }, { "epoch": 1.8534539180103529, "grad_norm": 0.3993104090511267, "learning_rate": 1.4032999944485203e-07, "loss": 0.3124, "step": 7787 }, { "epoch": 1.8536919140834176, "grad_norm": 0.389522809964699, "learning_rate": 1.398768869920747e-07, "loss": 0.3647, "step": 7788 }, { "epoch": 1.8539299101564826, "grad_norm": 0.4039304741367637, "learning_rate": 1.3942449687382565e-07, "loss": 0.2761, "step": 7789 }, { "epoch": 1.8541679062295473, "grad_norm": 0.42136351566968644, "learning_rate": 1.389728291573411e-07, "loss": 0.328, "step": 7790 }, { "epoch": 1.854405902302612, "grad_norm": 0.4212557672996947, "learning_rate": 1.3852188390975073e-07, "loss": 0.3918, "step": 7791 }, { "epoch": 1.8546438983756768, "grad_norm": 0.3603764523291382, "learning_rate": 1.380716611980748e-07, "loss": 0.3351, "step": 7792 }, { "epoch": 1.8548818944487415, "grad_norm": 0.35966797992735267, "learning_rate": 1.3762216108922922e-07, "loss": 0.2732, "step": 7793 }, { "epoch": 1.8551198905218063, "grad_norm": 0.36333670692392717, "learning_rate": 1.3717338365001943e-07, "loss": 0.329, "step": 7794 }, { "epoch": 1.8553578865948712, "grad_norm": 0.38329227054003784, "learning_rate": 1.367253289471454e-07, "loss": 0.3802, "step": 7795 }, { "epoch": 1.855595882667936, "grad_norm": 0.3514869841098985, "learning_rate": 1.3627799704719947e-07, "loss": 0.2712, "step": 7796 }, { "epoch": 1.855833878741001, "grad_norm": 0.381197916996613, "learning_rate": 1.358313880166656e-07, "loss": 0.302, "step": 7797 }, { "epoch": 1.8560718748140657, "grad_norm": 0.402462868821984, "learning_rate": 1.3538550192192078e-07, "loss": 0.3535, "step": 7798 }, { "epoch": 1.8563098708871304, "grad_norm": 0.3514030830100683, "learning_rate": 1.3494033882923586e-07, "loss": 0.3455, "step": 7799 }, { "epoch": 1.8565478669601951, "grad_norm": 0.3781598067445243, "learning_rate": 1.3449589880477176e-07, "loss": 0.2882, "step": 7800 }, { "epoch": 1.8567858630332599, "grad_norm": 0.38189268313397884, "learning_rate": 1.3405218191458402e-07, "loss": 0.3177, "step": 7801 }, { "epoch": 1.8570238591063246, "grad_norm": 0.3890034707887386, "learning_rate": 1.3360918822461989e-07, "loss": 0.3846, "step": 7802 }, { "epoch": 1.8572618551793896, "grad_norm": 0.354555606174368, "learning_rate": 1.331669178007189e-07, "loss": 0.2828, "step": 7803 }, { "epoch": 1.8574998512524543, "grad_norm": 0.39468523966196106, "learning_rate": 1.32725370708614e-07, "loss": 0.2641, "step": 7804 }, { "epoch": 1.8577378473255193, "grad_norm": 0.4158745671593587, "learning_rate": 1.322845470139289e-07, "loss": 0.3666, "step": 7805 }, { "epoch": 1.857975843398584, "grad_norm": 0.38138886762707447, "learning_rate": 1.3184444678218223e-07, "loss": 0.3276, "step": 7806 }, { "epoch": 1.8582138394716488, "grad_norm": 0.35114076586398596, "learning_rate": 1.314050700787828e-07, "loss": 0.2949, "step": 7807 }, { "epoch": 1.8584518355447135, "grad_norm": 0.36724157814202074, "learning_rate": 1.3096641696903334e-07, "loss": 0.3347, "step": 7808 }, { "epoch": 1.8586898316177782, "grad_norm": 0.38853816925181384, "learning_rate": 1.3052848751812842e-07, "loss": 0.3861, "step": 7809 }, { "epoch": 1.858927827690843, "grad_norm": 0.3903142761173342, "learning_rate": 1.3009128179115539e-07, "loss": 0.3111, "step": 7810 }, { "epoch": 1.859165823763908, "grad_norm": 0.38876795886287885, "learning_rate": 1.2965479985309338e-07, "loss": 0.2738, "step": 7811 }, { "epoch": 1.8594038198369727, "grad_norm": 0.4101960252962491, "learning_rate": 1.2921904176881494e-07, "loss": 0.3681, "step": 7812 }, { "epoch": 1.8596418159100376, "grad_norm": 0.36520915728705955, "learning_rate": 1.2878400760308385e-07, "loss": 0.3571, "step": 7813 }, { "epoch": 1.8598798119831024, "grad_norm": 0.40612725661487625, "learning_rate": 1.2834969742055725e-07, "loss": 0.2713, "step": 7814 }, { "epoch": 1.860117808056167, "grad_norm": 0.6769806659333333, "learning_rate": 1.2791611128578463e-07, "loss": 0.3343, "step": 7815 }, { "epoch": 1.8603558041292318, "grad_norm": 0.40138777376249124, "learning_rate": 1.2748324926320777e-07, "loss": 0.3592, "step": 7816 }, { "epoch": 1.8605938002022966, "grad_norm": 0.37763740906507537, "learning_rate": 1.270511114171591e-07, "loss": 0.3313, "step": 7817 }, { "epoch": 1.8608317962753613, "grad_norm": 0.4238056700691326, "learning_rate": 1.2661969781186723e-07, "loss": 0.2498, "step": 7818 }, { "epoch": 1.8610697923484263, "grad_norm": 0.37963559080128195, "learning_rate": 1.2618900851144976e-07, "loss": 0.3321, "step": 7819 }, { "epoch": 1.861307788421491, "grad_norm": 0.3885643564174667, "learning_rate": 1.2575904357991775e-07, "loss": 0.3975, "step": 7820 }, { "epoch": 1.861545784494556, "grad_norm": 0.34011787013626105, "learning_rate": 1.2532980308117503e-07, "loss": 0.272, "step": 7821 }, { "epoch": 1.8617837805676207, "grad_norm": 0.3891036676630237, "learning_rate": 1.2490128707901727e-07, "loss": 0.293, "step": 7822 }, { "epoch": 1.8620217766406855, "grad_norm": 0.38193328581420083, "learning_rate": 1.2447349563713186e-07, "loss": 0.3707, "step": 7823 }, { "epoch": 1.8622597727137502, "grad_norm": 0.3658925256499234, "learning_rate": 1.2404642881910012e-07, "loss": 0.3051, "step": 7824 }, { "epoch": 1.862497768786815, "grad_norm": 0.3801598075071461, "learning_rate": 1.236200866883941e-07, "loss": 0.2614, "step": 7825 }, { "epoch": 1.8627357648598797, "grad_norm": 0.36720137768282524, "learning_rate": 1.231944693083792e-07, "loss": 0.3171, "step": 7826 }, { "epoch": 1.8629737609329446, "grad_norm": 0.4198297803663176, "learning_rate": 1.2276957674231204e-07, "loss": 0.3851, "step": 7827 }, { "epoch": 1.8632117570060094, "grad_norm": 0.35023106406242704, "learning_rate": 1.223454090533427e-07, "loss": 0.2795, "step": 7828 }, { "epoch": 1.8634497530790743, "grad_norm": 0.34830108173573165, "learning_rate": 1.219219663045129e-07, "loss": 0.2655, "step": 7829 }, { "epoch": 1.863687749152139, "grad_norm": 0.40557375679941676, "learning_rate": 1.2149924855875737e-07, "loss": 0.386, "step": 7830 }, { "epoch": 1.8639257452252038, "grad_norm": 0.38135226051859966, "learning_rate": 1.210772558789014e-07, "loss": 0.3438, "step": 7831 }, { "epoch": 1.8641637412982686, "grad_norm": 0.3573950365210543, "learning_rate": 1.2065598832766369e-07, "loss": 0.2979, "step": 7832 }, { "epoch": 1.8644017373713333, "grad_norm": 0.35921086327394625, "learning_rate": 1.202354459676558e-07, "loss": 0.2962, "step": 7833 }, { "epoch": 1.864639733444398, "grad_norm": 0.3826967666594862, "learning_rate": 1.1981562886137998e-07, "loss": 0.375, "step": 7834 }, { "epoch": 1.864877729517463, "grad_norm": 0.3639494907850754, "learning_rate": 1.1939653707123132e-07, "loss": 0.281, "step": 7835 }, { "epoch": 1.8651157255905277, "grad_norm": 0.37370185714825943, "learning_rate": 1.1897817065949835e-07, "loss": 0.2973, "step": 7836 }, { "epoch": 1.8653537216635927, "grad_norm": 0.5048736432794805, "learning_rate": 1.1856052968835907e-07, "loss": 0.3218, "step": 7837 }, { "epoch": 1.8655917177366574, "grad_norm": 0.36966116531403587, "learning_rate": 1.1814361421988662e-07, "loss": 0.3956, "step": 7838 }, { "epoch": 1.8658297138097222, "grad_norm": 0.3560493828010198, "learning_rate": 1.1772742431604423e-07, "loss": 0.2868, "step": 7839 }, { "epoch": 1.866067709882787, "grad_norm": 0.5370977594652094, "learning_rate": 1.1731196003868794e-07, "loss": 0.3322, "step": 7840 }, { "epoch": 1.8663057059558517, "grad_norm": 0.3922980128293181, "learning_rate": 1.1689722144956672e-07, "loss": 0.3587, "step": 7841 }, { "epoch": 1.8665437020289164, "grad_norm": 0.32486417759838265, "learning_rate": 1.164832086103207e-07, "loss": 0.3013, "step": 7842 }, { "epoch": 1.8667816981019814, "grad_norm": 0.38729519267969026, "learning_rate": 1.1606992158248177e-07, "loss": 0.2793, "step": 7843 }, { "epoch": 1.867019694175046, "grad_norm": 0.37956208767813593, "learning_rate": 1.1565736042747522e-07, "loss": 0.3162, "step": 7844 }, { "epoch": 1.867257690248111, "grad_norm": 0.4116795086325041, "learning_rate": 1.1524552520661702e-07, "loss": 0.3733, "step": 7845 }, { "epoch": 1.8674956863211758, "grad_norm": 0.39437952445062263, "learning_rate": 1.1483441598111766e-07, "loss": 0.3036, "step": 7846 }, { "epoch": 1.8677336823942405, "grad_norm": 0.39085643388306573, "learning_rate": 1.1442403281207714e-07, "loss": 0.3244, "step": 7847 }, { "epoch": 1.8679716784673053, "grad_norm": 0.4014816625744995, "learning_rate": 1.140143757604889e-07, "loss": 0.3738, "step": 7848 }, { "epoch": 1.86820967454037, "grad_norm": 0.3769060375261056, "learning_rate": 1.1360544488723756e-07, "loss": 0.3044, "step": 7849 }, { "epoch": 1.8684476706134348, "grad_norm": 0.3904744507763302, "learning_rate": 1.1319724025310063e-07, "loss": 0.2969, "step": 7850 }, { "epoch": 1.8686856666864997, "grad_norm": 0.3773232634572751, "learning_rate": 1.1278976191874735e-07, "loss": 0.3294, "step": 7851 }, { "epoch": 1.8689236627595645, "grad_norm": 0.3775912202758717, "learning_rate": 1.1238300994473983e-07, "loss": 0.3817, "step": 7852 }, { "epoch": 1.8691616588326294, "grad_norm": 0.36177147793921166, "learning_rate": 1.1197698439153027e-07, "loss": 0.2878, "step": 7853 }, { "epoch": 1.8693996549056942, "grad_norm": 0.3942909323087775, "learning_rate": 1.1157168531946483e-07, "loss": 0.2825, "step": 7854 }, { "epoch": 1.869637650978759, "grad_norm": 0.38479407162289014, "learning_rate": 1.1116711278878034e-07, "loss": 0.3565, "step": 7855 }, { "epoch": 1.8698756470518236, "grad_norm": 0.37536080195437777, "learning_rate": 1.1076326685960758e-07, "loss": 0.3585, "step": 7856 }, { "epoch": 1.8701136431248884, "grad_norm": 0.4072733768621761, "learning_rate": 1.1036014759196689e-07, "loss": 0.2605, "step": 7857 }, { "epoch": 1.870351639197953, "grad_norm": 0.3794776719624951, "learning_rate": 1.09957755045772e-07, "loss": 0.3195, "step": 7858 }, { "epoch": 1.870589635271018, "grad_norm": 0.3781656516512505, "learning_rate": 1.095560892808284e-07, "loss": 0.3695, "step": 7859 }, { "epoch": 1.8708276313440828, "grad_norm": 0.3477470340626564, "learning_rate": 1.0915515035683444e-07, "loss": 0.2976, "step": 7860 }, { "epoch": 1.8710656274171478, "grad_norm": 0.39868562095433935, "learning_rate": 1.0875493833337803e-07, "loss": 0.2701, "step": 7861 }, { "epoch": 1.8713036234902125, "grad_norm": 0.4054869894626742, "learning_rate": 1.0835545326994213e-07, "loss": 0.3525, "step": 7862 }, { "epoch": 1.8715416195632772, "grad_norm": 0.36521275528859204, "learning_rate": 1.079566952258987e-07, "loss": 0.3625, "step": 7863 }, { "epoch": 1.871779615636342, "grad_norm": 0.3635342656993625, "learning_rate": 1.075586642605142e-07, "loss": 0.268, "step": 7864 }, { "epoch": 1.8720176117094067, "grad_norm": 0.37356788241263744, "learning_rate": 1.0716136043294468e-07, "loss": 0.2874, "step": 7865 }, { "epoch": 1.8722556077824715, "grad_norm": 0.3961313481479438, "learning_rate": 1.0676478380224065e-07, "loss": 0.3772, "step": 7866 }, { "epoch": 1.8724936038555364, "grad_norm": 0.37248128983442935, "learning_rate": 1.063689344273422e-07, "loss": 0.291, "step": 7867 }, { "epoch": 1.8727315999286012, "grad_norm": 0.42564199958608884, "learning_rate": 1.0597381236708282e-07, "loss": 0.2925, "step": 7868 }, { "epoch": 1.8729695960016661, "grad_norm": 0.3945675089261431, "learning_rate": 1.055794176801872e-07, "loss": 0.3273, "step": 7869 }, { "epoch": 1.8732075920747309, "grad_norm": 0.36858121380582176, "learning_rate": 1.0518575042527235e-07, "loss": 0.425, "step": 7870 }, { "epoch": 1.8734455881477956, "grad_norm": 0.4008228433529883, "learning_rate": 1.0479281066084701e-07, "loss": 0.2753, "step": 7871 }, { "epoch": 1.8736835842208603, "grad_norm": 0.3970329160462698, "learning_rate": 1.044005984453117e-07, "loss": 0.3065, "step": 7872 }, { "epoch": 1.873921580293925, "grad_norm": 0.3752973477487869, "learning_rate": 1.0400911383695756e-07, "loss": 0.3393, "step": 7873 }, { "epoch": 1.8741595763669898, "grad_norm": 0.34507839687636127, "learning_rate": 1.0361835689397137e-07, "loss": 0.3171, "step": 7874 }, { "epoch": 1.8743975724400548, "grad_norm": 0.3956732220335401, "learning_rate": 1.0322832767442726e-07, "loss": 0.2712, "step": 7875 }, { "epoch": 1.8746355685131195, "grad_norm": 0.3478429638128932, "learning_rate": 1.0283902623629439e-07, "loss": 0.3163, "step": 7876 }, { "epoch": 1.8748735645861845, "grad_norm": 0.43937488881046505, "learning_rate": 1.0245045263743203e-07, "loss": 0.3866, "step": 7877 }, { "epoch": 1.8751115606592492, "grad_norm": 0.3712312999960514, "learning_rate": 1.0206260693559234e-07, "loss": 0.2936, "step": 7878 }, { "epoch": 1.875349556732314, "grad_norm": 0.3667219720522898, "learning_rate": 1.0167548918841752e-07, "loss": 0.2841, "step": 7879 }, { "epoch": 1.8755875528053787, "grad_norm": 0.3758275879637338, "learning_rate": 1.0128909945344433e-07, "loss": 0.3117, "step": 7880 }, { "epoch": 1.8758255488784434, "grad_norm": 0.3693311113464016, "learning_rate": 1.0090343778809908e-07, "loss": 0.3607, "step": 7881 }, { "epoch": 1.8760635449515082, "grad_norm": 0.3689248776098616, "learning_rate": 1.0051850424970034e-07, "loss": 0.287, "step": 7882 }, { "epoch": 1.8763015410245731, "grad_norm": 0.3877904699869412, "learning_rate": 1.0013429889546011e-07, "loss": 0.3327, "step": 7883 }, { "epoch": 1.8765395370976379, "grad_norm": 0.3813592994481828, "learning_rate": 9.975082178247942e-08, "loss": 0.3817, "step": 7884 }, { "epoch": 1.8767775331707028, "grad_norm": 0.39447485339729055, "learning_rate": 9.936807296775264e-08, "loss": 0.3179, "step": 7885 }, { "epoch": 1.8770155292437676, "grad_norm": 0.36838965887067854, "learning_rate": 9.898605250816596e-08, "loss": 0.2767, "step": 7886 }, { "epoch": 1.8772535253168323, "grad_norm": 0.40695179381212354, "learning_rate": 9.860476046049783e-08, "loss": 0.3116, "step": 7887 }, { "epoch": 1.877491521389897, "grad_norm": 0.37922684546428304, "learning_rate": 9.822419688141627e-08, "loss": 0.371, "step": 7888 }, { "epoch": 1.8777295174629618, "grad_norm": 0.3605426474272245, "learning_rate": 9.784436182748381e-08, "loss": 0.3157, "step": 7889 }, { "epoch": 1.8779675135360265, "grad_norm": 0.3855508493344283, "learning_rate": 9.74652553551525e-08, "loss": 0.3126, "step": 7890 }, { "epoch": 1.8782055096090915, "grad_norm": 0.36654027571515835, "learning_rate": 9.708687752076673e-08, "loss": 0.3796, "step": 7891 }, { "epoch": 1.8784435056821562, "grad_norm": 0.37798898644072126, "learning_rate": 9.670922838056374e-08, "loss": 0.3268, "step": 7892 }, { "epoch": 1.8786815017552212, "grad_norm": 0.37273754822520255, "learning_rate": 9.633230799067084e-08, "loss": 0.2805, "step": 7893 }, { "epoch": 1.878919497828286, "grad_norm": 0.38458823706391126, "learning_rate": 9.595611640710767e-08, "loss": 0.3494, "step": 7894 }, { "epoch": 1.8791574939013507, "grad_norm": 0.39167494539281444, "learning_rate": 9.558065368578561e-08, "loss": 0.3506, "step": 7895 }, { "epoch": 1.8793954899744154, "grad_norm": 0.3574825980828279, "learning_rate": 9.520591988250838e-08, "loss": 0.2689, "step": 7896 }, { "epoch": 1.8796334860474802, "grad_norm": 0.37303769358161776, "learning_rate": 9.483191505296974e-08, "loss": 0.3235, "step": 7897 }, { "epoch": 1.879871482120545, "grad_norm": 0.3651354155087522, "learning_rate": 9.44586392527569e-08, "loss": 0.3755, "step": 7898 }, { "epoch": 1.8801094781936099, "grad_norm": 0.39194478737773597, "learning_rate": 9.408609253734713e-08, "loss": 0.3255, "step": 7899 }, { "epoch": 1.8803474742666746, "grad_norm": 0.3713897973414568, "learning_rate": 9.371427496211061e-08, "loss": 0.2869, "step": 7900 }, { "epoch": 1.8805854703397396, "grad_norm": 0.36090648920272766, "learning_rate": 9.334318658230867e-08, "loss": 0.3191, "step": 7901 }, { "epoch": 1.8808234664128043, "grad_norm": 0.41318377703708387, "learning_rate": 9.297282745309389e-08, "loss": 0.3773, "step": 7902 }, { "epoch": 1.881061462485869, "grad_norm": 0.3713769438671181, "learning_rate": 9.260319762951109e-08, "loss": 0.3186, "step": 7903 }, { "epoch": 1.8812994585589338, "grad_norm": 0.3858895133643765, "learning_rate": 9.223429716649634e-08, "loss": 0.2755, "step": 7904 }, { "epoch": 1.8815374546319985, "grad_norm": 0.422907620593951, "learning_rate": 9.186612611887691e-08, "loss": 0.3659, "step": 7905 }, { "epoch": 1.8817754507050632, "grad_norm": 0.4024148982753146, "learning_rate": 9.14986845413729e-08, "loss": 0.3693, "step": 7906 }, { "epoch": 1.8820134467781282, "grad_norm": 0.3803493722755298, "learning_rate": 9.113197248859451e-08, "loss": 0.2724, "step": 7907 }, { "epoch": 1.882251442851193, "grad_norm": 0.4118593592302728, "learning_rate": 9.076599001504482e-08, "loss": 0.3282, "step": 7908 }, { "epoch": 1.882489438924258, "grad_norm": 0.3953308094670974, "learning_rate": 9.0400737175117e-08, "loss": 0.3764, "step": 7909 }, { "epoch": 1.8827274349973226, "grad_norm": 0.3696749681639606, "learning_rate": 9.003621402309815e-08, "loss": 0.277, "step": 7910 }, { "epoch": 1.8829654310703874, "grad_norm": 0.3615369062856125, "learning_rate": 8.96724206131644e-08, "loss": 0.2998, "step": 7911 }, { "epoch": 1.8832034271434521, "grad_norm": 0.41428190444047164, "learning_rate": 8.930935699938415e-08, "loss": 0.3354, "step": 7912 }, { "epoch": 1.8834414232165169, "grad_norm": 0.3929646777015546, "learning_rate": 8.89470232357187e-08, "loss": 0.3684, "step": 7913 }, { "epoch": 1.8836794192895816, "grad_norm": 0.3589833395799497, "learning_rate": 8.858541937601827e-08, "loss": 0.2739, "step": 7914 }, { "epoch": 1.8839174153626466, "grad_norm": 0.3889496001692624, "learning_rate": 8.82245454740277e-08, "loss": 0.2876, "step": 7915 }, { "epoch": 1.8841554114357113, "grad_norm": 0.42247196753368266, "learning_rate": 8.786440158338072e-08, "loss": 0.3592, "step": 7916 }, { "epoch": 1.8843934075087763, "grad_norm": 0.36693957355643053, "learning_rate": 8.750498775760453e-08, "loss": 0.3229, "step": 7917 }, { "epoch": 1.884631403581841, "grad_norm": 0.37575006564385804, "learning_rate": 8.714630405011637e-08, "loss": 0.2745, "step": 7918 }, { "epoch": 1.8848693996549057, "grad_norm": 0.3699668444902597, "learning_rate": 8.678835051422585e-08, "loss": 0.3479, "step": 7919 }, { "epoch": 1.8851073957279705, "grad_norm": 0.37951849531963455, "learning_rate": 8.643112720313262e-08, "loss": 0.3748, "step": 7920 }, { "epoch": 1.8853453918010352, "grad_norm": 0.401706429927378, "learning_rate": 8.607463416993034e-08, "loss": 0.2929, "step": 7921 }, { "epoch": 1.8855833878741, "grad_norm": 0.40506962687618053, "learning_rate": 8.571887146760217e-08, "loss": 0.3326, "step": 7922 }, { "epoch": 1.885821383947165, "grad_norm": 0.38167411063561124, "learning_rate": 8.536383914902301e-08, "loss": 0.3743, "step": 7923 }, { "epoch": 1.8860593800202297, "grad_norm": 0.37951337247623673, "learning_rate": 8.500953726695959e-08, "loss": 0.3368, "step": 7924 }, { "epoch": 1.8862973760932946, "grad_norm": 0.3807047055623607, "learning_rate": 8.465596587406977e-08, "loss": 0.2633, "step": 7925 }, { "epoch": 1.8865353721663594, "grad_norm": 0.37255448111092837, "learning_rate": 8.430312502290316e-08, "loss": 0.3475, "step": 7926 }, { "epoch": 1.886773368239424, "grad_norm": 0.3904240340365388, "learning_rate": 8.395101476590062e-08, "loss": 0.3999, "step": 7927 }, { "epoch": 1.8870113643124888, "grad_norm": 0.3590248194841884, "learning_rate": 8.359963515539416e-08, "loss": 0.2924, "step": 7928 }, { "epoch": 1.8872493603855536, "grad_norm": 0.38702468210446556, "learning_rate": 8.324898624360867e-08, "loss": 0.2733, "step": 7929 }, { "epoch": 1.8874873564586183, "grad_norm": 0.3814942319217911, "learning_rate": 8.289906808265746e-08, "loss": 0.3281, "step": 7930 }, { "epoch": 1.8877253525316833, "grad_norm": 0.40004094569529125, "learning_rate": 8.254988072454895e-08, "loss": 0.3284, "step": 7931 }, { "epoch": 1.887963348604748, "grad_norm": 0.3723689654618086, "learning_rate": 8.220142422117939e-08, "loss": 0.2681, "step": 7932 }, { "epoch": 1.888201344677813, "grad_norm": 0.44726403333459647, "learning_rate": 8.185369862433845e-08, "loss": 0.3056, "step": 7933 }, { "epoch": 1.8884393407508777, "grad_norm": 0.3806982139299245, "learning_rate": 8.150670398570759e-08, "loss": 0.3877, "step": 7934 }, { "epoch": 1.8886773368239425, "grad_norm": 0.35216108572000016, "learning_rate": 8.116044035685777e-08, "loss": 0.2884, "step": 7935 }, { "epoch": 1.8889153328970072, "grad_norm": 0.39643635386909837, "learning_rate": 8.081490778925283e-08, "loss": 0.2902, "step": 7936 }, { "epoch": 1.889153328970072, "grad_norm": 0.4923610296056875, "learning_rate": 8.047010633424723e-08, "loss": 0.3359, "step": 7937 }, { "epoch": 1.8893913250431367, "grad_norm": 0.379519284347565, "learning_rate": 8.012603604308721e-08, "loss": 0.3535, "step": 7938 }, { "epoch": 1.8896293211162016, "grad_norm": 0.3635457197510201, "learning_rate": 7.978269696691021e-08, "loss": 0.2947, "step": 7939 }, { "epoch": 1.8898673171892664, "grad_norm": 0.4033859742678164, "learning_rate": 7.944008915674484e-08, "loss": 0.311, "step": 7940 }, { "epoch": 1.8901053132623313, "grad_norm": 0.3982200170533466, "learning_rate": 7.909821266351092e-08, "loss": 0.3703, "step": 7941 }, { "epoch": 1.890343309335396, "grad_norm": 0.35373099489430676, "learning_rate": 7.875706753801949e-08, "loss": 0.3103, "step": 7942 }, { "epoch": 1.8905813054084608, "grad_norm": 0.36747336775173683, "learning_rate": 7.841665383097386e-08, "loss": 0.2825, "step": 7943 }, { "epoch": 1.8908193014815255, "grad_norm": 0.3650587611745797, "learning_rate": 7.807697159296746e-08, "loss": 0.3499, "step": 7944 }, { "epoch": 1.8910572975545903, "grad_norm": 0.40187256746234257, "learning_rate": 7.773802087448545e-08, "loss": 0.356, "step": 7945 }, { "epoch": 1.891295293627655, "grad_norm": 0.37435669326276105, "learning_rate": 7.739980172590477e-08, "loss": 0.275, "step": 7946 }, { "epoch": 1.89153328970072, "grad_norm": 0.36212200367835184, "learning_rate": 7.706231419749243e-08, "loss": 0.3142, "step": 7947 }, { "epoch": 1.8917712857737847, "grad_norm": 0.39951418029554553, "learning_rate": 7.672555833940832e-08, "loss": 0.3621, "step": 7948 }, { "epoch": 1.8920092818468497, "grad_norm": 0.3786672956664201, "learning_rate": 7.638953420170181e-08, "loss": 0.3187, "step": 7949 }, { "epoch": 1.8922472779199144, "grad_norm": 0.3923921053093728, "learning_rate": 7.605424183431464e-08, "loss": 0.2786, "step": 7950 }, { "epoch": 1.8924852739929792, "grad_norm": 0.38868628053542476, "learning_rate": 7.57196812870803e-08, "loss": 0.3226, "step": 7951 }, { "epoch": 1.892723270066044, "grad_norm": 0.3985157429933502, "learning_rate": 7.538585260972175e-08, "loss": 0.365, "step": 7952 }, { "epoch": 1.8929612661391086, "grad_norm": 0.41970802109407324, "learning_rate": 7.505275585185434e-08, "loss": 0.2964, "step": 7953 }, { "epoch": 1.8931992622121734, "grad_norm": 0.38733261918545264, "learning_rate": 7.472039106298512e-08, "loss": 0.3075, "step": 7954 }, { "epoch": 1.8934372582852383, "grad_norm": 0.4141672520453948, "learning_rate": 7.438875829251069e-08, "loss": 0.3563, "step": 7955 }, { "epoch": 1.893675254358303, "grad_norm": 0.3763459702365932, "learning_rate": 7.405785758972106e-08, "loss": 0.3256, "step": 7956 }, { "epoch": 1.893913250431368, "grad_norm": 0.41340503131702716, "learning_rate": 7.372768900379579e-08, "loss": 0.276, "step": 7957 }, { "epoch": 1.8941512465044328, "grad_norm": 0.3690483636794343, "learning_rate": 7.339825258380618e-08, "loss": 0.3253, "step": 7958 }, { "epoch": 1.8943892425774975, "grad_norm": 0.3890667108974889, "learning_rate": 7.306954837871415e-08, "loss": 0.3858, "step": 7959 }, { "epoch": 1.8946272386505623, "grad_norm": 0.3686373090623398, "learning_rate": 7.2741576437374e-08, "loss": 0.3156, "step": 7960 }, { "epoch": 1.894865234723627, "grad_norm": 0.3662856876936937, "learning_rate": 7.241433680852949e-08, "loss": 0.2893, "step": 7961 }, { "epoch": 1.8951032307966917, "grad_norm": 0.4337210767045377, "learning_rate": 7.208782954081784e-08, "loss": 0.347, "step": 7962 }, { "epoch": 1.8953412268697567, "grad_norm": 0.37458324393332965, "learning_rate": 7.176205468276465e-08, "loss": 0.395, "step": 7963 }, { "epoch": 1.8955792229428214, "grad_norm": 0.36173109464150077, "learning_rate": 7.143701228278899e-08, "loss": 0.2887, "step": 7964 }, { "epoch": 1.8958172190158864, "grad_norm": 0.38262759512926825, "learning_rate": 7.111270238920054e-08, "loss": 0.2915, "step": 7965 }, { "epoch": 1.8960552150889511, "grad_norm": 0.3827036517273416, "learning_rate": 7.078912505019908e-08, "loss": 0.3438, "step": 7966 }, { "epoch": 1.8962932111620159, "grad_norm": 0.3848814530905264, "learning_rate": 7.046628031387615e-08, "loss": 0.301, "step": 7967 }, { "epoch": 1.8965312072350806, "grad_norm": 0.4067525298074377, "learning_rate": 7.014416822821557e-08, "loss": 0.2432, "step": 7968 }, { "epoch": 1.8967692033081454, "grad_norm": 0.3812327537956296, "learning_rate": 6.982278884108907e-08, "loss": 0.3234, "step": 7969 }, { "epoch": 1.89700719938121, "grad_norm": 0.3605852849420692, "learning_rate": 6.950214220026397e-08, "loss": 0.4051, "step": 7970 }, { "epoch": 1.897245195454275, "grad_norm": 0.3958665551698103, "learning_rate": 6.918222835339438e-08, "loss": 0.2686, "step": 7971 }, { "epoch": 1.8974831915273398, "grad_norm": 0.38258672492708956, "learning_rate": 6.886304734802896e-08, "loss": 0.3141, "step": 7972 }, { "epoch": 1.8977211876004048, "grad_norm": 0.3981035167445995, "learning_rate": 6.854459923160472e-08, "loss": 0.3582, "step": 7973 }, { "epoch": 1.8979591836734695, "grad_norm": 0.36696157556372344, "learning_rate": 6.822688405145161e-08, "loss": 0.3243, "step": 7974 }, { "epoch": 1.8981971797465342, "grad_norm": 0.3713373387211765, "learning_rate": 6.790990185478963e-08, "loss": 0.2546, "step": 7975 }, { "epoch": 1.898435175819599, "grad_norm": 0.37672668107775037, "learning_rate": 6.759365268872997e-08, "loss": 0.3332, "step": 7976 }, { "epoch": 1.8986731718926637, "grad_norm": 0.359979308125644, "learning_rate": 6.727813660027616e-08, "loss": 0.3621, "step": 7977 }, { "epoch": 1.8989111679657285, "grad_norm": 0.3608027535934777, "learning_rate": 6.696335363632012e-08, "loss": 0.2963, "step": 7978 }, { "epoch": 1.8991491640387934, "grad_norm": 0.37376170686319327, "learning_rate": 6.66493038436472e-08, "loss": 0.2775, "step": 7979 }, { "epoch": 1.8993871601118582, "grad_norm": 0.38424326860289765, "learning_rate": 6.633598726893342e-08, "loss": 0.3589, "step": 7980 }, { "epoch": 1.8996251561849231, "grad_norm": 0.394059524418179, "learning_rate": 6.602340395874484e-08, "loss": 0.3771, "step": 7981 }, { "epoch": 1.8998631522579879, "grad_norm": 0.37382562755513166, "learning_rate": 6.571155395953877e-08, "loss": 0.2874, "step": 7982 }, { "epoch": 1.9001011483310526, "grad_norm": 0.3874100204834066, "learning_rate": 6.540043731766421e-08, "loss": 0.3195, "step": 7983 }, { "epoch": 1.9003391444041173, "grad_norm": 0.48921624228327715, "learning_rate": 6.509005407936087e-08, "loss": 0.3603, "step": 7984 }, { "epoch": 1.900577140477182, "grad_norm": 0.35858308962539986, "learning_rate": 6.478040429075961e-08, "loss": 0.2859, "step": 7985 }, { "epoch": 1.9008151365502468, "grad_norm": 0.3932336834908693, "learning_rate": 6.447148799788139e-08, "loss": 0.2843, "step": 7986 }, { "epoch": 1.9010531326233118, "grad_norm": 0.36226032388387175, "learning_rate": 6.416330524663895e-08, "loss": 0.3366, "step": 7987 }, { "epoch": 1.9012911286963765, "grad_norm": 0.41269859656532076, "learning_rate": 6.385585608283673e-08, "loss": 0.3668, "step": 7988 }, { "epoch": 1.9015291247694415, "grad_norm": 0.3845523305990824, "learning_rate": 6.35491405521682e-08, "loss": 0.2758, "step": 7989 }, { "epoch": 1.9017671208425062, "grad_norm": 0.40301323636395514, "learning_rate": 6.324315870021858e-08, "loss": 0.3034, "step": 7990 }, { "epoch": 1.902005116915571, "grad_norm": 0.3907642073015502, "learning_rate": 6.29379105724659e-08, "loss": 0.3739, "step": 7991 }, { "epoch": 1.9022431129886357, "grad_norm": 0.3606625473176197, "learning_rate": 6.263339621427666e-08, "loss": 0.3433, "step": 7992 }, { "epoch": 1.9024811090617004, "grad_norm": 0.4090491853767471, "learning_rate": 6.232961567090912e-08, "loss": 0.2904, "step": 7993 }, { "epoch": 1.9027191051347652, "grad_norm": 0.3812316669315538, "learning_rate": 6.202656898751324e-08, "loss": 0.3236, "step": 7994 }, { "epoch": 1.9029571012078301, "grad_norm": 0.37564697095435645, "learning_rate": 6.172425620912859e-08, "loss": 0.3895, "step": 7995 }, { "epoch": 1.9031950972808949, "grad_norm": 0.38971270053161045, "learning_rate": 6.142267738068641e-08, "loss": 0.2818, "step": 7996 }, { "epoch": 1.9034330933539596, "grad_norm": 0.4139644761266531, "learning_rate": 6.112183254700866e-08, "loss": 0.3051, "step": 7997 }, { "epoch": 1.9036710894270246, "grad_norm": 0.4000612669015111, "learning_rate": 6.082172175280843e-08, "loss": 0.3858, "step": 7998 }, { "epoch": 1.9039090855000893, "grad_norm": 0.39138000482768825, "learning_rate": 6.052234504269006e-08, "loss": 0.2829, "step": 7999 }, { "epoch": 1.904147081573154, "grad_norm": 0.4111965153470907, "learning_rate": 6.022370246114795e-08, "loss": 0.2599, "step": 8000 }, { "epoch": 1.9043850776462188, "grad_norm": 0.38739615879574596, "learning_rate": 5.992579405256826e-08, "loss": 0.3336, "step": 8001 }, { "epoch": 1.9046230737192835, "grad_norm": 0.35399850972575364, "learning_rate": 5.96286198612267e-08, "loss": 0.4036, "step": 8002 }, { "epoch": 1.9048610697923483, "grad_norm": 0.3501272012525231, "learning_rate": 5.933217993129126e-08, "loss": 0.2928, "step": 8003 }, { "epoch": 1.9050990658654132, "grad_norm": 0.3940432999472092, "learning_rate": 5.903647430682002e-08, "loss": 0.2813, "step": 8004 }, { "epoch": 1.905337061938478, "grad_norm": 0.37153990539467646, "learning_rate": 5.8741503031762294e-08, "loss": 0.3495, "step": 8005 }, { "epoch": 1.905575058011543, "grad_norm": 0.3650091291465742, "learning_rate": 5.844726614995799e-08, "loss": 0.3493, "step": 8006 }, { "epoch": 1.9058130540846077, "grad_norm": 0.35578280548382063, "learning_rate": 5.815376370513825e-08, "loss": 0.2911, "step": 8007 }, { "epoch": 1.9060510501576724, "grad_norm": 0.4008983674611723, "learning_rate": 5.7860995740924296e-08, "loss": 0.3143, "step": 8008 }, { "epoch": 1.9062890462307371, "grad_norm": 0.38432833244657477, "learning_rate": 5.756896230082909e-08, "loss": 0.3776, "step": 8009 }, { "epoch": 1.9065270423038019, "grad_norm": 0.3632804695720169, "learning_rate": 5.7277663428256245e-08, "loss": 0.2985, "step": 8010 }, { "epoch": 1.9067650383768666, "grad_norm": 0.3906672244339681, "learning_rate": 5.698709916649892e-08, "loss": 0.2724, "step": 8011 }, { "epoch": 1.9070030344499316, "grad_norm": 0.38051344490028755, "learning_rate": 5.669726955874366e-08, "loss": 0.3162, "step": 8012 }, { "epoch": 1.9072410305229963, "grad_norm": 0.35282134896325107, "learning_rate": 5.640817464806547e-08, "loss": 0.3467, "step": 8013 }, { "epoch": 1.9074790265960613, "grad_norm": 0.38263778679767824, "learning_rate": 5.611981447743109e-08, "loss": 0.2883, "step": 8014 }, { "epoch": 1.907717022669126, "grad_norm": 0.37534374522335623, "learning_rate": 5.5832189089697895e-08, "loss": 0.3081, "step": 8015 }, { "epoch": 1.9079550187421908, "grad_norm": 0.3824203414300469, "learning_rate": 5.554529852761337e-08, "loss": 0.3706, "step": 8016 }, { "epoch": 1.9081930148152555, "grad_norm": 0.37799082292639585, "learning_rate": 5.525914283381839e-08, "loss": 0.3255, "step": 8017 }, { "epoch": 1.9084310108883202, "grad_norm": 0.3920668628878149, "learning_rate": 5.497372205084173e-08, "loss": 0.2966, "step": 8018 }, { "epoch": 1.908669006961385, "grad_norm": 0.34537104511664757, "learning_rate": 5.468903622110389e-08, "loss": 0.3038, "step": 8019 }, { "epoch": 1.90890700303445, "grad_norm": 0.3841678485218043, "learning_rate": 5.440508538691658e-08, "loss": 0.3513, "step": 8020 }, { "epoch": 1.9091449991075147, "grad_norm": 0.34656903960442637, "learning_rate": 5.412186959048105e-08, "loss": 0.2848, "step": 8021 }, { "epoch": 1.9093829951805796, "grad_norm": 0.3940028400776739, "learning_rate": 5.38393888738914e-08, "loss": 0.2805, "step": 8022 }, { "epoch": 1.9096209912536444, "grad_norm": 0.3819034767089527, "learning_rate": 5.355764327913071e-08, "loss": 0.3605, "step": 8023 }, { "epoch": 1.9098589873267091, "grad_norm": 0.4050020295830134, "learning_rate": 5.3276632848072716e-08, "loss": 0.3406, "step": 8024 }, { "epoch": 1.9100969833997739, "grad_norm": 0.3711188459584683, "learning_rate": 5.299635762248345e-08, "loss": 0.2649, "step": 8025 }, { "epoch": 1.9103349794728386, "grad_norm": 0.3825043059653578, "learning_rate": 5.271681764401848e-08, "loss": 0.297, "step": 8026 }, { "epoch": 1.9105729755459033, "grad_norm": 0.3872078261635392, "learning_rate": 5.243801295422457e-08, "loss": 0.3961, "step": 8027 }, { "epoch": 1.9108109716189683, "grad_norm": 0.3675581463780866, "learning_rate": 5.215994359453858e-08, "loss": 0.2728, "step": 8028 }, { "epoch": 1.911048967692033, "grad_norm": 0.39794626943566946, "learning_rate": 5.188260960628855e-08, "loss": 0.2939, "step": 8029 }, { "epoch": 1.911286963765098, "grad_norm": 0.3881930755761182, "learning_rate": 5.1606011030693184e-08, "loss": 0.3508, "step": 8030 }, { "epoch": 1.9115249598381627, "grad_norm": 0.34966274001131864, "learning_rate": 5.1330147908861814e-08, "loss": 0.3254, "step": 8031 }, { "epoch": 1.9117629559112275, "grad_norm": 0.3810722565822783, "learning_rate": 5.1055020281794987e-08, "loss": 0.2676, "step": 8032 }, { "epoch": 1.9120009519842922, "grad_norm": 0.3913423806435706, "learning_rate": 5.078062819038332e-08, "loss": 0.3274, "step": 8033 }, { "epoch": 1.912238948057357, "grad_norm": 0.3957900932077373, "learning_rate": 5.0506971675407526e-08, "loss": 0.3859, "step": 8034 }, { "epoch": 1.9124769441304217, "grad_norm": 0.41296040287870084, "learning_rate": 5.0234050777540625e-08, "loss": 0.2761, "step": 8035 }, { "epoch": 1.9127149402034866, "grad_norm": 0.3617049666384471, "learning_rate": 4.996186553734517e-08, "loss": 0.258, "step": 8036 }, { "epoch": 1.9129529362765514, "grad_norm": 0.4056471829864505, "learning_rate": 4.9690415995274354e-08, "loss": 0.3416, "step": 8037 }, { "epoch": 1.9131909323496163, "grad_norm": 0.3575725862341246, "learning_rate": 4.941970219167203e-08, "loss": 0.3405, "step": 8038 }, { "epoch": 1.913428928422681, "grad_norm": 0.3654418605929413, "learning_rate": 4.914972416677433e-08, "loss": 0.2714, "step": 8039 }, { "epoch": 1.9136669244957458, "grad_norm": 0.3928718133203763, "learning_rate": 4.8880481960705274e-08, "loss": 0.3084, "step": 8040 }, { "epoch": 1.9139049205688106, "grad_norm": 0.3860998147396003, "learning_rate": 4.861197561348119e-08, "loss": 0.3449, "step": 8041 }, { "epoch": 1.9141429166418753, "grad_norm": 0.3828325589358172, "learning_rate": 4.83442051650096e-08, "loss": 0.3055, "step": 8042 }, { "epoch": 1.91438091271494, "grad_norm": 0.39352418664155536, "learning_rate": 4.8077170655086436e-08, "loss": 0.2848, "step": 8043 }, { "epoch": 1.914618908788005, "grad_norm": 0.4101375769165912, "learning_rate": 4.781087212340052e-08, "loss": 0.3486, "step": 8044 }, { "epoch": 1.9148569048610697, "grad_norm": 0.39533684005769876, "learning_rate": 4.754530960953074e-08, "loss": 0.3536, "step": 8045 }, { "epoch": 1.9150949009341347, "grad_norm": 0.37662010337344887, "learning_rate": 4.728048315294553e-08, "loss": 0.315, "step": 8046 }, { "epoch": 1.9153328970071994, "grad_norm": 0.3934959783113809, "learning_rate": 4.701639279300507e-08, "loss": 0.3085, "step": 8047 }, { "epoch": 1.9155708930802642, "grad_norm": 0.37377434092719386, "learning_rate": 4.675303856895907e-08, "loss": 0.381, "step": 8048 }, { "epoch": 1.915808889153329, "grad_norm": 0.37694394724451996, "learning_rate": 4.649042051994956e-08, "loss": 0.3324, "step": 8049 }, { "epoch": 1.9160468852263937, "grad_norm": 0.37595294353982567, "learning_rate": 4.622853868500699e-08, "loss": 0.2523, "step": 8050 }, { "epoch": 1.9162848812994584, "grad_norm": 0.40725905564623294, "learning_rate": 4.596739310305409e-08, "loss": 0.3108, "step": 8051 }, { "epoch": 1.9165228773725234, "grad_norm": 0.38543823318432485, "learning_rate": 4.570698381290317e-08, "loss": 0.3867, "step": 8052 }, { "epoch": 1.916760873445588, "grad_norm": 0.3693394614353527, "learning_rate": 4.5447310853258265e-08, "loss": 0.306, "step": 8053 }, { "epoch": 1.916998869518653, "grad_norm": 0.374918757902991, "learning_rate": 4.5188374262712385e-08, "loss": 0.2909, "step": 8054 }, { "epoch": 1.9172368655917178, "grad_norm": 0.4268585385107588, "learning_rate": 4.493017407975087e-08, "loss": 0.3706, "step": 8055 }, { "epoch": 1.9174748616647825, "grad_norm": 0.3550622241647807, "learning_rate": 4.467271034274745e-08, "loss": 0.3147, "step": 8056 }, { "epoch": 1.9177128577378473, "grad_norm": 0.3673246840410985, "learning_rate": 4.44159830899682e-08, "loss": 0.2714, "step": 8057 }, { "epoch": 1.917950853810912, "grad_norm": 0.38166747800870954, "learning_rate": 4.415999235956925e-08, "loss": 0.3132, "step": 8058 }, { "epoch": 1.9181888498839768, "grad_norm": 0.37082445385628915, "learning_rate": 4.390473818959684e-08, "loss": 0.3427, "step": 8059 }, { "epoch": 1.9184268459570417, "grad_norm": 0.3502851478207926, "learning_rate": 4.3650220617988404e-08, "loss": 0.3055, "step": 8060 }, { "epoch": 1.9186648420301065, "grad_norm": 0.38376398396770767, "learning_rate": 4.3396439682570904e-08, "loss": 0.2819, "step": 8061 }, { "epoch": 1.9189028381031714, "grad_norm": 0.37104295492217804, "learning_rate": 4.3143395421063607e-08, "loss": 0.3136, "step": 8062 }, { "epoch": 1.9191408341762362, "grad_norm": 0.39064486463386716, "learning_rate": 4.2891087871073656e-08, "loss": 0.3645, "step": 8063 }, { "epoch": 1.919378830249301, "grad_norm": 0.3959614629937633, "learning_rate": 4.263951707010161e-08, "loss": 0.2889, "step": 8064 }, { "epoch": 1.9196168263223656, "grad_norm": 0.3976404851904098, "learning_rate": 4.238868305553645e-08, "loss": 0.2977, "step": 8065 }, { "epoch": 1.9198548223954304, "grad_norm": 0.4187234518989236, "learning_rate": 4.2138585864658354e-08, "loss": 0.3897, "step": 8066 }, { "epoch": 1.920092818468495, "grad_norm": 0.37755488054016684, "learning_rate": 4.188922553463759e-08, "loss": 0.3252, "step": 8067 }, { "epoch": 1.92033081454156, "grad_norm": 0.3915281355437284, "learning_rate": 4.164060210253618e-08, "loss": 0.2707, "step": 8068 }, { "epoch": 1.9205688106146248, "grad_norm": 0.39725191077466404, "learning_rate": 4.1392715605305114e-08, "loss": 0.346, "step": 8069 }, { "epoch": 1.9208068066876898, "grad_norm": 0.3714723524123798, "learning_rate": 4.1145566079786034e-08, "loss": 0.3478, "step": 8070 }, { "epoch": 1.9210448027607545, "grad_norm": 0.3707101045708407, "learning_rate": 4.089915356271234e-08, "loss": 0.3095, "step": 8071 }, { "epoch": 1.9212827988338192, "grad_norm": 0.4055723864611074, "learning_rate": 4.0653478090706965e-08, "loss": 0.2812, "step": 8072 }, { "epoch": 1.921520794906884, "grad_norm": 0.3801871133061815, "learning_rate": 4.040853970028291e-08, "loss": 0.3632, "step": 8073 }, { "epoch": 1.9217587909799487, "grad_norm": 0.3539168169356299, "learning_rate": 4.01643384278444e-08, "loss": 0.3052, "step": 8074 }, { "epoch": 1.9219967870530135, "grad_norm": 0.3611686215376351, "learning_rate": 3.992087430968516e-08, "loss": 0.2806, "step": 8075 }, { "epoch": 1.9222347831260784, "grad_norm": 0.3913596751920842, "learning_rate": 3.967814738199072e-08, "loss": 0.3105, "step": 8076 }, { "epoch": 1.9224727791991432, "grad_norm": 0.39705193175327785, "learning_rate": 3.943615768083609e-08, "loss": 0.3657, "step": 8077 }, { "epoch": 1.9227107752722081, "grad_norm": 0.35680102901673433, "learning_rate": 3.919490524218694e-08, "loss": 0.2939, "step": 8078 }, { "epoch": 1.9229487713452729, "grad_norm": 0.3936950953927909, "learning_rate": 3.89543901018985e-08, "loss": 0.2862, "step": 8079 }, { "epoch": 1.9231867674183376, "grad_norm": 0.41880114246300687, "learning_rate": 3.8714612295718824e-08, "loss": 0.3537, "step": 8080 }, { "epoch": 1.9234247634914023, "grad_norm": 0.3639233905209205, "learning_rate": 3.8475571859283855e-08, "loss": 0.3672, "step": 8081 }, { "epoch": 1.923662759564467, "grad_norm": 0.3903343153212714, "learning_rate": 3.823726882812129e-08, "loss": 0.2758, "step": 8082 }, { "epoch": 1.9239007556375318, "grad_norm": 0.3900369810611726, "learning_rate": 3.7999703237648346e-08, "loss": 0.3241, "step": 8083 }, { "epoch": 1.9241387517105968, "grad_norm": 0.39117092554001687, "learning_rate": 3.776287512317345e-08, "loss": 0.3595, "step": 8084 }, { "epoch": 1.9243767477836615, "grad_norm": 0.36654968786594255, "learning_rate": 3.752678451989567e-08, "loss": 0.2833, "step": 8085 }, { "epoch": 1.9246147438567265, "grad_norm": 0.3809066672279184, "learning_rate": 3.729143146290304e-08, "loss": 0.2862, "step": 8086 }, { "epoch": 1.9248527399297912, "grad_norm": 0.3490062069699892, "learning_rate": 3.705681598717481e-08, "loss": 0.3525, "step": 8087 }, { "epoch": 1.925090736002856, "grad_norm": 0.379879949063031, "learning_rate": 3.682293812758142e-08, "loss": 0.3721, "step": 8088 }, { "epoch": 1.9253287320759207, "grad_norm": 0.3778359839016277, "learning_rate": 3.658979791888284e-08, "loss": 0.3119, "step": 8089 }, { "epoch": 1.9255667281489854, "grad_norm": 0.36956910970266094, "learning_rate": 3.635739539572858e-08, "loss": 0.3345, "step": 8090 }, { "epoch": 1.9258047242220502, "grad_norm": 0.41589648566449683, "learning_rate": 3.6125730592660445e-08, "loss": 0.4026, "step": 8091 }, { "epoch": 1.9260427202951151, "grad_norm": 0.37675858564217196, "learning_rate": 3.589480354410868e-08, "loss": 0.3182, "step": 8092 }, { "epoch": 1.9262807163681799, "grad_norm": 0.3885940611238435, "learning_rate": 3.5664614284395274e-08, "loss": 0.2901, "step": 8093 }, { "epoch": 1.9265187124412448, "grad_norm": 0.4267648886909655, "learning_rate": 3.543516284773174e-08, "loss": 0.3237, "step": 8094 }, { "epoch": 1.9267567085143096, "grad_norm": 0.37790981095161896, "learning_rate": 3.520644926822081e-08, "loss": 0.4189, "step": 8095 }, { "epoch": 1.9269947045873743, "grad_norm": 0.35608236467137666, "learning_rate": 3.497847357985418e-08, "loss": 0.3035, "step": 8096 }, { "epoch": 1.927232700660439, "grad_norm": 0.35125924042990425, "learning_rate": 3.475123581651529e-08, "loss": 0.2894, "step": 8097 }, { "epoch": 1.9274706967335038, "grad_norm": 0.3867965627570178, "learning_rate": 3.452473601197659e-08, "loss": 0.3634, "step": 8098 }, { "epoch": 1.9277086928065685, "grad_norm": 0.35092993273506357, "learning_rate": 3.42989741999028e-08, "loss": 0.3621, "step": 8099 }, { "epoch": 1.9279466888796335, "grad_norm": 0.37737964092644094, "learning_rate": 3.4073950413846536e-08, "loss": 0.2531, "step": 8100 }, { "epoch": 1.9281846849526982, "grad_norm": 0.37016176061370865, "learning_rate": 3.38496646872527e-08, "loss": 0.3185, "step": 8101 }, { "epoch": 1.9284226810257632, "grad_norm": 0.41856577365765557, "learning_rate": 3.36261170534552e-08, "loss": 0.3914, "step": 8102 }, { "epoch": 1.928660677098828, "grad_norm": 0.350550972272308, "learning_rate": 3.340330754567911e-08, "loss": 0.295, "step": 8103 }, { "epoch": 1.9288986731718927, "grad_norm": 0.37139534698154514, "learning_rate": 3.3181236197038505e-08, "loss": 0.295, "step": 8104 }, { "epoch": 1.9291366692449574, "grad_norm": 0.4026861513587683, "learning_rate": 3.295990304054031e-08, "loss": 0.374, "step": 8105 }, { "epoch": 1.9293746653180222, "grad_norm": 0.36621879141961106, "learning_rate": 3.2739308109078215e-08, "loss": 0.3791, "step": 8106 }, { "epoch": 1.929612661391087, "grad_norm": 0.37842728624086314, "learning_rate": 3.251945143543933e-08, "loss": 0.2516, "step": 8107 }, { "epoch": 1.9298506574641519, "grad_norm": 0.4177544855172531, "learning_rate": 3.230033305229974e-08, "loss": 0.3046, "step": 8108 }, { "epoch": 1.9300886535372166, "grad_norm": 0.37909632728985976, "learning_rate": 3.2081952992225605e-08, "loss": 0.374, "step": 8109 }, { "epoch": 1.9303266496102816, "grad_norm": 0.4361384086133121, "learning_rate": 3.186431128767375e-08, "loss": 0.3021, "step": 8110 }, { "epoch": 1.9305646456833463, "grad_norm": 0.370480970702569, "learning_rate": 3.164740797099053e-08, "loss": 0.2905, "step": 8111 }, { "epoch": 1.930802641756411, "grad_norm": 0.41765762330060385, "learning_rate": 3.143124307441403e-08, "loss": 0.3399, "step": 8112 }, { "epoch": 1.9310406378294758, "grad_norm": 0.36824830434988537, "learning_rate": 3.121581663007134e-08, "loss": 0.3557, "step": 8113 }, { "epoch": 1.9312786339025405, "grad_norm": 0.3842531168711303, "learning_rate": 3.100112866997962e-08, "loss": 0.2953, "step": 8114 }, { "epoch": 1.9315166299756052, "grad_norm": 0.3817089022291876, "learning_rate": 3.078717922604779e-08, "loss": 0.3135, "step": 8115 }, { "epoch": 1.9317546260486702, "grad_norm": 0.3710256940967274, "learning_rate": 3.05739683300732e-08, "loss": 0.3639, "step": 8116 }, { "epoch": 1.931992622121735, "grad_norm": 0.39056691671635796, "learning_rate": 3.036149601374494e-08, "loss": 0.3277, "step": 8117 }, { "epoch": 1.9322306181948, "grad_norm": 0.48173619690845865, "learning_rate": 3.0149762308641083e-08, "loss": 0.2814, "step": 8118 }, { "epoch": 1.9324686142678646, "grad_norm": 0.3992688487992293, "learning_rate": 2.993876724623035e-08, "loss": 0.3308, "step": 8119 }, { "epoch": 1.9327066103409294, "grad_norm": 0.3846765879945684, "learning_rate": 2.972851085787265e-08, "loss": 0.3655, "step": 8120 }, { "epoch": 1.9329446064139941, "grad_norm": 0.335030815815972, "learning_rate": 2.9518993174816323e-08, "loss": 0.2838, "step": 8121 }, { "epoch": 1.9331826024870589, "grad_norm": 0.39220164580944505, "learning_rate": 2.9310214228202016e-08, "loss": 0.2987, "step": 8122 }, { "epoch": 1.9334205985601236, "grad_norm": 0.38126990911469083, "learning_rate": 2.9102174049058796e-08, "loss": 0.3555, "step": 8123 }, { "epoch": 1.9336585946331886, "grad_norm": 0.37534376126487273, "learning_rate": 2.8894872668305816e-08, "loss": 0.3167, "step": 8124 }, { "epoch": 1.9338965907062533, "grad_norm": 0.3782285028819716, "learning_rate": 2.8688310116754546e-08, "loss": 0.2798, "step": 8125 }, { "epoch": 1.9341345867793183, "grad_norm": 0.4017596879571187, "learning_rate": 2.848248642510487e-08, "loss": 0.3332, "step": 8126 }, { "epoch": 1.934372582852383, "grad_norm": 0.385095717269012, "learning_rate": 2.8277401623946764e-08, "loss": 0.3786, "step": 8127 }, { "epoch": 1.9346105789254477, "grad_norm": 0.3681786253505828, "learning_rate": 2.8073055743761956e-08, "loss": 0.2888, "step": 8128 }, { "epoch": 1.9348485749985125, "grad_norm": 0.40494290707801794, "learning_rate": 2.7869448814920042e-08, "loss": 0.2698, "step": 8129 }, { "epoch": 1.9350865710715772, "grad_norm": 0.39678215471443756, "learning_rate": 2.766658086768237e-08, "loss": 0.3654, "step": 8130 }, { "epoch": 1.935324567144642, "grad_norm": 0.38096391433914467, "learning_rate": 2.746445193220093e-08, "loss": 0.3331, "step": 8131 }, { "epoch": 1.935562563217707, "grad_norm": 0.4207033973400047, "learning_rate": 2.726306203851614e-08, "loss": 0.291, "step": 8132 }, { "epoch": 1.9358005592907717, "grad_norm": 0.35674390767561825, "learning_rate": 2.706241121656017e-08, "loss": 0.2914, "step": 8133 }, { "epoch": 1.9360385553638366, "grad_norm": 0.37966258077198645, "learning_rate": 2.6862499496154713e-08, "loss": 0.4079, "step": 8134 }, { "epoch": 1.9362765514369014, "grad_norm": 0.366878622762437, "learning_rate": 2.6663326907010457e-08, "loss": 0.2792, "step": 8135 }, { "epoch": 1.936514547509966, "grad_norm": 0.3694716880277891, "learning_rate": 2.6464893478730936e-08, "loss": 0.2941, "step": 8136 }, { "epoch": 1.9367525435830308, "grad_norm": 0.36316133821050267, "learning_rate": 2.6267199240807563e-08, "loss": 0.3586, "step": 8137 }, { "epoch": 1.9369905396560956, "grad_norm": 0.37714358252792873, "learning_rate": 2.6070244222622387e-08, "loss": 0.3521, "step": 8138 }, { "epoch": 1.9372285357291603, "grad_norm": 0.3846436958910601, "learning_rate": 2.5874028453448106e-08, "loss": 0.2677, "step": 8139 }, { "epoch": 1.9374665318022253, "grad_norm": 0.3714862068184088, "learning_rate": 2.567855196244695e-08, "loss": 0.2874, "step": 8140 }, { "epoch": 1.93770452787529, "grad_norm": 0.3857626420570937, "learning_rate": 2.548381477867179e-08, "loss": 0.3467, "step": 8141 }, { "epoch": 1.937942523948355, "grad_norm": 0.3698968880808057, "learning_rate": 2.528981693106558e-08, "loss": 0.3375, "step": 8142 }, { "epoch": 1.9381805200214197, "grad_norm": 0.3576794108662344, "learning_rate": 2.5096558448460817e-08, "loss": 0.2723, "step": 8143 }, { "epoch": 1.9384185160944845, "grad_norm": 0.3902217274757619, "learning_rate": 2.4904039359580635e-08, "loss": 0.3334, "step": 8144 }, { "epoch": 1.9386565121675492, "grad_norm": 0.3916377148051716, "learning_rate": 2.4712259693038254e-08, "loss": 0.3588, "step": 8145 }, { "epoch": 1.938894508240614, "grad_norm": 0.3658288720995766, "learning_rate": 2.452121947733699e-08, "loss": 0.2968, "step": 8146 }, { "epoch": 1.9391325043136787, "grad_norm": 0.4357430764134552, "learning_rate": 2.4330918740869125e-08, "loss": 0.314, "step": 8147 }, { "epoch": 1.9393705003867436, "grad_norm": 0.40149441715579426, "learning_rate": 2.414135751191926e-08, "loss": 0.374, "step": 8148 }, { "epoch": 1.9396084964598084, "grad_norm": 0.3611645193048641, "learning_rate": 2.395253581866097e-08, "loss": 0.3328, "step": 8149 }, { "epoch": 1.9398464925328733, "grad_norm": 0.39467006084967055, "learning_rate": 2.3764453689156808e-08, "loss": 0.2535, "step": 8150 }, { "epoch": 1.940084488605938, "grad_norm": 0.4124421719142737, "learning_rate": 2.3577111151361078e-08, "loss": 0.3229, "step": 8151 }, { "epoch": 1.9403224846790028, "grad_norm": 0.3578248332226124, "learning_rate": 2.3390508233117615e-08, "loss": 0.4014, "step": 8152 }, { "epoch": 1.9405604807520676, "grad_norm": 0.35974756105539724, "learning_rate": 2.3204644962159793e-08, "loss": 0.2963, "step": 8153 }, { "epoch": 1.9407984768251323, "grad_norm": 0.3930501887956103, "learning_rate": 2.301952136611163e-08, "loss": 0.255, "step": 8154 }, { "epoch": 1.941036472898197, "grad_norm": 0.39726170086743057, "learning_rate": 2.2835137472487223e-08, "loss": 0.3411, "step": 8155 }, { "epoch": 1.941274468971262, "grad_norm": 0.36986727713553336, "learning_rate": 2.2651493308690765e-08, "loss": 0.3355, "step": 8156 }, { "epoch": 1.9415124650443267, "grad_norm": 0.35051444026980566, "learning_rate": 2.2468588902015975e-08, "loss": 0.2846, "step": 8157 }, { "epoch": 1.9417504611173917, "grad_norm": 0.37221414208027037, "learning_rate": 2.2286424279646668e-08, "loss": 0.3196, "step": 8158 }, { "epoch": 1.9419884571904564, "grad_norm": 0.38381167717462056, "learning_rate": 2.2104999468657852e-08, "loss": 0.3715, "step": 8159 }, { "epoch": 1.9422264532635212, "grad_norm": 0.3787505872883276, "learning_rate": 2.192431449601351e-08, "loss": 0.301, "step": 8160 }, { "epoch": 1.942464449336586, "grad_norm": 0.36125888810575163, "learning_rate": 2.1744369388567167e-08, "loss": 0.2484, "step": 8161 }, { "epoch": 1.9427024454096506, "grad_norm": 0.4252944882802151, "learning_rate": 2.1565164173063536e-08, "loss": 0.343, "step": 8162 }, { "epoch": 1.9429404414827154, "grad_norm": 0.38454550235674245, "learning_rate": 2.1386698876137424e-08, "loss": 0.3642, "step": 8163 }, { "epoch": 1.9431784375557803, "grad_norm": 0.3722701639410283, "learning_rate": 2.1208973524312616e-08, "loss": 0.3339, "step": 8164 }, { "epoch": 1.943416433628845, "grad_norm": 0.39979583818697223, "learning_rate": 2.103198814400409e-08, "loss": 0.2935, "step": 8165 }, { "epoch": 1.94365442970191, "grad_norm": 0.3753339389528479, "learning_rate": 2.0855742761515808e-08, "loss": 0.3629, "step": 8166 }, { "epoch": 1.9438924257749748, "grad_norm": 0.36576216964926017, "learning_rate": 2.0680237403041815e-08, "loss": 0.3144, "step": 8167 }, { "epoch": 1.9441304218480395, "grad_norm": 0.40044108633786357, "learning_rate": 2.0505472094667356e-08, "loss": 0.3043, "step": 8168 }, { "epoch": 1.9443684179211043, "grad_norm": 0.39121889961558187, "learning_rate": 2.0331446862366098e-08, "loss": 0.3215, "step": 8169 }, { "epoch": 1.944606413994169, "grad_norm": 0.3966140516297892, "learning_rate": 2.0158161732003467e-08, "loss": 0.3878, "step": 8170 }, { "epoch": 1.9448444100672337, "grad_norm": 0.36014975036534674, "learning_rate": 1.9985616729332747e-08, "loss": 0.2826, "step": 8171 }, { "epoch": 1.9450824061402987, "grad_norm": 0.3712453532736723, "learning_rate": 1.9813811879999533e-08, "loss": 0.2923, "step": 8172 }, { "epoch": 1.9453204022133634, "grad_norm": 0.4017756063096361, "learning_rate": 1.9642747209537295e-08, "loss": 0.338, "step": 8173 }, { "epoch": 1.9455583982864284, "grad_norm": 0.37378197959060855, "learning_rate": 1.9472422743371245e-08, "loss": 0.3247, "step": 8174 }, { "epoch": 1.9457963943594931, "grad_norm": 0.4124391596209017, "learning_rate": 1.9302838506815026e-08, "loss": 0.2884, "step": 8175 }, { "epoch": 1.9460343904325579, "grad_norm": 0.4033106524212199, "learning_rate": 1.913399452507403e-08, "loss": 0.3301, "step": 8176 }, { "epoch": 1.9462723865056226, "grad_norm": 0.3966965084126736, "learning_rate": 1.8965890823242072e-08, "loss": 0.3688, "step": 8177 }, { "epoch": 1.9465103825786874, "grad_norm": 0.33755930518081106, "learning_rate": 1.879852742630306e-08, "loss": 0.2721, "step": 8178 }, { "epoch": 1.946748378651752, "grad_norm": 0.36588329404854725, "learning_rate": 1.8631904359132646e-08, "loss": 0.2917, "step": 8179 }, { "epoch": 1.946986374724817, "grad_norm": 0.3587494911753443, "learning_rate": 1.8466021646493802e-08, "loss": 0.343, "step": 8180 }, { "epoch": 1.9472243707978818, "grad_norm": 0.38139976856694574, "learning_rate": 1.830087931304181e-08, "loss": 0.3703, "step": 8181 }, { "epoch": 1.9474623668709468, "grad_norm": 0.4031204694527784, "learning_rate": 1.8136477383319805e-08, "loss": 0.2817, "step": 8182 }, { "epoch": 1.9477003629440115, "grad_norm": 0.3888962919598794, "learning_rate": 1.797281588176325e-08, "loss": 0.299, "step": 8183 }, { "epoch": 1.9479383590170762, "grad_norm": 0.4082574436849942, "learning_rate": 1.7809894832695463e-08, "loss": 0.3874, "step": 8184 }, { "epoch": 1.948176355090141, "grad_norm": 0.3623742069248171, "learning_rate": 1.7647714260330407e-08, "loss": 0.3092, "step": 8185 }, { "epoch": 1.9484143511632057, "grad_norm": 0.39366833031118265, "learning_rate": 1.748627418877269e-08, "loss": 0.2897, "step": 8186 }, { "epoch": 1.9486523472362705, "grad_norm": 0.3874663652919054, "learning_rate": 1.7325574642016453e-08, "loss": 0.3632, "step": 8187 }, { "epoch": 1.9488903433093354, "grad_norm": 0.4022386962255987, "learning_rate": 1.716561564394481e-08, "loss": 0.3729, "step": 8188 }, { "epoch": 1.9491283393824002, "grad_norm": 0.3760810663300212, "learning_rate": 1.7006397218332084e-08, "loss": 0.2598, "step": 8189 }, { "epoch": 1.9493663354554651, "grad_norm": 0.3838950169229048, "learning_rate": 1.6847919388842115e-08, "loss": 0.3023, "step": 8190 }, { "epoch": 1.9496043315285299, "grad_norm": 0.3762323973720483, "learning_rate": 1.6690182179028845e-08, "loss": 0.358, "step": 8191 }, { "epoch": 1.9498423276015946, "grad_norm": 0.40170151586759195, "learning_rate": 1.6533185612335188e-08, "loss": 0.2977, "step": 8192 }, { "epoch": 1.9500803236746593, "grad_norm": 0.38021345018959474, "learning_rate": 1.6376929712095813e-08, "loss": 0.2929, "step": 8193 }, { "epoch": 1.950318319747724, "grad_norm": 0.3951132465369512, "learning_rate": 1.6221414501532694e-08, "loss": 0.3475, "step": 8194 }, { "epoch": 1.9505563158207888, "grad_norm": 0.38568162349813606, "learning_rate": 1.606664000376068e-08, "loss": 0.3766, "step": 8195 }, { "epoch": 1.9507943118938538, "grad_norm": 0.3780659119974086, "learning_rate": 1.591260624178248e-08, "loss": 0.2771, "step": 8196 }, { "epoch": 1.9510323079669185, "grad_norm": 0.3958482547520693, "learning_rate": 1.5759313238491447e-08, "loss": 0.2964, "step": 8197 }, { "epoch": 1.9512703040399835, "grad_norm": 0.39144688735676086, "learning_rate": 1.5606761016670467e-08, "loss": 0.3604, "step": 8198 }, { "epoch": 1.9515083001130482, "grad_norm": 0.4028520449978215, "learning_rate": 1.5454949598993075e-08, "loss": 0.3177, "step": 8199 }, { "epoch": 1.951746296186113, "grad_norm": 0.3749353165874801, "learning_rate": 1.5303879008021773e-08, "loss": 0.2832, "step": 8200 }, { "epoch": 1.9519842922591777, "grad_norm": 0.39758890288042986, "learning_rate": 1.5153549266209154e-08, "loss": 0.3128, "step": 8201 }, { "epoch": 1.9522222883322424, "grad_norm": 0.3508156781517721, "learning_rate": 1.5003960395898465e-08, "loss": 0.4, "step": 8202 }, { "epoch": 1.9524602844053072, "grad_norm": 0.35738164902463865, "learning_rate": 1.4855112419321916e-08, "loss": 0.3172, "step": 8203 }, { "epoch": 1.9526982804783721, "grad_norm": 0.3828648741919708, "learning_rate": 1.4707005358602367e-08, "loss": 0.2917, "step": 8204 }, { "epoch": 1.9529362765514369, "grad_norm": 0.4386268966748521, "learning_rate": 1.4559639235751654e-08, "loss": 0.3149, "step": 8205 }, { "epoch": 1.9531742726245018, "grad_norm": 0.3676937839979994, "learning_rate": 1.4413014072672816e-08, "loss": 0.3793, "step": 8206 }, { "epoch": 1.9534122686975666, "grad_norm": 0.3692252990914482, "learning_rate": 1.426712989115786e-08, "loss": 0.2811, "step": 8207 }, { "epoch": 1.9536502647706313, "grad_norm": 0.37336901880476925, "learning_rate": 1.4121986712888336e-08, "loss": 0.316, "step": 8208 }, { "epoch": 1.953888260843696, "grad_norm": 0.40798571701364533, "learning_rate": 1.3977584559435874e-08, "loss": 0.374, "step": 8209 }, { "epoch": 1.9541262569167608, "grad_norm": 0.36868651161847604, "learning_rate": 1.3833923452262754e-08, "loss": 0.3055, "step": 8210 }, { "epoch": 1.9543642529898255, "grad_norm": 0.38007255786831645, "learning_rate": 1.3691003412720783e-08, "loss": 0.2946, "step": 8211 }, { "epoch": 1.9546022490628905, "grad_norm": 0.35949632759077443, "learning_rate": 1.3548824462050747e-08, "loss": 0.3456, "step": 8212 }, { "epoch": 1.9548402451359552, "grad_norm": 0.37479143142903726, "learning_rate": 1.3407386621384078e-08, "loss": 0.4028, "step": 8213 }, { "epoch": 1.9550782412090202, "grad_norm": 0.3666312123014584, "learning_rate": 1.3266689911742291e-08, "loss": 0.2829, "step": 8214 }, { "epoch": 1.955316237282085, "grad_norm": 0.4020518839752495, "learning_rate": 1.312673435403644e-08, "loss": 0.3137, "step": 8215 }, { "epoch": 1.9555542333551497, "grad_norm": 0.4198314847703424, "learning_rate": 1.2987519969067109e-08, "loss": 0.3647, "step": 8216 }, { "epoch": 1.9557922294282144, "grad_norm": 0.37449752005583575, "learning_rate": 1.284904677752441e-08, "loss": 0.3045, "step": 8217 }, { "epoch": 1.9560302255012791, "grad_norm": 0.3788052759823918, "learning_rate": 1.2711314799990216e-08, "loss": 0.2877, "step": 8218 }, { "epoch": 1.9562682215743439, "grad_norm": 0.3865156770622441, "learning_rate": 1.2574324056934262e-08, "loss": 0.3561, "step": 8219 }, { "epoch": 1.9565062176474088, "grad_norm": 0.3781434327446371, "learning_rate": 1.2438074568716374e-08, "loss": 0.3798, "step": 8220 }, { "epoch": 1.9567442137204736, "grad_norm": 0.36790890617104577, "learning_rate": 1.230256635558702e-08, "loss": 0.2838, "step": 8221 }, { "epoch": 1.9569822097935385, "grad_norm": 0.39260234101874714, "learning_rate": 1.21677994376862e-08, "loss": 0.3047, "step": 8222 }, { "epoch": 1.9572202058666033, "grad_norm": 0.3574524348159013, "learning_rate": 1.2033773835042894e-08, "loss": 0.3746, "step": 8223 }, { "epoch": 1.957458201939668, "grad_norm": 0.3612325904669084, "learning_rate": 1.1900489567577277e-08, "loss": 0.3464, "step": 8224 }, { "epoch": 1.9576961980127328, "grad_norm": 0.38077468030121714, "learning_rate": 1.1767946655099061e-08, "loss": 0.2666, "step": 8225 }, { "epoch": 1.9579341940857975, "grad_norm": 0.368563482866289, "learning_rate": 1.1636145117306374e-08, "loss": 0.3406, "step": 8226 }, { "epoch": 1.9581721901588622, "grad_norm": 0.42477667717805123, "learning_rate": 1.1505084973789105e-08, "loss": 0.3935, "step": 8227 }, { "epoch": 1.9584101862319272, "grad_norm": 0.3438577540547306, "learning_rate": 1.1374766244025003e-08, "loss": 0.2929, "step": 8228 }, { "epoch": 1.958648182304992, "grad_norm": 0.3782776548245008, "learning_rate": 1.1245188947384133e-08, "loss": 0.2818, "step": 8229 }, { "epoch": 1.958886178378057, "grad_norm": 0.36863077536612854, "learning_rate": 1.1116353103123312e-08, "loss": 0.3458, "step": 8230 }, { "epoch": 1.9591241744511216, "grad_norm": 0.3659543529085472, "learning_rate": 1.0988258730391665e-08, "loss": 0.3483, "step": 8231 }, { "epoch": 1.9593621705241864, "grad_norm": 0.38260543409474423, "learning_rate": 1.0860905848227298e-08, "loss": 0.2871, "step": 8232 }, { "epoch": 1.9596001665972511, "grad_norm": 0.41483183809961477, "learning_rate": 1.0734294475557294e-08, "loss": 0.3166, "step": 8233 }, { "epoch": 1.9598381626703159, "grad_norm": 0.39222204343126627, "learning_rate": 1.0608424631199376e-08, "loss": 0.3949, "step": 8234 }, { "epoch": 1.9600761587433806, "grad_norm": 0.3711141699280681, "learning_rate": 1.0483296333861914e-08, "loss": 0.3006, "step": 8235 }, { "epoch": 1.9603141548164456, "grad_norm": 0.3763503542360473, "learning_rate": 1.0358909602140588e-08, "loss": 0.3047, "step": 8236 }, { "epoch": 1.9605521508895103, "grad_norm": 0.3969121772442897, "learning_rate": 1.0235264454523385e-08, "loss": 0.3416, "step": 8237 }, { "epoch": 1.9607901469625753, "grad_norm": 0.3816122834946716, "learning_rate": 1.011236090938672e-08, "loss": 0.3718, "step": 8238 }, { "epoch": 1.96102814303564, "grad_norm": 0.38396544116333275, "learning_rate": 9.99019898499709e-09, "loss": 0.2717, "step": 8239 }, { "epoch": 1.9612661391087047, "grad_norm": 0.44666178093194486, "learning_rate": 9.868778699511083e-09, "loss": 0.3074, "step": 8240 }, { "epoch": 1.9615041351817695, "grad_norm": 0.4143851166339734, "learning_rate": 9.748100070974265e-09, "loss": 0.3759, "step": 8241 }, { "epoch": 1.9617421312548342, "grad_norm": 0.3921730498280631, "learning_rate": 9.628163117322286e-09, "loss": 0.3331, "step": 8242 }, { "epoch": 1.961980127327899, "grad_norm": 0.3726163829999817, "learning_rate": 9.508967856381445e-09, "loss": 0.2988, "step": 8243 }, { "epoch": 1.962218123400964, "grad_norm": 0.39354313502014604, "learning_rate": 9.390514305867015e-09, "loss": 0.3275, "step": 8244 }, { "epoch": 1.9624561194740286, "grad_norm": 0.37253432454149743, "learning_rate": 9.272802483383248e-09, "loss": 0.3857, "step": 8245 }, { "epoch": 1.9626941155470936, "grad_norm": 0.3981252148153215, "learning_rate": 9.155832406426147e-09, "loss": 0.265, "step": 8246 }, { "epoch": 1.9629321116201583, "grad_norm": 0.39768413263890057, "learning_rate": 9.039604092379583e-09, "loss": 0.2868, "step": 8247 }, { "epoch": 1.963170107693223, "grad_norm": 0.4075926684645318, "learning_rate": 8.92411755851863e-09, "loss": 0.3498, "step": 8248 }, { "epoch": 1.9634081037662878, "grad_norm": 0.3715207251676553, "learning_rate": 8.809372822006779e-09, "loss": 0.3479, "step": 8249 }, { "epoch": 1.9636460998393526, "grad_norm": 0.3869938419383704, "learning_rate": 8.69536989989872e-09, "loss": 0.2767, "step": 8250 }, { "epoch": 1.9638840959124173, "grad_norm": 0.37358902937459904, "learning_rate": 8.582108809137013e-09, "loss": 0.3084, "step": 8251 }, { "epoch": 1.9641220919854823, "grad_norm": 0.4092803779218533, "learning_rate": 8.469589566555968e-09, "loss": 0.3679, "step": 8252 }, { "epoch": 1.964360088058547, "grad_norm": 0.36047460363307426, "learning_rate": 8.357812188878323e-09, "loss": 0.2806, "step": 8253 }, { "epoch": 1.964598084131612, "grad_norm": 0.3678747427954382, "learning_rate": 8.246776692716896e-09, "loss": 0.2568, "step": 8254 }, { "epoch": 1.9648360802046767, "grad_norm": 0.40832629901247686, "learning_rate": 8.1364830945746e-09, "loss": 0.333, "step": 8255 }, { "epoch": 1.9650740762777414, "grad_norm": 0.37115745740832373, "learning_rate": 8.026931410843874e-09, "loss": 0.3326, "step": 8256 }, { "epoch": 1.9653120723508062, "grad_norm": 0.37423048974217404, "learning_rate": 7.918121657806699e-09, "loss": 0.2807, "step": 8257 }, { "epoch": 1.965550068423871, "grad_norm": 0.3864398760686447, "learning_rate": 7.81005385163458e-09, "loss": 0.3276, "step": 8258 }, { "epoch": 1.9657880644969357, "grad_norm": 0.388715422162384, "learning_rate": 7.702728008389116e-09, "loss": 0.3942, "step": 8259 }, { "epoch": 1.9660260605700006, "grad_norm": 0.3429967987431223, "learning_rate": 7.596144144021988e-09, "loss": 0.2696, "step": 8260 }, { "epoch": 1.9662640566430654, "grad_norm": 0.3884832865704859, "learning_rate": 7.490302274373862e-09, "loss": 0.2719, "step": 8261 }, { "epoch": 1.9665020527161303, "grad_norm": 0.3804734222913664, "learning_rate": 7.385202415175485e-09, "loss": 0.3317, "step": 8262 }, { "epoch": 1.966740048789195, "grad_norm": 0.36816873312551346, "learning_rate": 7.280844582047142e-09, "loss": 0.3705, "step": 8263 }, { "epoch": 1.9669780448622598, "grad_norm": 0.36983723483013164, "learning_rate": 7.1772287904997575e-09, "loss": 0.3021, "step": 8264 }, { "epoch": 1.9672160409353245, "grad_norm": 0.38603048071401885, "learning_rate": 7.07435505593268e-09, "loss": 0.2876, "step": 8265 }, { "epoch": 1.9674540370083893, "grad_norm": 0.4003003944120439, "learning_rate": 6.972223393634792e-09, "loss": 0.3849, "step": 8266 }, { "epoch": 1.967692033081454, "grad_norm": 0.3602611770065517, "learning_rate": 6.870833818786727e-09, "loss": 0.2768, "step": 8267 }, { "epoch": 1.967930029154519, "grad_norm": 0.3884374384546158, "learning_rate": 6.770186346456431e-09, "loss": 0.2914, "step": 8268 }, { "epoch": 1.9681680252275837, "grad_norm": 0.3744232480095091, "learning_rate": 6.670280991603606e-09, "loss": 0.3281, "step": 8269 }, { "epoch": 1.9684060213006487, "grad_norm": 0.42375613579156163, "learning_rate": 6.571117769075264e-09, "loss": 0.3981, "step": 8270 }, { "epoch": 1.9686440173737134, "grad_norm": 0.35315666764990467, "learning_rate": 6.472696693610725e-09, "loss": 0.2874, "step": 8271 }, { "epoch": 1.9688820134467782, "grad_norm": 0.3452396601565405, "learning_rate": 6.375017779837178e-09, "loss": 0.2947, "step": 8272 }, { "epoch": 1.969120009519843, "grad_norm": 0.3557979924222308, "learning_rate": 6.278081042272455e-09, "loss": 0.3548, "step": 8273 }, { "epoch": 1.9693580055929076, "grad_norm": 0.3864839080975044, "learning_rate": 6.181886495323364e-09, "loss": 0.3309, "step": 8274 }, { "epoch": 1.9695960016659724, "grad_norm": 0.4527567996253969, "learning_rate": 6.086434153287357e-09, "loss": 0.2887, "step": 8275 }, { "epoch": 1.9698339977390373, "grad_norm": 0.4701596918796658, "learning_rate": 5.991724030350865e-09, "loss": 0.2977, "step": 8276 }, { "epoch": 1.970071993812102, "grad_norm": 0.40077106587480205, "learning_rate": 5.8977561405898496e-09, "loss": 0.37, "step": 8277 }, { "epoch": 1.970309989885167, "grad_norm": 0.36798667561511195, "learning_rate": 5.804530497970362e-09, "loss": 0.3101, "step": 8278 }, { "epoch": 1.9705479859582318, "grad_norm": 0.4143172864878241, "learning_rate": 5.712047116347985e-09, "loss": 0.2745, "step": 8279 }, { "epoch": 1.9707859820312965, "grad_norm": 0.3852482915444085, "learning_rate": 5.620306009467835e-09, "loss": 0.3629, "step": 8280 }, { "epoch": 1.9710239781043613, "grad_norm": 0.37496502608000093, "learning_rate": 5.529307190965671e-09, "loss": 0.344, "step": 8281 }, { "epoch": 1.971261974177426, "grad_norm": 0.3832082730157154, "learning_rate": 5.439050674365676e-09, "loss": 0.2708, "step": 8282 }, { "epoch": 1.9714999702504907, "grad_norm": 0.40313323861297806, "learning_rate": 5.349536473082118e-09, "loss": 0.3132, "step": 8283 }, { "epoch": 1.9717379663235557, "grad_norm": 0.3707010760106356, "learning_rate": 5.260764600419354e-09, "loss": 0.4018, "step": 8284 }, { "epoch": 1.9719759623966204, "grad_norm": 0.43019076768807374, "learning_rate": 5.172735069570722e-09, "loss": 0.3143, "step": 8285 }, { "epoch": 1.9722139584696854, "grad_norm": 0.46022861405119286, "learning_rate": 5.0854478936190884e-09, "loss": 0.2626, "step": 8286 }, { "epoch": 1.9724519545427501, "grad_norm": 0.3912662063723855, "learning_rate": 4.998903085539075e-09, "loss": 0.319, "step": 8287 }, { "epoch": 1.9726899506158149, "grad_norm": 0.3793180333217797, "learning_rate": 4.913100658192061e-09, "loss": 0.3523, "step": 8288 }, { "epoch": 1.9729279466888796, "grad_norm": 0.3865661630275148, "learning_rate": 4.828040624330621e-09, "loss": 0.2683, "step": 8289 }, { "epoch": 1.9731659427619443, "grad_norm": 0.37603160286322296, "learning_rate": 4.743722996597422e-09, "loss": 0.3024, "step": 8290 }, { "epoch": 1.973403938835009, "grad_norm": 0.3751677087026661, "learning_rate": 4.6601477875235505e-09, "loss": 0.34, "step": 8291 }, { "epoch": 1.973641934908074, "grad_norm": 0.376225703087134, "learning_rate": 4.577315009530181e-09, "loss": 0.3369, "step": 8292 }, { "epoch": 1.9738799309811388, "grad_norm": 0.3927228549545841, "learning_rate": 4.495224674928578e-09, "loss": 0.2813, "step": 8293 }, { "epoch": 1.9741179270542037, "grad_norm": 0.3827330662097907, "learning_rate": 4.413876795919536e-09, "loss": 0.3303, "step": 8294 }, { "epoch": 1.9743559231272685, "grad_norm": 0.3862347082915683, "learning_rate": 4.333271384593385e-09, "loss": 0.3676, "step": 8295 }, { "epoch": 1.9745939192003332, "grad_norm": 0.40932286583844174, "learning_rate": 4.253408452929986e-09, "loss": 0.27, "step": 8296 }, { "epoch": 1.974831915273398, "grad_norm": 0.4000770232069227, "learning_rate": 4.174288012798733e-09, "loss": 0.3247, "step": 8297 }, { "epoch": 1.9750699113464627, "grad_norm": 0.36945290953037635, "learning_rate": 4.095910075959108e-09, "loss": 0.3289, "step": 8298 }, { "epoch": 1.9753079074195274, "grad_norm": 0.4060885406421163, "learning_rate": 4.018274654059573e-09, "loss": 0.3047, "step": 8299 }, { "epoch": 1.9755459034925924, "grad_norm": 0.4119247754609785, "learning_rate": 3.941381758639784e-09, "loss": 0.2876, "step": 8300 }, { "epoch": 1.9757838995656571, "grad_norm": 0.39855318475174173, "learning_rate": 3.865231401126712e-09, "loss": 0.3215, "step": 8301 }, { "epoch": 1.976021895638722, "grad_norm": 0.3834493602365561, "learning_rate": 3.789823592838526e-09, "loss": 0.381, "step": 8302 }, { "epoch": 1.9762598917117868, "grad_norm": 0.39370603464488824, "learning_rate": 3.7151583449834826e-09, "loss": 0.2854, "step": 8303 }, { "epoch": 1.9764978877848516, "grad_norm": 0.38092804030560223, "learning_rate": 3.6412356686577056e-09, "loss": 0.2866, "step": 8304 }, { "epoch": 1.9767358838579163, "grad_norm": 0.359154440466524, "learning_rate": 3.5680555748479617e-09, "loss": 0.3367, "step": 8305 }, { "epoch": 1.976973879930981, "grad_norm": 0.3866822121685151, "learning_rate": 3.4956180744311063e-09, "loss": 0.3345, "step": 8306 }, { "epoch": 1.9772118760040458, "grad_norm": 0.3877651597061357, "learning_rate": 3.423923178172972e-09, "loss": 0.2589, "step": 8307 }, { "epoch": 1.9774498720771108, "grad_norm": 0.3888359989652597, "learning_rate": 3.3529708967294794e-09, "loss": 0.3375, "step": 8308 }, { "epoch": 1.9776878681501755, "grad_norm": 0.38304911568033706, "learning_rate": 3.282761240645527e-09, "loss": 0.3554, "step": 8309 }, { "epoch": 1.9779258642232405, "grad_norm": 0.3470736673254614, "learning_rate": 3.213294220355545e-09, "loss": 0.3175, "step": 8310 }, { "epoch": 1.9781638602963052, "grad_norm": 0.377926105048321, "learning_rate": 3.1445698461851638e-09, "loss": 0.2487, "step": 8311 }, { "epoch": 1.97840185636937, "grad_norm": 0.3806616330829527, "learning_rate": 3.0765881283478794e-09, "loss": 0.3329, "step": 8312 }, { "epoch": 1.9786398524424347, "grad_norm": 0.3620860977402568, "learning_rate": 3.0093490769472765e-09, "loss": 0.3399, "step": 8313 }, { "epoch": 1.9788778485154994, "grad_norm": 0.38013912104937325, "learning_rate": 2.942852701977028e-09, "loss": 0.2705, "step": 8314 }, { "epoch": 1.9791158445885642, "grad_norm": 0.3818113771924823, "learning_rate": 2.8770990133203392e-09, "loss": 0.3248, "step": 8315 }, { "epoch": 1.9793538406616291, "grad_norm": 0.4113225030445951, "learning_rate": 2.8120880207493928e-09, "loss": 0.3487, "step": 8316 }, { "epoch": 1.9795918367346939, "grad_norm": 0.3703805736030292, "learning_rate": 2.747819733927015e-09, "loss": 0.3121, "step": 8317 }, { "epoch": 1.9798298328077588, "grad_norm": 0.368702788372816, "learning_rate": 2.6842941624044548e-09, "loss": 0.2654, "step": 8318 }, { "epoch": 1.9800678288808236, "grad_norm": 0.39480635521831586, "learning_rate": 2.6215113156230487e-09, "loss": 0.3279, "step": 8319 }, { "epoch": 1.9803058249538883, "grad_norm": 0.35852264603642603, "learning_rate": 2.559471202914776e-09, "loss": 0.393, "step": 8320 }, { "epoch": 1.980543821026953, "grad_norm": 0.3594864841251863, "learning_rate": 2.498173833499484e-09, "loss": 0.2659, "step": 8321 }, { "epoch": 1.9807818171000178, "grad_norm": 0.40546718269932125, "learning_rate": 2.4376192164882183e-09, "loss": 0.3409, "step": 8322 }, { "epoch": 1.9810198131730825, "grad_norm": 0.5532056344379126, "learning_rate": 2.3778073608798914e-09, "loss": 0.358, "step": 8323 }, { "epoch": 1.9812578092461475, "grad_norm": 0.3546557183922418, "learning_rate": 2.3187382755651687e-09, "loss": 0.3298, "step": 8324 }, { "epoch": 1.9814958053192122, "grad_norm": 0.37203732480489365, "learning_rate": 2.2604119693220295e-09, "loss": 0.2895, "step": 8325 }, { "epoch": 1.9817338013922772, "grad_norm": 0.4172958208934213, "learning_rate": 2.202828450820205e-09, "loss": 0.3164, "step": 8326 }, { "epoch": 1.981971797465342, "grad_norm": 0.3625690030083054, "learning_rate": 2.1459877286172935e-09, "loss": 0.3785, "step": 8327 }, { "epoch": 1.9822097935384067, "grad_norm": 0.3603665358371541, "learning_rate": 2.0898898111620935e-09, "loss": 0.2816, "step": 8328 }, { "epoch": 1.9824477896114714, "grad_norm": 0.3780300709578647, "learning_rate": 2.034534706791269e-09, "loss": 0.2825, "step": 8329 }, { "epoch": 1.9826857856845361, "grad_norm": 0.37028301830691185, "learning_rate": 1.979922423732128e-09, "loss": 0.3403, "step": 8330 }, { "epoch": 1.9829237817576009, "grad_norm": 0.3638771091784381, "learning_rate": 1.9260529701015105e-09, "loss": 0.3491, "step": 8331 }, { "epoch": 1.9831617778306658, "grad_norm": 0.41702450685086956, "learning_rate": 1.8729263539063457e-09, "loss": 0.2864, "step": 8332 }, { "epoch": 1.9833997739037306, "grad_norm": 0.37868852344723125, "learning_rate": 1.820542583041429e-09, "loss": 0.3317, "step": 8333 }, { "epoch": 1.9836377699767955, "grad_norm": 0.3666826086769097, "learning_rate": 1.76890166529331e-09, "loss": 0.3906, "step": 8334 }, { "epoch": 1.9838757660498603, "grad_norm": 0.35122611004045284, "learning_rate": 1.718003608336405e-09, "loss": 0.2839, "step": 8335 }, { "epoch": 1.984113762122925, "grad_norm": 0.4202455413793847, "learning_rate": 1.6678484197357737e-09, "loss": 0.262, "step": 8336 }, { "epoch": 1.9843517581959897, "grad_norm": 0.37863495269066555, "learning_rate": 1.6184361069460085e-09, "loss": 0.3527, "step": 8337 }, { "epoch": 1.9845897542690545, "grad_norm": 0.35319699077317435, "learning_rate": 1.569766677310125e-09, "loss": 0.3451, "step": 8338 }, { "epoch": 1.9848277503421192, "grad_norm": 0.34742580202122714, "learning_rate": 1.521840138062336e-09, "loss": 0.2781, "step": 8339 }, { "epoch": 1.9850657464151842, "grad_norm": 0.38670204572295525, "learning_rate": 1.4746564963258325e-09, "loss": 0.2925, "step": 8340 }, { "epoch": 1.985303742488249, "grad_norm": 0.3666529795353334, "learning_rate": 1.4282157591122282e-09, "loss": 0.3708, "step": 8341 }, { "epoch": 1.9855417385613139, "grad_norm": 0.3696854659046601, "learning_rate": 1.3825179333248895e-09, "loss": 0.2828, "step": 8342 }, { "epoch": 1.9857797346343786, "grad_norm": 0.406404518542554, "learning_rate": 1.3375630257550509e-09, "loss": 0.3009, "step": 8343 }, { "epoch": 1.9860177307074434, "grad_norm": 0.385912160739807, "learning_rate": 1.2933510430845898e-09, "loss": 0.3693, "step": 8344 }, { "epoch": 1.986255726780508, "grad_norm": 0.3925971908766997, "learning_rate": 1.2498819918843609e-09, "loss": 0.4039, "step": 8345 }, { "epoch": 1.9864937228535728, "grad_norm": 0.3652691608712696, "learning_rate": 1.2071558786141969e-09, "loss": 0.273, "step": 8346 }, { "epoch": 1.9867317189266376, "grad_norm": 0.38597466303071754, "learning_rate": 1.1651727096251287e-09, "loss": 0.3049, "step": 8347 }, { "epoch": 1.9869697149997025, "grad_norm": 0.3810587229670749, "learning_rate": 1.1239324911566096e-09, "loss": 0.3677, "step": 8348 }, { "epoch": 1.9872077110727673, "grad_norm": 0.40007821179401476, "learning_rate": 1.083435229338181e-09, "loss": 0.3497, "step": 8349 }, { "epoch": 1.9874457071458322, "grad_norm": 0.40580202241849406, "learning_rate": 1.043680930187807e-09, "loss": 0.2742, "step": 8350 }, { "epoch": 1.987683703218897, "grad_norm": 0.37923487971766323, "learning_rate": 1.0046695996152046e-09, "loss": 0.3465, "step": 8351 }, { "epoch": 1.9879216992919617, "grad_norm": 0.37215554302402476, "learning_rate": 9.66401243417958e-10, "loss": 0.3798, "step": 8352 }, { "epoch": 1.9881596953650265, "grad_norm": 0.39321133558205074, "learning_rate": 9.288758672837406e-10, "loss": 0.3067, "step": 8353 }, { "epoch": 1.9883976914380912, "grad_norm": 0.3748563987454188, "learning_rate": 8.92093476789202e-10, "loss": 0.2742, "step": 8354 }, { "epoch": 1.988635687511156, "grad_norm": 0.3891255633326672, "learning_rate": 8.560540774016357e-10, "loss": 0.3438, "step": 8355 }, { "epoch": 1.988873683584221, "grad_norm": 0.3646078625631843, "learning_rate": 8.207576744773127e-10, "loss": 0.3308, "step": 8356 }, { "epoch": 1.9891116796572856, "grad_norm": 0.36793355963820623, "learning_rate": 7.862042732620367e-10, "loss": 0.2491, "step": 8357 }, { "epoch": 1.9893496757303506, "grad_norm": 0.3672753281223139, "learning_rate": 7.523938788916996e-10, "loss": 0.3369, "step": 8358 }, { "epoch": 1.9895876718034153, "grad_norm": 0.39245638391380494, "learning_rate": 7.193264963911706e-10, "loss": 0.4073, "step": 8359 }, { "epoch": 1.98982566787648, "grad_norm": 0.3865165837782479, "learning_rate": 6.870021306742968e-10, "loss": 0.2962, "step": 8360 }, { "epoch": 1.9900636639495448, "grad_norm": 0.41219133327793983, "learning_rate": 6.554207865466788e-10, "loss": 0.302, "step": 8361 }, { "epoch": 1.9903016600226096, "grad_norm": 0.4223513362715057, "learning_rate": 6.245824687006741e-10, "loss": 0.3742, "step": 8362 }, { "epoch": 1.9905396560956743, "grad_norm": 0.3839303730562907, "learning_rate": 5.944871817209486e-10, "loss": 0.383, "step": 8363 }, { "epoch": 1.9907776521687393, "grad_norm": 0.3926286842555442, "learning_rate": 5.651349300794806e-10, "loss": 0.292, "step": 8364 }, { "epoch": 1.991015648241804, "grad_norm": 0.397668403394273, "learning_rate": 5.365257181388917e-10, "loss": 0.2986, "step": 8365 }, { "epoch": 1.991253644314869, "grad_norm": 0.391019416533082, "learning_rate": 5.086595501513358e-10, "loss": 0.3498, "step": 8366 }, { "epoch": 1.9914916403879337, "grad_norm": 0.42914431554738636, "learning_rate": 4.815364302590553e-10, "loss": 0.339, "step": 8367 }, { "epoch": 1.9917296364609984, "grad_norm": 0.37889014824362105, "learning_rate": 4.5515636249160446e-10, "loss": 0.2723, "step": 8368 }, { "epoch": 1.9919676325340632, "grad_norm": 0.38046794389103084, "learning_rate": 4.2951935077140127e-10, "loss": 0.3101, "step": 8369 }, { "epoch": 1.992205628607128, "grad_norm": 0.40863630016035024, "learning_rate": 4.04625398907621e-10, "loss": 0.3595, "step": 8370 }, { "epoch": 1.9924436246801926, "grad_norm": 0.34145231287271066, "learning_rate": 3.80474510601192e-10, "loss": 0.3063, "step": 8371 }, { "epoch": 1.9926816207532576, "grad_norm": 0.37706309665477467, "learning_rate": 3.5706668944035517e-10, "loss": 0.3212, "step": 8372 }, { "epoch": 1.9929196168263223, "grad_norm": 0.4166841421991216, "learning_rate": 3.344019389045494e-10, "loss": 0.3757, "step": 8373 }, { "epoch": 1.9931576128993873, "grad_norm": 0.36453047466675503, "learning_rate": 3.124802623627465e-10, "loss": 0.318, "step": 8374 }, { "epoch": 1.993395608972452, "grad_norm": 0.40191372694668726, "learning_rate": 2.913016630723409e-10, "loss": 0.2785, "step": 8375 }, { "epoch": 1.9936336050455168, "grad_norm": 0.38151201580835575, "learning_rate": 2.708661441813698e-10, "loss": 0.3213, "step": 8376 }, { "epoch": 1.9938716011185815, "grad_norm": 0.38965306740860267, "learning_rate": 2.5117370872684843e-10, "loss": 0.3845, "step": 8377 }, { "epoch": 1.9941095971916463, "grad_norm": 0.3625652747058628, "learning_rate": 2.3222435963643485e-10, "loss": 0.298, "step": 8378 }, { "epoch": 1.994347593264711, "grad_norm": 0.3795900323777344, "learning_rate": 2.1401809972509957e-10, "loss": 0.2865, "step": 8379 }, { "epoch": 1.994585589337776, "grad_norm": 0.3930027956116569, "learning_rate": 1.965549316995663e-10, "loss": 0.3304, "step": 8380 }, { "epoch": 1.9948235854108407, "grad_norm": 0.37631234555304693, "learning_rate": 1.7983485815553646e-10, "loss": 0.3261, "step": 8381 }, { "epoch": 1.9950615814839057, "grad_norm": 0.4000269616966013, "learning_rate": 1.6385788157713413e-10, "loss": 0.2903, "step": 8382 }, { "epoch": 1.9952995775569704, "grad_norm": 0.3941543276970109, "learning_rate": 1.486240043396814e-10, "loss": 0.3109, "step": 8383 }, { "epoch": 1.9955375736300351, "grad_norm": 0.36557779503672044, "learning_rate": 1.3413322870692304e-10, "loss": 0.3941, "step": 8384 }, { "epoch": 1.9957755697030999, "grad_norm": 0.3717543720800664, "learning_rate": 1.203855568326917e-10, "loss": 0.3063, "step": 8385 }, { "epoch": 1.9960135657761646, "grad_norm": 0.37253382680661573, "learning_rate": 1.0738099076035291e-10, "loss": 0.3121, "step": 8386 }, { "epoch": 1.9962515618492294, "grad_norm": 0.3692513186862969, "learning_rate": 9.511953242280492e-11, "loss": 0.3249, "step": 8387 }, { "epoch": 1.9964895579222943, "grad_norm": 0.42548108491286396, "learning_rate": 8.360118364192371e-11, "loss": 0.3672, "step": 8388 }, { "epoch": 1.996727553995359, "grad_norm": 0.381706175323407, "learning_rate": 7.28259461296732e-11, "loss": 0.2803, "step": 8389 }, { "epoch": 1.996965550068424, "grad_norm": 0.3901616736475803, "learning_rate": 6.27938214881052e-11, "loss": 0.3152, "step": 8390 }, { "epoch": 1.9972035461414888, "grad_norm": 0.36504854093795047, "learning_rate": 5.350481120769413e-11, "loss": 0.3544, "step": 8391 }, { "epoch": 1.9974415422145535, "grad_norm": 0.37140042079834173, "learning_rate": 4.4958916669002315e-11, "loss": 0.3046, "step": 8392 }, { "epoch": 1.9976795382876182, "grad_norm": 0.42614737328351837, "learning_rate": 3.7156139142680014e-11, "loss": 0.2803, "step": 8393 }, { "epoch": 1.997917534360683, "grad_norm": 0.4103735541875691, "learning_rate": 3.009647978780006e-11, "loss": 0.3432, "step": 8394 }, { "epoch": 1.9981555304337477, "grad_norm": 0.4593503351998582, "learning_rate": 2.377993965407832e-11, "loss": 0.3484, "step": 8395 }, { "epoch": 1.9983935265068127, "grad_norm": 0.38126825431046657, "learning_rate": 1.8206519680208368e-11, "loss": 0.2837, "step": 8396 }, { "epoch": 1.9986315225798774, "grad_norm": 0.3896583084295616, "learning_rate": 1.3376220694416575e-11, "loss": 0.2981, "step": 8397 }, { "epoch": 1.9988695186529424, "grad_norm": 0.3774261304077494, "learning_rate": 9.289043414462128e-12, "loss": 0.3539, "step": 8398 }, { "epoch": 1.9991075147260071, "grad_norm": 0.3547871077621748, "learning_rate": 5.944988447637024e-12, "loss": 0.3137, "step": 8399 }, { "epoch": 1.9993455107990719, "grad_norm": 0.37445511560704847, "learning_rate": 3.3440562918762855e-12, "loss": 0.2957, "step": 8400 }, { "epoch": 1.9995835068721366, "grad_norm": 0.37305187911281695, "learning_rate": 1.4862473329824156e-12, "loss": 0.3171, "step": 8401 }, { "epoch": 1.9998215029452013, "grad_norm": 0.3725046051286145, "learning_rate": 3.7156184684583597e-13, "loss": 0.3945, "step": 8402 }, { "epoch": 1.9998215029452013, "step": 8402, "total_flos": 1.2470400040881357e+17, "train_loss": 0.3506650565916145, "train_runtime": 79311.4461, "train_samples_per_second": 108.495, "train_steps_per_second": 0.106 } ], "logging_steps": 1.0, "max_steps": 8402, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2470400040881357e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }