{
  "best_metric": 3.0925960540771484,
  "best_model_checkpoint": "matching-buyback/checkpoint-2648",
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 2648,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0377643504531722,
      "grad_norm": 2.148848295211792,
      "learning_rate": 4.716981132075472e-06,
      "loss": 7.1736,
      "step": 25
    },
    {
      "epoch": 0.0755287009063444,
      "grad_norm": 1.9532533884048462,
      "learning_rate": 9.433962264150944e-06,
      "loss": 7.1696,
      "step": 50
    },
    {
      "epoch": 0.11329305135951662,
      "grad_norm": 1.8605287075042725,
      "learning_rate": 1.4150943396226415e-05,
      "loss": 7.1706,
      "step": 75
    },
    {
      "epoch": 0.1510574018126888,
      "grad_norm": 1.7690376043319702,
      "learning_rate": 1.8867924528301888e-05,
      "loss": 7.168,
      "step": 100
    },
    {
      "epoch": 0.18882175226586104,
      "grad_norm": 1.6989686489105225,
      "learning_rate": 2.358490566037736e-05,
      "loss": 7.1623,
      "step": 125
    },
    {
      "epoch": 0.22658610271903323,
      "grad_norm": 1.9419589042663574,
      "learning_rate": 2.830188679245283e-05,
      "loss": 7.1582,
      "step": 150
    },
    {
      "epoch": 0.26435045317220546,
      "grad_norm": 6.193149089813232,
      "learning_rate": 3.30188679245283e-05,
      "loss": 7.142,
      "step": 175
    },
    {
      "epoch": 0.3021148036253776,
      "grad_norm": 3.8507258892059326,
      "learning_rate": 3.7735849056603776e-05,
      "loss": 7.1296,
      "step": 200
    },
    {
      "epoch": 0.33987915407854985,
      "grad_norm": 3.860013246536255,
      "learning_rate": 4.245283018867925e-05,
      "loss": 7.129,
      "step": 225
    },
    {
      "epoch": 0.3776435045317221,
      "grad_norm": 3.5848217010498047,
      "learning_rate": 4.716981132075472e-05,
      "loss": 7.104,
      "step": 250
    },
    {
      "epoch": 0.41540785498489424,
      "grad_norm": 3.5513248443603516,
      "learning_rate": 4.979018044481746e-05,
      "loss": 7.0868,
      "step": 275
    },
    {
      "epoch": 0.45317220543806647,
      "grad_norm": 3.685840368270874,
      "learning_rate": 4.9265631556861106e-05,
      "loss": 6.9957,
      "step": 300
    },
    {
      "epoch": 0.4909365558912387,
      "grad_norm": 3.6940855979919434,
      "learning_rate": 4.874108266890474e-05,
      "loss": 6.9211,
      "step": 325
    },
    {
      "epoch": 0.5287009063444109,
      "grad_norm": 4.479358673095703,
      "learning_rate": 4.8216533780948384e-05,
      "loss": 6.8176,
      "step": 350
    },
    {
      "epoch": 0.5664652567975831,
      "grad_norm": 4.331283092498779,
      "learning_rate": 4.769198489299203e-05,
      "loss": 6.7406,
      "step": 375
    },
    {
      "epoch": 0.6042296072507553,
      "grad_norm": 4.426012992858887,
      "learning_rate": 4.716743600503567e-05,
      "loss": 6.6516,
      "step": 400
    },
    {
      "epoch": 0.6419939577039275,
      "grad_norm": 4.232052326202393,
      "learning_rate": 4.664288711707932e-05,
      "loss": 6.6214,
      "step": 425
    },
    {
      "epoch": 0.6797583081570997,
      "grad_norm": 5.0197601318359375,
      "learning_rate": 4.6118338229122954e-05,
      "loss": 6.4544,
      "step": 450
    },
    {
      "epoch": 0.7175226586102719,
      "grad_norm": 5.322621822357178,
      "learning_rate": 4.5593789341166596e-05,
      "loss": 6.4331,
      "step": 475
    },
    {
      "epoch": 0.7552870090634441,
      "grad_norm": 5.3774871826171875,
      "learning_rate": 4.5069240453210245e-05,
      "loss": 6.3182,
      "step": 500
    },
    {
      "epoch": 0.7930513595166163,
      "grad_norm": 5.22687292098999,
      "learning_rate": 4.454469156525388e-05,
      "loss": 6.2782,
      "step": 525
    },
    {
      "epoch": 0.8308157099697885,
      "grad_norm": 4.664173603057861,
      "learning_rate": 4.402014267729753e-05,
      "loss": 6.2099,
      "step": 550
    },
    {
      "epoch": 0.8685800604229608,
      "grad_norm": 5.771005153656006,
      "learning_rate": 4.349559378934117e-05,
      "loss": 6.1739,
      "step": 575
    },
    {
      "epoch": 0.9063444108761329,
      "grad_norm": 5.225096225738525,
      "learning_rate": 4.297104490138481e-05,
      "loss": 6.1653,
      "step": 600
    },
    {
      "epoch": 0.9441087613293051,
      "grad_norm": 5.635812282562256,
      "learning_rate": 4.244649601342846e-05,
      "loss": 6.0278,
      "step": 625
    },
    {
      "epoch": 0.9818731117824774,
      "grad_norm": 5.488772392272949,
      "learning_rate": 4.192194712547209e-05,
      "loss": 6.0308,
      "step": 650
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.026465028355387523,
      "eval_f1_macro": 0.0018394084225980786,
      "eval_f1_micro": 0.026465028355387523,
      "eval_f1_weighted": 0.002693831801885104,
      "eval_loss": 5.7428083419799805,
      "eval_precision_macro": 0.0011073227324794681,
      "eval_precision_micro": 0.026465028355387523,
      "eval_precision_weighted": 0.0015115244261337012,
      "eval_recall_macro": 0.014671814671814672,
      "eval_recall_micro": 0.026465028355387523,
      "eval_recall_weighted": 0.026465028355387523,
      "eval_runtime": 258.8851,
      "eval_samples_per_second": 10.217,
      "eval_steps_per_second": 0.321,
      "step": 662
    },
    {
      "epoch": 1.0196374622356494,
      "grad_norm": 5.648388385772705,
      "learning_rate": 4.139739823751574e-05,
      "loss": 5.8178,
      "step": 675
    },
    {
      "epoch": 1.0574018126888218,
      "grad_norm": 6.245723724365234,
      "learning_rate": 4.0872849349559384e-05,
      "loss": 5.6123,
      "step": 700
    },
    {
      "epoch": 1.095166163141994,
      "grad_norm": 6.041352272033691,
      "learning_rate": 4.034830046160302e-05,
      "loss": 5.7037,
      "step": 725
    },
    {
      "epoch": 1.1329305135951662,
      "grad_norm": 6.065165996551514,
      "learning_rate": 3.982375157364667e-05,
      "loss": 5.6632,
      "step": 750
    },
    {
      "epoch": 1.1706948640483383,
      "grad_norm": 5.901498794555664,
      "learning_rate": 3.9299202685690305e-05,
      "loss": 5.6154,
      "step": 775
    },
    {
      "epoch": 1.2084592145015105,
      "grad_norm": 6.365645885467529,
      "learning_rate": 3.8774653797733954e-05,
      "loss": 5.4768,
      "step": 800
    },
    {
      "epoch": 1.2462235649546827,
      "grad_norm": 6.6829705238342285,
      "learning_rate": 3.8250104909777596e-05,
      "loss": 5.3382,
      "step": 825
    },
    {
      "epoch": 1.283987915407855,
      "grad_norm": 5.9976019859313965,
      "learning_rate": 3.772555602182123e-05,
      "loss": 5.4,
      "step": 850
    },
    {
      "epoch": 1.3217522658610272,
      "grad_norm": 6.049790859222412,
      "learning_rate": 3.720100713386488e-05,
      "loss": 5.3734,
      "step": 875
    },
    {
      "epoch": 1.3595166163141994,
      "grad_norm": 7.126038074493408,
      "learning_rate": 3.667645824590852e-05,
      "loss": 5.2598,
      "step": 900
    },
    {
      "epoch": 1.3972809667673716,
      "grad_norm": 6.60282564163208,
      "learning_rate": 3.6151909357952166e-05,
      "loss": 5.2374,
      "step": 925
    },
    {
      "epoch": 1.4350453172205437,
      "grad_norm": 6.773179054260254,
      "learning_rate": 3.562736046999581e-05,
      "loss": 5.172,
      "step": 950
    },
    {
      "epoch": 1.4728096676737161,
      "grad_norm": 6.833974361419678,
      "learning_rate": 3.5102811582039444e-05,
      "loss": 5.1996,
      "step": 975
    },
    {
      "epoch": 1.510574018126888,
      "grad_norm": 6.419187068939209,
      "learning_rate": 3.457826269408309e-05,
      "loss": 5.1801,
      "step": 1000
    },
    {
      "epoch": 1.5483383685800605,
      "grad_norm": 6.803603649139404,
      "learning_rate": 3.405371380612673e-05,
      "loss": 4.988,
      "step": 1025
    },
    {
      "epoch": 1.5861027190332326,
      "grad_norm": 6.773290157318115,
      "learning_rate": 3.352916491817038e-05,
      "loss": 5.0186,
      "step": 1050
    },
    {
      "epoch": 1.6238670694864048,
      "grad_norm": 6.730032920837402,
      "learning_rate": 3.300461603021402e-05,
      "loss": 5.0032,
      "step": 1075
    },
    {
      "epoch": 1.6616314199395772,
      "grad_norm": 7.288454532623291,
      "learning_rate": 3.2480067142257656e-05,
      "loss": 4.908,
      "step": 1100
    },
    {
      "epoch": 1.6993957703927491,
      "grad_norm": 6.888255596160889,
      "learning_rate": 3.1955518254301305e-05,
      "loss": 4.937,
      "step": 1125
    },
    {
      "epoch": 1.7371601208459215,
      "grad_norm": 6.915685176849365,
      "learning_rate": 3.143096936634494e-05,
      "loss": 4.9246,
      "step": 1150
    },
    {
      "epoch": 1.7749244712990937,
      "grad_norm": 6.667412757873535,
      "learning_rate": 3.090642047838859e-05,
      "loss": 4.8717,
      "step": 1175
    },
    {
      "epoch": 1.8126888217522659,
      "grad_norm": 6.816399097442627,
      "learning_rate": 3.0381871590432233e-05,
      "loss": 4.9049,
      "step": 1200
    },
    {
      "epoch": 1.850453172205438,
      "grad_norm": 7.150826930999756,
      "learning_rate": 2.985732270247587e-05,
      "loss": 4.705,
      "step": 1225
    },
    {
      "epoch": 1.8882175226586102,
      "grad_norm": 7.209892272949219,
      "learning_rate": 2.9332773814519514e-05,
      "loss": 4.8234,
      "step": 1250
    },
    {
      "epoch": 1.9259818731117826,
      "grad_norm": 7.008028507232666,
      "learning_rate": 2.8808224926563153e-05,
      "loss": 4.6716,
      "step": 1275
    },
    {
      "epoch": 1.9637462235649545,
      "grad_norm": 7.195932865142822,
      "learning_rate": 2.82836760386068e-05,
      "loss": 4.6394,
      "step": 1300
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.21890359168241966,
      "eval_f1_macro": 0.07847614280788937,
      "eval_f1_micro": 0.21890359168241966,
      "eval_f1_weighted": 0.14378778601568412,
      "eval_loss": 4.332443714141846,
      "eval_precision_macro": 0.07050179882195226,
      "eval_precision_micro": 0.21890359168241966,
      "eval_precision_weighted": 0.12910697824366127,
      "eval_recall_macro": 0.12453024453024451,
      "eval_recall_micro": 0.21890359168241966,
      "eval_recall_weighted": 0.21890359168241966,
      "eval_runtime": 274.1069,
      "eval_samples_per_second": 9.65,
      "eval_steps_per_second": 0.303,
      "step": 1324
    },
    {
      "epoch": 2.001510574018127,
      "grad_norm": 7.194442272186279,
      "learning_rate": 2.7759127150650445e-05,
      "loss": 4.5155,
      "step": 1325
    },
    {
      "epoch": 2.039274924471299,
      "grad_norm": 7.649657726287842,
      "learning_rate": 2.7234578262694084e-05,
      "loss": 4.3742,
      "step": 1350
    },
    {
      "epoch": 2.0770392749244713,
      "grad_norm": 6.8479084968566895,
      "learning_rate": 2.6710029374737726e-05,
      "loss": 4.4017,
      "step": 1375
    },
    {
      "epoch": 2.1148036253776437,
      "grad_norm": 7.72012186050415,
      "learning_rate": 2.6185480486781372e-05,
      "loss": 4.2114,
      "step": 1400
    },
    {
      "epoch": 2.1525679758308156,
      "grad_norm": 7.429843425750732,
      "learning_rate": 2.566093159882501e-05,
      "loss": 4.3608,
      "step": 1425
    },
    {
      "epoch": 2.190332326283988,
      "grad_norm": 7.590367794036865,
      "learning_rate": 2.5136382710868657e-05,
      "loss": 4.2724,
      "step": 1450
    },
    {
      "epoch": 2.22809667673716,
      "grad_norm": 7.643247127532959,
      "learning_rate": 2.46118338229123e-05,
      "loss": 4.149,
      "step": 1475
    },
    {
      "epoch": 2.2658610271903323,
      "grad_norm": 7.686148643493652,
      "learning_rate": 2.4087284934955938e-05,
      "loss": 4.1981,
      "step": 1500
    },
    {
      "epoch": 2.3036253776435047,
      "grad_norm": 7.800241470336914,
      "learning_rate": 2.356273604699958e-05,
      "loss": 4.2175,
      "step": 1525
    },
    {
      "epoch": 2.3413897280966767,
      "grad_norm": 7.715053558349609,
      "learning_rate": 2.3038187159043223e-05,
      "loss": 4.1977,
      "step": 1550
    },
    {
      "epoch": 2.379154078549849,
      "grad_norm": 7.797581195831299,
      "learning_rate": 2.251363827108687e-05,
      "loss": 4.1727,
      "step": 1575
    },
    {
      "epoch": 2.416918429003021,
      "grad_norm": 7.720523834228516,
      "learning_rate": 2.198908938313051e-05,
      "loss": 4.0437,
      "step": 1600
    },
    {
      "epoch": 2.4546827794561934,
      "grad_norm": 7.72569465637207,
      "learning_rate": 2.146454049517415e-05,
      "loss": 4.0182,
      "step": 1625
    },
    {
      "epoch": 2.4924471299093653,
      "grad_norm": 7.541537761688232,
      "learning_rate": 2.0939991607217792e-05,
      "loss": 4.0712,
      "step": 1650
    },
    {
      "epoch": 2.5302114803625377,
      "grad_norm": 7.888173580169678,
      "learning_rate": 2.0415442719261435e-05,
      "loss": 3.9628,
      "step": 1675
    },
    {
      "epoch": 2.56797583081571,
      "grad_norm": 7.701706886291504,
      "learning_rate": 1.989089383130508e-05,
      "loss": 3.9174,
      "step": 1700
    },
    {
      "epoch": 2.605740181268882,
      "grad_norm": 7.674171447753906,
      "learning_rate": 1.9366344943348723e-05,
      "loss": 3.8891,
      "step": 1725
    },
    {
      "epoch": 2.6435045317220545,
      "grad_norm": 7.812932014465332,
      "learning_rate": 1.8841796055392362e-05,
      "loss": 3.9667,
      "step": 1750
    },
    {
      "epoch": 2.6812688821752264,
      "grad_norm": 8.138640403747559,
      "learning_rate": 1.8317247167436005e-05,
      "loss": 3.9761,
      "step": 1775
    },
    {
      "epoch": 2.719033232628399,
      "grad_norm": 7.94352912902832,
      "learning_rate": 1.7792698279479647e-05,
      "loss": 3.8518,
      "step": 1800
    },
    {
      "epoch": 2.756797583081571,
      "grad_norm": 7.7708539962768555,
      "learning_rate": 1.7268149391523293e-05,
      "loss": 3.6874,
      "step": 1825
    },
    {
      "epoch": 2.794561933534743,
      "grad_norm": 8.06053638458252,
      "learning_rate": 1.6743600503566935e-05,
      "loss": 3.8025,
      "step": 1850
    },
    {
      "epoch": 2.8323262839879155,
      "grad_norm": 7.878209590911865,
      "learning_rate": 1.6219051615610574e-05,
      "loss": 3.7442,
      "step": 1875
    },
    {
      "epoch": 2.8700906344410875,
      "grad_norm": 8.10824203491211,
      "learning_rate": 1.5694502727654217e-05,
      "loss": 3.8247,
      "step": 1900
    },
    {
      "epoch": 2.90785498489426,
      "grad_norm": 7.619449138641357,
      "learning_rate": 1.5169953839697862e-05,
      "loss": 3.6439,
      "step": 1925
    },
    {
      "epoch": 2.9456193353474323,
      "grad_norm": 8.001503944396973,
      "learning_rate": 1.4645404951741503e-05,
      "loss": 3.6267,
      "step": 1950
    },
    {
      "epoch": 2.983383685800604,
      "grad_norm": 7.938083648681641,
      "learning_rate": 1.4120856063785145e-05,
      "loss": 3.6283,
      "step": 1975
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.45255198487712667,
      "eval_f1_macro": 0.22372025748723245,
      "eval_f1_micro": 0.45255198487712667,
      "eval_f1_weighted": 0.36915380230770156,
      "eval_loss": 3.410895347595215,
      "eval_precision_macro": 0.210819340352679,
      "eval_precision_micro": 0.45255198487712667,
      "eval_precision_weighted": 0.34998443503067106,
      "eval_recall_macro": 0.28555984555984554,
      "eval_recall_micro": 0.45255198487712667,
      "eval_recall_weighted": 0.45255198487712667,
      "eval_runtime": 240.6652,
      "eval_samples_per_second": 10.99,
      "eval_steps_per_second": 0.345,
      "step": 1986
    },
    {
      "epoch": 3.0211480362537766,
      "grad_norm": 8.365966796875,
      "learning_rate": 1.3596307175828788e-05,
      "loss": 3.6007,
      "step": 2000
    },
    {
      "epoch": 3.0589123867069485,
      "grad_norm": 8.145760536193848,
      "learning_rate": 1.3071758287872429e-05,
      "loss": 3.5455,
      "step": 2025
    },
    {
      "epoch": 3.096676737160121,
      "grad_norm": 7.752460479736328,
      "learning_rate": 1.2547209399916074e-05,
      "loss": 3.3978,
      "step": 2050
    },
    {
      "epoch": 3.134441087613293,
      "grad_norm": 8.035235404968262,
      "learning_rate": 1.2022660511959715e-05,
      "loss": 3.5076,
      "step": 2075
    },
    {
      "epoch": 3.1722054380664653,
      "grad_norm": 8.423967361450195,
      "learning_rate": 1.1498111624003358e-05,
      "loss": 3.4326,
      "step": 2100
    },
    {
      "epoch": 3.2099697885196377,
      "grad_norm": 8.095000267028809,
      "learning_rate": 1.0973562736047e-05,
      "loss": 3.4518,
      "step": 2125
    },
    {
      "epoch": 3.2477341389728096,
      "grad_norm": 8.297784805297852,
      "learning_rate": 1.0449013848090642e-05,
      "loss": 3.4024,
      "step": 2150
    },
    {
      "epoch": 3.285498489425982,
      "grad_norm": 8.063372611999512,
      "learning_rate": 9.924464960134285e-06,
      "loss": 3.4202,
      "step": 2175
    },
    {
      "epoch": 3.323262839879154,
      "grad_norm": 7.8603515625,
      "learning_rate": 9.399916072177927e-06,
      "loss": 3.4905,
      "step": 2200
    },
    {
      "epoch": 3.3610271903323263,
      "grad_norm": 8.366976737976074,
      "learning_rate": 8.87536718422157e-06,
      "loss": 3.395,
      "step": 2225
    },
    {
      "epoch": 3.3987915407854983,
      "grad_norm": 8.408001899719238,
      "learning_rate": 8.350818296265214e-06,
      "loss": 3.4572,
      "step": 2250
    },
    {
      "epoch": 3.4365558912386707,
      "grad_norm": 7.793301582336426,
      "learning_rate": 7.826269408308854e-06,
      "loss": 3.3398,
      "step": 2275
    },
    {
      "epoch": 3.474320241691843,
      "grad_norm": 8.445042610168457,
      "learning_rate": 7.301720520352497e-06,
      "loss": 3.3126,
      "step": 2300
    },
    {
      "epoch": 3.512084592145015,
      "grad_norm": 8.048064231872559,
      "learning_rate": 6.77717163239614e-06,
      "loss": 3.3747,
      "step": 2325
    },
    {
      "epoch": 3.5498489425981874,
      "grad_norm": 8.69857120513916,
      "learning_rate": 6.252622744439782e-06,
      "loss": 3.424,
      "step": 2350
    },
    {
      "epoch": 3.5876132930513593,
      "grad_norm": 8.197694778442383,
      "learning_rate": 5.728073856483424e-06,
      "loss": 3.3231,
      "step": 2375
    },
    {
      "epoch": 3.6253776435045317,
      "grad_norm": 8.308802604675293,
      "learning_rate": 5.2035249685270664e-06,
      "loss": 3.2876,
      "step": 2400
    },
    {
      "epoch": 3.663141993957704,
      "grad_norm": 8.097749710083008,
      "learning_rate": 4.67897608057071e-06,
      "loss": 3.201,
      "step": 2425
    },
    {
      "epoch": 3.700906344410876,
      "grad_norm": 8.310770988464355,
      "learning_rate": 4.154427192614352e-06,
      "loss": 3.3542,
      "step": 2450
    },
    {
      "epoch": 3.7386706948640485,
      "grad_norm": 8.366157531738281,
      "learning_rate": 3.6298783046579945e-06,
      "loss": 3.4844,
      "step": 2475
    },
    {
      "epoch": 3.7764350453172204,
      "grad_norm": 8.279153823852539,
      "learning_rate": 3.1053294167016365e-06,
      "loss": 3.2068,
      "step": 2500
    },
    {
      "epoch": 3.814199395770393,
      "grad_norm": 8.165327072143555,
      "learning_rate": 2.5807805287452793e-06,
      "loss": 3.3177,
      "step": 2525
    },
    {
      "epoch": 3.851963746223565,
      "grad_norm": 8.91379165649414,
      "learning_rate": 2.0562316407889217e-06,
      "loss": 3.2367,
      "step": 2550
    },
    {
      "epoch": 3.889728096676737,
      "grad_norm": 8.2485990524292,
      "learning_rate": 1.5316827528325641e-06,
      "loss": 3.3136,
      "step": 2575
    },
    {
      "epoch": 3.9274924471299095,
      "grad_norm": 7.780291557312012,
      "learning_rate": 1.0071338648762065e-06,
      "loss": 3.273,
      "step": 2600
    },
    {
      "epoch": 3.9652567975830815,
      "grad_norm": 8.575440406799316,
      "learning_rate": 4.82584976919849e-07,
      "loss": 3.3768,
      "step": 2625
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.5051039697542533,
      "eval_f1_macro": 0.2675876194485524,
      "eval_f1_micro": 0.5051039697542533,
      "eval_f1_weighted": 0.4266553262031796,
      "eval_loss": 3.0925960540771484,
      "eval_precision_macro": 0.2522810784129609,
      "eval_precision_micro": 0.5051039697542533,
      "eval_precision_weighted": 0.40593727556638787,
      "eval_recall_macro": 0.33,
      "eval_recall_micro": 0.5051039697542533,
      "eval_recall_weighted": 0.5051039697542533,
      "eval_runtime": 244.9673,
      "eval_samples_per_second": 10.797,
      "eval_steps_per_second": 0.339,
      "step": 2648
    }
  ],
  "logging_steps": 25,
  "max_steps": 2648,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2867373216675840.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}