matching-buyback / checkpoint-2648 /trainer_state.json
aimanfadillah's picture
Upload folder using huggingface_hub
6bc0dd1 verified
{
"best_metric": 3.0925960540771484,
"best_model_checkpoint": "matching-buyback/checkpoint-2648",
"epoch": 4.0,
"eval_steps": 500,
"global_step": 2648,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0377643504531722,
"grad_norm": 2.148848295211792,
"learning_rate": 4.716981132075472e-06,
"loss": 7.1736,
"step": 25
},
{
"epoch": 0.0755287009063444,
"grad_norm": 1.9532533884048462,
"learning_rate": 9.433962264150944e-06,
"loss": 7.1696,
"step": 50
},
{
"epoch": 0.11329305135951662,
"grad_norm": 1.8605287075042725,
"learning_rate": 1.4150943396226415e-05,
"loss": 7.1706,
"step": 75
},
{
"epoch": 0.1510574018126888,
"grad_norm": 1.7690376043319702,
"learning_rate": 1.8867924528301888e-05,
"loss": 7.168,
"step": 100
},
{
"epoch": 0.18882175226586104,
"grad_norm": 1.6989686489105225,
"learning_rate": 2.358490566037736e-05,
"loss": 7.1623,
"step": 125
},
{
"epoch": 0.22658610271903323,
"grad_norm": 1.9419589042663574,
"learning_rate": 2.830188679245283e-05,
"loss": 7.1582,
"step": 150
},
{
"epoch": 0.26435045317220546,
"grad_norm": 6.193149089813232,
"learning_rate": 3.30188679245283e-05,
"loss": 7.142,
"step": 175
},
{
"epoch": 0.3021148036253776,
"grad_norm": 3.8507258892059326,
"learning_rate": 3.7735849056603776e-05,
"loss": 7.1296,
"step": 200
},
{
"epoch": 0.33987915407854985,
"grad_norm": 3.860013246536255,
"learning_rate": 4.245283018867925e-05,
"loss": 7.129,
"step": 225
},
{
"epoch": 0.3776435045317221,
"grad_norm": 3.5848217010498047,
"learning_rate": 4.716981132075472e-05,
"loss": 7.104,
"step": 250
},
{
"epoch": 0.41540785498489424,
"grad_norm": 3.5513248443603516,
"learning_rate": 4.979018044481746e-05,
"loss": 7.0868,
"step": 275
},
{
"epoch": 0.45317220543806647,
"grad_norm": 3.685840368270874,
"learning_rate": 4.9265631556861106e-05,
"loss": 6.9957,
"step": 300
},
{
"epoch": 0.4909365558912387,
"grad_norm": 3.6940855979919434,
"learning_rate": 4.874108266890474e-05,
"loss": 6.9211,
"step": 325
},
{
"epoch": 0.5287009063444109,
"grad_norm": 4.479358673095703,
"learning_rate": 4.8216533780948384e-05,
"loss": 6.8176,
"step": 350
},
{
"epoch": 0.5664652567975831,
"grad_norm": 4.331283092498779,
"learning_rate": 4.769198489299203e-05,
"loss": 6.7406,
"step": 375
},
{
"epoch": 0.6042296072507553,
"grad_norm": 4.426012992858887,
"learning_rate": 4.716743600503567e-05,
"loss": 6.6516,
"step": 400
},
{
"epoch": 0.6419939577039275,
"grad_norm": 4.232052326202393,
"learning_rate": 4.664288711707932e-05,
"loss": 6.6214,
"step": 425
},
{
"epoch": 0.6797583081570997,
"grad_norm": 5.0197601318359375,
"learning_rate": 4.6118338229122954e-05,
"loss": 6.4544,
"step": 450
},
{
"epoch": 0.7175226586102719,
"grad_norm": 5.322621822357178,
"learning_rate": 4.5593789341166596e-05,
"loss": 6.4331,
"step": 475
},
{
"epoch": 0.7552870090634441,
"grad_norm": 5.3774871826171875,
"learning_rate": 4.5069240453210245e-05,
"loss": 6.3182,
"step": 500
},
{
"epoch": 0.7930513595166163,
"grad_norm": 5.22687292098999,
"learning_rate": 4.454469156525388e-05,
"loss": 6.2782,
"step": 525
},
{
"epoch": 0.8308157099697885,
"grad_norm": 4.664173603057861,
"learning_rate": 4.402014267729753e-05,
"loss": 6.2099,
"step": 550
},
{
"epoch": 0.8685800604229608,
"grad_norm": 5.771005153656006,
"learning_rate": 4.349559378934117e-05,
"loss": 6.1739,
"step": 575
},
{
"epoch": 0.9063444108761329,
"grad_norm": 5.225096225738525,
"learning_rate": 4.297104490138481e-05,
"loss": 6.1653,
"step": 600
},
{
"epoch": 0.9441087613293051,
"grad_norm": 5.635812282562256,
"learning_rate": 4.244649601342846e-05,
"loss": 6.0278,
"step": 625
},
{
"epoch": 0.9818731117824774,
"grad_norm": 5.488772392272949,
"learning_rate": 4.192194712547209e-05,
"loss": 6.0308,
"step": 650
},
{
"epoch": 1.0,
"eval_accuracy": 0.026465028355387523,
"eval_f1_macro": 0.0018394084225980786,
"eval_f1_micro": 0.026465028355387523,
"eval_f1_weighted": 0.002693831801885104,
"eval_loss": 5.7428083419799805,
"eval_precision_macro": 0.0011073227324794681,
"eval_precision_micro": 0.026465028355387523,
"eval_precision_weighted": 0.0015115244261337012,
"eval_recall_macro": 0.014671814671814672,
"eval_recall_micro": 0.026465028355387523,
"eval_recall_weighted": 0.026465028355387523,
"eval_runtime": 258.8851,
"eval_samples_per_second": 10.217,
"eval_steps_per_second": 0.321,
"step": 662
},
{
"epoch": 1.0196374622356494,
"grad_norm": 5.648388385772705,
"learning_rate": 4.139739823751574e-05,
"loss": 5.8178,
"step": 675
},
{
"epoch": 1.0574018126888218,
"grad_norm": 6.245723724365234,
"learning_rate": 4.0872849349559384e-05,
"loss": 5.6123,
"step": 700
},
{
"epoch": 1.095166163141994,
"grad_norm": 6.041352272033691,
"learning_rate": 4.034830046160302e-05,
"loss": 5.7037,
"step": 725
},
{
"epoch": 1.1329305135951662,
"grad_norm": 6.065165996551514,
"learning_rate": 3.982375157364667e-05,
"loss": 5.6632,
"step": 750
},
{
"epoch": 1.1706948640483383,
"grad_norm": 5.901498794555664,
"learning_rate": 3.9299202685690305e-05,
"loss": 5.6154,
"step": 775
},
{
"epoch": 1.2084592145015105,
"grad_norm": 6.365645885467529,
"learning_rate": 3.8774653797733954e-05,
"loss": 5.4768,
"step": 800
},
{
"epoch": 1.2462235649546827,
"grad_norm": 6.6829705238342285,
"learning_rate": 3.8250104909777596e-05,
"loss": 5.3382,
"step": 825
},
{
"epoch": 1.283987915407855,
"grad_norm": 5.9976019859313965,
"learning_rate": 3.772555602182123e-05,
"loss": 5.4,
"step": 850
},
{
"epoch": 1.3217522658610272,
"grad_norm": 6.049790859222412,
"learning_rate": 3.720100713386488e-05,
"loss": 5.3734,
"step": 875
},
{
"epoch": 1.3595166163141994,
"grad_norm": 7.126038074493408,
"learning_rate": 3.667645824590852e-05,
"loss": 5.2598,
"step": 900
},
{
"epoch": 1.3972809667673716,
"grad_norm": 6.60282564163208,
"learning_rate": 3.6151909357952166e-05,
"loss": 5.2374,
"step": 925
},
{
"epoch": 1.4350453172205437,
"grad_norm": 6.773179054260254,
"learning_rate": 3.562736046999581e-05,
"loss": 5.172,
"step": 950
},
{
"epoch": 1.4728096676737161,
"grad_norm": 6.833974361419678,
"learning_rate": 3.5102811582039444e-05,
"loss": 5.1996,
"step": 975
},
{
"epoch": 1.510574018126888,
"grad_norm": 6.419187068939209,
"learning_rate": 3.457826269408309e-05,
"loss": 5.1801,
"step": 1000
},
{
"epoch": 1.5483383685800605,
"grad_norm": 6.803603649139404,
"learning_rate": 3.405371380612673e-05,
"loss": 4.988,
"step": 1025
},
{
"epoch": 1.5861027190332326,
"grad_norm": 6.773290157318115,
"learning_rate": 3.352916491817038e-05,
"loss": 5.0186,
"step": 1050
},
{
"epoch": 1.6238670694864048,
"grad_norm": 6.730032920837402,
"learning_rate": 3.300461603021402e-05,
"loss": 5.0032,
"step": 1075
},
{
"epoch": 1.6616314199395772,
"grad_norm": 7.288454532623291,
"learning_rate": 3.2480067142257656e-05,
"loss": 4.908,
"step": 1100
},
{
"epoch": 1.6993957703927491,
"grad_norm": 6.888255596160889,
"learning_rate": 3.1955518254301305e-05,
"loss": 4.937,
"step": 1125
},
{
"epoch": 1.7371601208459215,
"grad_norm": 6.915685176849365,
"learning_rate": 3.143096936634494e-05,
"loss": 4.9246,
"step": 1150
},
{
"epoch": 1.7749244712990937,
"grad_norm": 6.667412757873535,
"learning_rate": 3.090642047838859e-05,
"loss": 4.8717,
"step": 1175
},
{
"epoch": 1.8126888217522659,
"grad_norm": 6.816399097442627,
"learning_rate": 3.0381871590432233e-05,
"loss": 4.9049,
"step": 1200
},
{
"epoch": 1.850453172205438,
"grad_norm": 7.150826930999756,
"learning_rate": 2.985732270247587e-05,
"loss": 4.705,
"step": 1225
},
{
"epoch": 1.8882175226586102,
"grad_norm": 7.209892272949219,
"learning_rate": 2.9332773814519514e-05,
"loss": 4.8234,
"step": 1250
},
{
"epoch": 1.9259818731117826,
"grad_norm": 7.008028507232666,
"learning_rate": 2.8808224926563153e-05,
"loss": 4.6716,
"step": 1275
},
{
"epoch": 1.9637462235649545,
"grad_norm": 7.195932865142822,
"learning_rate": 2.82836760386068e-05,
"loss": 4.6394,
"step": 1300
},
{
"epoch": 2.0,
"eval_accuracy": 0.21890359168241966,
"eval_f1_macro": 0.07847614280788937,
"eval_f1_micro": 0.21890359168241966,
"eval_f1_weighted": 0.14378778601568412,
"eval_loss": 4.332443714141846,
"eval_precision_macro": 0.07050179882195226,
"eval_precision_micro": 0.21890359168241966,
"eval_precision_weighted": 0.12910697824366127,
"eval_recall_macro": 0.12453024453024451,
"eval_recall_micro": 0.21890359168241966,
"eval_recall_weighted": 0.21890359168241966,
"eval_runtime": 274.1069,
"eval_samples_per_second": 9.65,
"eval_steps_per_second": 0.303,
"step": 1324
},
{
"epoch": 2.001510574018127,
"grad_norm": 7.194442272186279,
"learning_rate": 2.7759127150650445e-05,
"loss": 4.5155,
"step": 1325
},
{
"epoch": 2.039274924471299,
"grad_norm": 7.649657726287842,
"learning_rate": 2.7234578262694084e-05,
"loss": 4.3742,
"step": 1350
},
{
"epoch": 2.0770392749244713,
"grad_norm": 6.8479084968566895,
"learning_rate": 2.6710029374737726e-05,
"loss": 4.4017,
"step": 1375
},
{
"epoch": 2.1148036253776437,
"grad_norm": 7.72012186050415,
"learning_rate": 2.6185480486781372e-05,
"loss": 4.2114,
"step": 1400
},
{
"epoch": 2.1525679758308156,
"grad_norm": 7.429843425750732,
"learning_rate": 2.566093159882501e-05,
"loss": 4.3608,
"step": 1425
},
{
"epoch": 2.190332326283988,
"grad_norm": 7.590367794036865,
"learning_rate": 2.5136382710868657e-05,
"loss": 4.2724,
"step": 1450
},
{
"epoch": 2.22809667673716,
"grad_norm": 7.643247127532959,
"learning_rate": 2.46118338229123e-05,
"loss": 4.149,
"step": 1475
},
{
"epoch": 2.2658610271903323,
"grad_norm": 7.686148643493652,
"learning_rate": 2.4087284934955938e-05,
"loss": 4.1981,
"step": 1500
},
{
"epoch": 2.3036253776435047,
"grad_norm": 7.800241470336914,
"learning_rate": 2.356273604699958e-05,
"loss": 4.2175,
"step": 1525
},
{
"epoch": 2.3413897280966767,
"grad_norm": 7.715053558349609,
"learning_rate": 2.3038187159043223e-05,
"loss": 4.1977,
"step": 1550
},
{
"epoch": 2.379154078549849,
"grad_norm": 7.797581195831299,
"learning_rate": 2.251363827108687e-05,
"loss": 4.1727,
"step": 1575
},
{
"epoch": 2.416918429003021,
"grad_norm": 7.720523834228516,
"learning_rate": 2.198908938313051e-05,
"loss": 4.0437,
"step": 1600
},
{
"epoch": 2.4546827794561934,
"grad_norm": 7.72569465637207,
"learning_rate": 2.146454049517415e-05,
"loss": 4.0182,
"step": 1625
},
{
"epoch": 2.4924471299093653,
"grad_norm": 7.541537761688232,
"learning_rate": 2.0939991607217792e-05,
"loss": 4.0712,
"step": 1650
},
{
"epoch": 2.5302114803625377,
"grad_norm": 7.888173580169678,
"learning_rate": 2.0415442719261435e-05,
"loss": 3.9628,
"step": 1675
},
{
"epoch": 2.56797583081571,
"grad_norm": 7.701706886291504,
"learning_rate": 1.989089383130508e-05,
"loss": 3.9174,
"step": 1700
},
{
"epoch": 2.605740181268882,
"grad_norm": 7.674171447753906,
"learning_rate": 1.9366344943348723e-05,
"loss": 3.8891,
"step": 1725
},
{
"epoch": 2.6435045317220545,
"grad_norm": 7.812932014465332,
"learning_rate": 1.8841796055392362e-05,
"loss": 3.9667,
"step": 1750
},
{
"epoch": 2.6812688821752264,
"grad_norm": 8.138640403747559,
"learning_rate": 1.8317247167436005e-05,
"loss": 3.9761,
"step": 1775
},
{
"epoch": 2.719033232628399,
"grad_norm": 7.94352912902832,
"learning_rate": 1.7792698279479647e-05,
"loss": 3.8518,
"step": 1800
},
{
"epoch": 2.756797583081571,
"grad_norm": 7.7708539962768555,
"learning_rate": 1.7268149391523293e-05,
"loss": 3.6874,
"step": 1825
},
{
"epoch": 2.794561933534743,
"grad_norm": 8.06053638458252,
"learning_rate": 1.6743600503566935e-05,
"loss": 3.8025,
"step": 1850
},
{
"epoch": 2.8323262839879155,
"grad_norm": 7.878209590911865,
"learning_rate": 1.6219051615610574e-05,
"loss": 3.7442,
"step": 1875
},
{
"epoch": 2.8700906344410875,
"grad_norm": 8.10824203491211,
"learning_rate": 1.5694502727654217e-05,
"loss": 3.8247,
"step": 1900
},
{
"epoch": 2.90785498489426,
"grad_norm": 7.619449138641357,
"learning_rate": 1.5169953839697862e-05,
"loss": 3.6439,
"step": 1925
},
{
"epoch": 2.9456193353474323,
"grad_norm": 8.001503944396973,
"learning_rate": 1.4645404951741503e-05,
"loss": 3.6267,
"step": 1950
},
{
"epoch": 2.983383685800604,
"grad_norm": 7.938083648681641,
"learning_rate": 1.4120856063785145e-05,
"loss": 3.6283,
"step": 1975
},
{
"epoch": 3.0,
"eval_accuracy": 0.45255198487712667,
"eval_f1_macro": 0.22372025748723245,
"eval_f1_micro": 0.45255198487712667,
"eval_f1_weighted": 0.36915380230770156,
"eval_loss": 3.410895347595215,
"eval_precision_macro": 0.210819340352679,
"eval_precision_micro": 0.45255198487712667,
"eval_precision_weighted": 0.34998443503067106,
"eval_recall_macro": 0.28555984555984554,
"eval_recall_micro": 0.45255198487712667,
"eval_recall_weighted": 0.45255198487712667,
"eval_runtime": 240.6652,
"eval_samples_per_second": 10.99,
"eval_steps_per_second": 0.345,
"step": 1986
},
{
"epoch": 3.0211480362537766,
"grad_norm": 8.365966796875,
"learning_rate": 1.3596307175828788e-05,
"loss": 3.6007,
"step": 2000
},
{
"epoch": 3.0589123867069485,
"grad_norm": 8.145760536193848,
"learning_rate": 1.3071758287872429e-05,
"loss": 3.5455,
"step": 2025
},
{
"epoch": 3.096676737160121,
"grad_norm": 7.752460479736328,
"learning_rate": 1.2547209399916074e-05,
"loss": 3.3978,
"step": 2050
},
{
"epoch": 3.134441087613293,
"grad_norm": 8.035235404968262,
"learning_rate": 1.2022660511959715e-05,
"loss": 3.5076,
"step": 2075
},
{
"epoch": 3.1722054380664653,
"grad_norm": 8.423967361450195,
"learning_rate": 1.1498111624003358e-05,
"loss": 3.4326,
"step": 2100
},
{
"epoch": 3.2099697885196377,
"grad_norm": 8.095000267028809,
"learning_rate": 1.0973562736047e-05,
"loss": 3.4518,
"step": 2125
},
{
"epoch": 3.2477341389728096,
"grad_norm": 8.297784805297852,
"learning_rate": 1.0449013848090642e-05,
"loss": 3.4024,
"step": 2150
},
{
"epoch": 3.285498489425982,
"grad_norm": 8.063372611999512,
"learning_rate": 9.924464960134285e-06,
"loss": 3.4202,
"step": 2175
},
{
"epoch": 3.323262839879154,
"grad_norm": 7.8603515625,
"learning_rate": 9.399916072177927e-06,
"loss": 3.4905,
"step": 2200
},
{
"epoch": 3.3610271903323263,
"grad_norm": 8.366976737976074,
"learning_rate": 8.87536718422157e-06,
"loss": 3.395,
"step": 2225
},
{
"epoch": 3.3987915407854983,
"grad_norm": 8.408001899719238,
"learning_rate": 8.350818296265214e-06,
"loss": 3.4572,
"step": 2250
},
{
"epoch": 3.4365558912386707,
"grad_norm": 7.793301582336426,
"learning_rate": 7.826269408308854e-06,
"loss": 3.3398,
"step": 2275
},
{
"epoch": 3.474320241691843,
"grad_norm": 8.445042610168457,
"learning_rate": 7.301720520352497e-06,
"loss": 3.3126,
"step": 2300
},
{
"epoch": 3.512084592145015,
"grad_norm": 8.048064231872559,
"learning_rate": 6.77717163239614e-06,
"loss": 3.3747,
"step": 2325
},
{
"epoch": 3.5498489425981874,
"grad_norm": 8.69857120513916,
"learning_rate": 6.252622744439782e-06,
"loss": 3.424,
"step": 2350
},
{
"epoch": 3.5876132930513593,
"grad_norm": 8.197694778442383,
"learning_rate": 5.728073856483424e-06,
"loss": 3.3231,
"step": 2375
},
{
"epoch": 3.6253776435045317,
"grad_norm": 8.308802604675293,
"learning_rate": 5.2035249685270664e-06,
"loss": 3.2876,
"step": 2400
},
{
"epoch": 3.663141993957704,
"grad_norm": 8.097749710083008,
"learning_rate": 4.67897608057071e-06,
"loss": 3.201,
"step": 2425
},
{
"epoch": 3.700906344410876,
"grad_norm": 8.310770988464355,
"learning_rate": 4.154427192614352e-06,
"loss": 3.3542,
"step": 2450
},
{
"epoch": 3.7386706948640485,
"grad_norm": 8.366157531738281,
"learning_rate": 3.6298783046579945e-06,
"loss": 3.4844,
"step": 2475
},
{
"epoch": 3.7764350453172204,
"grad_norm": 8.279153823852539,
"learning_rate": 3.1053294167016365e-06,
"loss": 3.2068,
"step": 2500
},
{
"epoch": 3.814199395770393,
"grad_norm": 8.165327072143555,
"learning_rate": 2.5807805287452793e-06,
"loss": 3.3177,
"step": 2525
},
{
"epoch": 3.851963746223565,
"grad_norm": 8.91379165649414,
"learning_rate": 2.0562316407889217e-06,
"loss": 3.2367,
"step": 2550
},
{
"epoch": 3.889728096676737,
"grad_norm": 8.2485990524292,
"learning_rate": 1.5316827528325641e-06,
"loss": 3.3136,
"step": 2575
},
{
"epoch": 3.9274924471299095,
"grad_norm": 7.780291557312012,
"learning_rate": 1.0071338648762065e-06,
"loss": 3.273,
"step": 2600
},
{
"epoch": 3.9652567975830815,
"grad_norm": 8.575440406799316,
"learning_rate": 4.82584976919849e-07,
"loss": 3.3768,
"step": 2625
},
{
"epoch": 4.0,
"eval_accuracy": 0.5051039697542533,
"eval_f1_macro": 0.2675876194485524,
"eval_f1_micro": 0.5051039697542533,
"eval_f1_weighted": 0.4266553262031796,
"eval_loss": 3.0925960540771484,
"eval_precision_macro": 0.2522810784129609,
"eval_precision_micro": 0.5051039697542533,
"eval_precision_weighted": 0.40593727556638787,
"eval_recall_macro": 0.33,
"eval_recall_micro": 0.5051039697542533,
"eval_recall_weighted": 0.5051039697542533,
"eval_runtime": 244.9673,
"eval_samples_per_second": 10.797,
"eval_steps_per_second": 0.339,
"step": 2648
}
],
"logging_steps": 25,
"max_steps": 2648,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2867373216675840.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}