{ "best_metric": 3.0925960540771484, "best_model_checkpoint": "matching-buyback/checkpoint-2648", "epoch": 4.0, "eval_steps": 500, "global_step": 2648, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0377643504531722, "grad_norm": 2.148848295211792, "learning_rate": 4.716981132075472e-06, "loss": 7.1736, "step": 25 }, { "epoch": 0.0755287009063444, "grad_norm": 1.9532533884048462, "learning_rate": 9.433962264150944e-06, "loss": 7.1696, "step": 50 }, { "epoch": 0.11329305135951662, "grad_norm": 1.8605287075042725, "learning_rate": 1.4150943396226415e-05, "loss": 7.1706, "step": 75 }, { "epoch": 0.1510574018126888, "grad_norm": 1.7690376043319702, "learning_rate": 1.8867924528301888e-05, "loss": 7.168, "step": 100 }, { "epoch": 0.18882175226586104, "grad_norm": 1.6989686489105225, "learning_rate": 2.358490566037736e-05, "loss": 7.1623, "step": 125 }, { "epoch": 0.22658610271903323, "grad_norm": 1.9419589042663574, "learning_rate": 2.830188679245283e-05, "loss": 7.1582, "step": 150 }, { "epoch": 0.26435045317220546, "grad_norm": 6.193149089813232, "learning_rate": 3.30188679245283e-05, "loss": 7.142, "step": 175 }, { "epoch": 0.3021148036253776, "grad_norm": 3.8507258892059326, "learning_rate": 3.7735849056603776e-05, "loss": 7.1296, "step": 200 }, { "epoch": 0.33987915407854985, "grad_norm": 3.860013246536255, "learning_rate": 4.245283018867925e-05, "loss": 7.129, "step": 225 }, { "epoch": 0.3776435045317221, "grad_norm": 3.5848217010498047, "learning_rate": 4.716981132075472e-05, "loss": 7.104, "step": 250 }, { "epoch": 0.41540785498489424, "grad_norm": 3.5513248443603516, "learning_rate": 4.979018044481746e-05, "loss": 7.0868, "step": 275 }, { "epoch": 0.45317220543806647, "grad_norm": 3.685840368270874, "learning_rate": 4.9265631556861106e-05, "loss": 6.9957, "step": 300 }, { "epoch": 0.4909365558912387, "grad_norm": 3.6940855979919434, "learning_rate": 4.874108266890474e-05, "loss": 6.9211, "step": 325 }, { "epoch": 0.5287009063444109, "grad_norm": 4.479358673095703, "learning_rate": 4.8216533780948384e-05, "loss": 6.8176, "step": 350 }, { "epoch": 0.5664652567975831, "grad_norm": 4.331283092498779, "learning_rate": 4.769198489299203e-05, "loss": 6.7406, "step": 375 }, { "epoch": 0.6042296072507553, "grad_norm": 4.426012992858887, "learning_rate": 4.716743600503567e-05, "loss": 6.6516, "step": 400 }, { "epoch": 0.6419939577039275, "grad_norm": 4.232052326202393, "learning_rate": 4.664288711707932e-05, "loss": 6.6214, "step": 425 }, { "epoch": 0.6797583081570997, "grad_norm": 5.0197601318359375, "learning_rate": 4.6118338229122954e-05, "loss": 6.4544, "step": 450 }, { "epoch": 0.7175226586102719, "grad_norm": 5.322621822357178, "learning_rate": 4.5593789341166596e-05, "loss": 6.4331, "step": 475 }, { "epoch": 0.7552870090634441, "grad_norm": 5.3774871826171875, "learning_rate": 4.5069240453210245e-05, "loss": 6.3182, "step": 500 }, { "epoch": 0.7930513595166163, "grad_norm": 5.22687292098999, "learning_rate": 4.454469156525388e-05, "loss": 6.2782, "step": 525 }, { "epoch": 0.8308157099697885, "grad_norm": 4.664173603057861, "learning_rate": 4.402014267729753e-05, "loss": 6.2099, "step": 550 }, { "epoch": 0.8685800604229608, "grad_norm": 5.771005153656006, "learning_rate": 4.349559378934117e-05, "loss": 6.1739, "step": 575 }, { "epoch": 0.9063444108761329, "grad_norm": 5.225096225738525, "learning_rate": 4.297104490138481e-05, "loss": 6.1653, "step": 600 }, { "epoch": 0.9441087613293051, "grad_norm": 5.635812282562256, "learning_rate": 4.244649601342846e-05, "loss": 6.0278, "step": 625 }, { "epoch": 0.9818731117824774, "grad_norm": 5.488772392272949, "learning_rate": 4.192194712547209e-05, "loss": 6.0308, "step": 650 }, { "epoch": 1.0, "eval_accuracy": 0.026465028355387523, "eval_f1_macro": 0.0018394084225980786, "eval_f1_micro": 0.026465028355387523, "eval_f1_weighted": 0.002693831801885104, "eval_loss": 5.7428083419799805, "eval_precision_macro": 0.0011073227324794681, "eval_precision_micro": 0.026465028355387523, "eval_precision_weighted": 0.0015115244261337012, "eval_recall_macro": 0.014671814671814672, "eval_recall_micro": 0.026465028355387523, "eval_recall_weighted": 0.026465028355387523, "eval_runtime": 258.8851, "eval_samples_per_second": 10.217, "eval_steps_per_second": 0.321, "step": 662 }, { "epoch": 1.0196374622356494, "grad_norm": 5.648388385772705, "learning_rate": 4.139739823751574e-05, "loss": 5.8178, "step": 675 }, { "epoch": 1.0574018126888218, "grad_norm": 6.245723724365234, "learning_rate": 4.0872849349559384e-05, "loss": 5.6123, "step": 700 }, { "epoch": 1.095166163141994, "grad_norm": 6.041352272033691, "learning_rate": 4.034830046160302e-05, "loss": 5.7037, "step": 725 }, { "epoch": 1.1329305135951662, "grad_norm": 6.065165996551514, "learning_rate": 3.982375157364667e-05, "loss": 5.6632, "step": 750 }, { "epoch": 1.1706948640483383, "grad_norm": 5.901498794555664, "learning_rate": 3.9299202685690305e-05, "loss": 5.6154, "step": 775 }, { "epoch": 1.2084592145015105, "grad_norm": 6.365645885467529, "learning_rate": 3.8774653797733954e-05, "loss": 5.4768, "step": 800 }, { "epoch": 1.2462235649546827, "grad_norm": 6.6829705238342285, "learning_rate": 3.8250104909777596e-05, "loss": 5.3382, "step": 825 }, { "epoch": 1.283987915407855, "grad_norm": 5.9976019859313965, "learning_rate": 3.772555602182123e-05, "loss": 5.4, "step": 850 }, { "epoch": 1.3217522658610272, "grad_norm": 6.049790859222412, "learning_rate": 3.720100713386488e-05, "loss": 5.3734, "step": 875 }, { "epoch": 1.3595166163141994, "grad_norm": 7.126038074493408, "learning_rate": 3.667645824590852e-05, "loss": 5.2598, "step": 900 }, { "epoch": 1.3972809667673716, "grad_norm": 6.60282564163208, "learning_rate": 3.6151909357952166e-05, "loss": 5.2374, "step": 925 }, { "epoch": 1.4350453172205437, "grad_norm": 6.773179054260254, "learning_rate": 3.562736046999581e-05, "loss": 5.172, "step": 950 }, { "epoch": 1.4728096676737161, "grad_norm": 6.833974361419678, "learning_rate": 3.5102811582039444e-05, "loss": 5.1996, "step": 975 }, { "epoch": 1.510574018126888, "grad_norm": 6.419187068939209, "learning_rate": 3.457826269408309e-05, "loss": 5.1801, "step": 1000 }, { "epoch": 1.5483383685800605, "grad_norm": 6.803603649139404, "learning_rate": 3.405371380612673e-05, "loss": 4.988, "step": 1025 }, { "epoch": 1.5861027190332326, "grad_norm": 6.773290157318115, "learning_rate": 3.352916491817038e-05, "loss": 5.0186, "step": 1050 }, { "epoch": 1.6238670694864048, "grad_norm": 6.730032920837402, "learning_rate": 3.300461603021402e-05, "loss": 5.0032, "step": 1075 }, { "epoch": 1.6616314199395772, "grad_norm": 7.288454532623291, "learning_rate": 3.2480067142257656e-05, "loss": 4.908, "step": 1100 }, { "epoch": 1.6993957703927491, "grad_norm": 6.888255596160889, "learning_rate": 3.1955518254301305e-05, "loss": 4.937, "step": 1125 }, { "epoch": 1.7371601208459215, "grad_norm": 6.915685176849365, "learning_rate": 3.143096936634494e-05, "loss": 4.9246, "step": 1150 }, { "epoch": 1.7749244712990937, "grad_norm": 6.667412757873535, "learning_rate": 3.090642047838859e-05, "loss": 4.8717, "step": 1175 }, { "epoch": 1.8126888217522659, "grad_norm": 6.816399097442627, "learning_rate": 3.0381871590432233e-05, "loss": 4.9049, "step": 1200 }, { "epoch": 1.850453172205438, "grad_norm": 7.150826930999756, "learning_rate": 2.985732270247587e-05, "loss": 4.705, "step": 1225 }, { "epoch": 1.8882175226586102, "grad_norm": 7.209892272949219, "learning_rate": 2.9332773814519514e-05, "loss": 4.8234, "step": 1250 }, { "epoch": 1.9259818731117826, "grad_norm": 7.008028507232666, "learning_rate": 2.8808224926563153e-05, "loss": 4.6716, "step": 1275 }, { "epoch": 1.9637462235649545, "grad_norm": 7.195932865142822, "learning_rate": 2.82836760386068e-05, "loss": 4.6394, "step": 1300 }, { "epoch": 2.0, "eval_accuracy": 0.21890359168241966, "eval_f1_macro": 0.07847614280788937, "eval_f1_micro": 0.21890359168241966, "eval_f1_weighted": 0.14378778601568412, "eval_loss": 4.332443714141846, "eval_precision_macro": 0.07050179882195226, "eval_precision_micro": 0.21890359168241966, "eval_precision_weighted": 0.12910697824366127, "eval_recall_macro": 0.12453024453024451, "eval_recall_micro": 0.21890359168241966, "eval_recall_weighted": 0.21890359168241966, "eval_runtime": 274.1069, "eval_samples_per_second": 9.65, "eval_steps_per_second": 0.303, "step": 1324 }, { "epoch": 2.001510574018127, "grad_norm": 7.194442272186279, "learning_rate": 2.7759127150650445e-05, "loss": 4.5155, "step": 1325 }, { "epoch": 2.039274924471299, "grad_norm": 7.649657726287842, "learning_rate": 2.7234578262694084e-05, "loss": 4.3742, "step": 1350 }, { "epoch": 2.0770392749244713, "grad_norm": 6.8479084968566895, "learning_rate": 2.6710029374737726e-05, "loss": 4.4017, "step": 1375 }, { "epoch": 2.1148036253776437, "grad_norm": 7.72012186050415, "learning_rate": 2.6185480486781372e-05, "loss": 4.2114, "step": 1400 }, { "epoch": 2.1525679758308156, "grad_norm": 7.429843425750732, "learning_rate": 2.566093159882501e-05, "loss": 4.3608, "step": 1425 }, { "epoch": 2.190332326283988, "grad_norm": 7.590367794036865, "learning_rate": 2.5136382710868657e-05, "loss": 4.2724, "step": 1450 }, { "epoch": 2.22809667673716, "grad_norm": 7.643247127532959, "learning_rate": 2.46118338229123e-05, "loss": 4.149, "step": 1475 }, { "epoch": 2.2658610271903323, "grad_norm": 7.686148643493652, "learning_rate": 2.4087284934955938e-05, "loss": 4.1981, "step": 1500 }, { "epoch": 2.3036253776435047, "grad_norm": 7.800241470336914, "learning_rate": 2.356273604699958e-05, "loss": 4.2175, "step": 1525 }, { "epoch": 2.3413897280966767, "grad_norm": 7.715053558349609, "learning_rate": 2.3038187159043223e-05, "loss": 4.1977, "step": 1550 }, { "epoch": 2.379154078549849, "grad_norm": 7.797581195831299, "learning_rate": 2.251363827108687e-05, "loss": 4.1727, "step": 1575 }, { "epoch": 2.416918429003021, "grad_norm": 7.720523834228516, "learning_rate": 2.198908938313051e-05, "loss": 4.0437, "step": 1600 }, { "epoch": 2.4546827794561934, "grad_norm": 7.72569465637207, "learning_rate": 2.146454049517415e-05, "loss": 4.0182, "step": 1625 }, { "epoch": 2.4924471299093653, "grad_norm": 7.541537761688232, "learning_rate": 2.0939991607217792e-05, "loss": 4.0712, "step": 1650 }, { "epoch": 2.5302114803625377, "grad_norm": 7.888173580169678, "learning_rate": 2.0415442719261435e-05, "loss": 3.9628, "step": 1675 }, { "epoch": 2.56797583081571, "grad_norm": 7.701706886291504, "learning_rate": 1.989089383130508e-05, "loss": 3.9174, "step": 1700 }, { "epoch": 2.605740181268882, "grad_norm": 7.674171447753906, "learning_rate": 1.9366344943348723e-05, "loss": 3.8891, "step": 1725 }, { "epoch": 2.6435045317220545, "grad_norm": 7.812932014465332, "learning_rate": 1.8841796055392362e-05, "loss": 3.9667, "step": 1750 }, { "epoch": 2.6812688821752264, "grad_norm": 8.138640403747559, "learning_rate": 1.8317247167436005e-05, "loss": 3.9761, "step": 1775 }, { "epoch": 2.719033232628399, "grad_norm": 7.94352912902832, "learning_rate": 1.7792698279479647e-05, "loss": 3.8518, "step": 1800 }, { "epoch": 2.756797583081571, "grad_norm": 7.7708539962768555, "learning_rate": 1.7268149391523293e-05, "loss": 3.6874, "step": 1825 }, { "epoch": 2.794561933534743, "grad_norm": 8.06053638458252, "learning_rate": 1.6743600503566935e-05, "loss": 3.8025, "step": 1850 }, { "epoch": 2.8323262839879155, "grad_norm": 7.878209590911865, "learning_rate": 1.6219051615610574e-05, "loss": 3.7442, "step": 1875 }, { "epoch": 2.8700906344410875, "grad_norm": 8.10824203491211, "learning_rate": 1.5694502727654217e-05, "loss": 3.8247, "step": 1900 }, { "epoch": 2.90785498489426, "grad_norm": 7.619449138641357, "learning_rate": 1.5169953839697862e-05, "loss": 3.6439, "step": 1925 }, { "epoch": 2.9456193353474323, "grad_norm": 8.001503944396973, "learning_rate": 1.4645404951741503e-05, "loss": 3.6267, "step": 1950 }, { "epoch": 2.983383685800604, "grad_norm": 7.938083648681641, "learning_rate": 1.4120856063785145e-05, "loss": 3.6283, "step": 1975 }, { "epoch": 3.0, "eval_accuracy": 0.45255198487712667, "eval_f1_macro": 0.22372025748723245, "eval_f1_micro": 0.45255198487712667, "eval_f1_weighted": 0.36915380230770156, "eval_loss": 3.410895347595215, "eval_precision_macro": 0.210819340352679, "eval_precision_micro": 0.45255198487712667, "eval_precision_weighted": 0.34998443503067106, "eval_recall_macro": 0.28555984555984554, "eval_recall_micro": 0.45255198487712667, "eval_recall_weighted": 0.45255198487712667, "eval_runtime": 240.6652, "eval_samples_per_second": 10.99, "eval_steps_per_second": 0.345, "step": 1986 }, { "epoch": 3.0211480362537766, "grad_norm": 8.365966796875, "learning_rate": 1.3596307175828788e-05, "loss": 3.6007, "step": 2000 }, { "epoch": 3.0589123867069485, "grad_norm": 8.145760536193848, "learning_rate": 1.3071758287872429e-05, "loss": 3.5455, "step": 2025 }, { "epoch": 3.096676737160121, "grad_norm": 7.752460479736328, "learning_rate": 1.2547209399916074e-05, "loss": 3.3978, "step": 2050 }, { "epoch": 3.134441087613293, "grad_norm": 8.035235404968262, "learning_rate": 1.2022660511959715e-05, "loss": 3.5076, "step": 2075 }, { "epoch": 3.1722054380664653, "grad_norm": 8.423967361450195, "learning_rate": 1.1498111624003358e-05, "loss": 3.4326, "step": 2100 }, { "epoch": 3.2099697885196377, "grad_norm": 8.095000267028809, "learning_rate": 1.0973562736047e-05, "loss": 3.4518, "step": 2125 }, { "epoch": 3.2477341389728096, "grad_norm": 8.297784805297852, "learning_rate": 1.0449013848090642e-05, "loss": 3.4024, "step": 2150 }, { "epoch": 3.285498489425982, "grad_norm": 8.063372611999512, "learning_rate": 9.924464960134285e-06, "loss": 3.4202, "step": 2175 }, { "epoch": 3.323262839879154, "grad_norm": 7.8603515625, "learning_rate": 9.399916072177927e-06, "loss": 3.4905, "step": 2200 }, { "epoch": 3.3610271903323263, "grad_norm": 8.366976737976074, "learning_rate": 8.87536718422157e-06, "loss": 3.395, "step": 2225 }, { "epoch": 3.3987915407854983, "grad_norm": 8.408001899719238, "learning_rate": 8.350818296265214e-06, "loss": 3.4572, "step": 2250 }, { "epoch": 3.4365558912386707, "grad_norm": 7.793301582336426, "learning_rate": 7.826269408308854e-06, "loss": 3.3398, "step": 2275 }, { "epoch": 3.474320241691843, "grad_norm": 8.445042610168457, "learning_rate": 7.301720520352497e-06, "loss": 3.3126, "step": 2300 }, { "epoch": 3.512084592145015, "grad_norm": 8.048064231872559, "learning_rate": 6.77717163239614e-06, "loss": 3.3747, "step": 2325 }, { "epoch": 3.5498489425981874, "grad_norm": 8.69857120513916, "learning_rate": 6.252622744439782e-06, "loss": 3.424, "step": 2350 }, { "epoch": 3.5876132930513593, "grad_norm": 8.197694778442383, "learning_rate": 5.728073856483424e-06, "loss": 3.3231, "step": 2375 }, { "epoch": 3.6253776435045317, "grad_norm": 8.308802604675293, "learning_rate": 5.2035249685270664e-06, "loss": 3.2876, "step": 2400 }, { "epoch": 3.663141993957704, "grad_norm": 8.097749710083008, "learning_rate": 4.67897608057071e-06, "loss": 3.201, "step": 2425 }, { "epoch": 3.700906344410876, "grad_norm": 8.310770988464355, "learning_rate": 4.154427192614352e-06, "loss": 3.3542, "step": 2450 }, { "epoch": 3.7386706948640485, "grad_norm": 8.366157531738281, "learning_rate": 3.6298783046579945e-06, "loss": 3.4844, "step": 2475 }, { "epoch": 3.7764350453172204, "grad_norm": 8.279153823852539, "learning_rate": 3.1053294167016365e-06, "loss": 3.2068, "step": 2500 }, { "epoch": 3.814199395770393, "grad_norm": 8.165327072143555, "learning_rate": 2.5807805287452793e-06, "loss": 3.3177, "step": 2525 }, { "epoch": 3.851963746223565, "grad_norm": 8.91379165649414, "learning_rate": 2.0562316407889217e-06, "loss": 3.2367, "step": 2550 }, { "epoch": 3.889728096676737, "grad_norm": 8.2485990524292, "learning_rate": 1.5316827528325641e-06, "loss": 3.3136, "step": 2575 }, { "epoch": 3.9274924471299095, "grad_norm": 7.780291557312012, "learning_rate": 1.0071338648762065e-06, "loss": 3.273, "step": 2600 }, { "epoch": 3.9652567975830815, "grad_norm": 8.575440406799316, "learning_rate": 4.82584976919849e-07, "loss": 3.3768, "step": 2625 }, { "epoch": 4.0, "eval_accuracy": 0.5051039697542533, "eval_f1_macro": 0.2675876194485524, "eval_f1_micro": 0.5051039697542533, "eval_f1_weighted": 0.4266553262031796, "eval_loss": 3.0925960540771484, "eval_precision_macro": 0.2522810784129609, "eval_precision_micro": 0.5051039697542533, "eval_precision_weighted": 0.40593727556638787, "eval_recall_macro": 0.33, "eval_recall_micro": 0.5051039697542533, "eval_recall_weighted": 0.5051039697542533, "eval_runtime": 244.9673, "eval_samples_per_second": 10.797, "eval_steps_per_second": 0.339, "step": 2648 } ], "logging_steps": 25, "max_steps": 2648, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2867373216675840.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }