{ "best_metric": 0.06995197385549545, "best_model_checkpoint": "output_pipe/prom_300_notata/origin/checkpoint-800", "epoch": 4.0, "eval_steps": 200, "global_step": 2656, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15060240963855423, "grad_norm": 1.585874080657959, "learning_rate": 2.9424405218726017e-05, "loss": 0.3415, "step": 100 }, { "epoch": 0.30120481927710846, "grad_norm": 3.314279317855835, "learning_rate": 2.827321565617805e-05, "loss": 0.1374, "step": 200 }, { "epoch": 0.30120481927710846, "eval_accuracy": 0.9636329376295459, "eval_f1": 0.9635801730549014, "eval_loss": 0.11874407529830933, "eval_matthews_correlation": 0.9286443161593607, "eval_precision": 0.9653044555881855, "eval_recall": 0.9633419342852427, "eval_runtime": 2.0547, "eval_samples_per_second": 2582.881, "eval_steps_per_second": 40.396, "step": 200 }, { "epoch": 0.45180722891566266, "grad_norm": 1.1476503610610962, "learning_rate": 2.7122026093630083e-05, "loss": 0.1131, "step": 300 }, { "epoch": 0.6024096385542169, "grad_norm": 1.4456894397735596, "learning_rate": 2.597083653108212e-05, "loss": 0.0844, "step": 400 }, { "epoch": 0.6024096385542169, "eval_accuracy": 0.9715470133785566, "eval_f1": 0.971545978842977, "eval_loss": 0.07299650460481644, "eval_matthews_correlation": 0.9431284574559641, "eval_precision": 0.9715395817834918, "eval_recall": 0.9715888769607457, "eval_runtime": 2.0485, "eval_samples_per_second": 2590.731, "eval_steps_per_second": 40.518, "step": 400 }, { "epoch": 0.7530120481927711, "grad_norm": 0.18486830592155457, "learning_rate": 2.481964696853415e-05, "loss": 0.0896, "step": 500 }, { "epoch": 0.9036144578313253, "grad_norm": 0.22208698093891144, "learning_rate": 2.3668457405986186e-05, "loss": 0.0785, "step": 600 }, { "epoch": 0.9036144578313253, "eval_accuracy": 0.9739966082532504, "eval_f1": 0.9739843921840996, "eval_loss": 0.08863991498947144, "eval_matthews_correlation": 0.948218039694491, "eval_precision": 0.974341521021252, "eval_recall": 0.9738766326350019, "eval_runtime": 2.0499, "eval_samples_per_second": 2588.955, "eval_steps_per_second": 40.491, "step": 600 }, { "epoch": 1.0542168674698795, "grad_norm": 2.187502384185791, "learning_rate": 2.251726784343822e-05, "loss": 0.0722, "step": 700 }, { "epoch": 1.2048192771084336, "grad_norm": 2.979804515838623, "learning_rate": 2.1366078280890252e-05, "loss": 0.0416, "step": 800 }, { "epoch": 1.2048192771084336, "eval_accuracy": 0.9781420765027322, "eval_f1": 0.9781331874627035, "eval_loss": 0.06995197385549545, "eval_matthews_correlation": 0.9564543617799154, "eval_precision": 0.9784162660123144, "eval_recall": 0.9780381704999754, "eval_runtime": 2.0486, "eval_samples_per_second": 2590.596, "eval_steps_per_second": 40.516, "step": 800 }, { "epoch": 1.355421686746988, "grad_norm": 3.7425546646118164, "learning_rate": 2.021488871834229e-05, "loss": 0.0336, "step": 900 }, { "epoch": 1.5060240963855422, "grad_norm": 0.8297073245048523, "learning_rate": 1.9063699155794322e-05, "loss": 0.035, "step": 1000 }, { "epoch": 1.5060240963855422, "eval_accuracy": 0.97719992462785, "eval_f1": 0.9771963977277897, "eval_loss": 0.08680247515439987, "eval_matthews_correlation": 0.954401194573866, "eval_precision": 0.9772252037763616, "eval_recall": 0.9771759920662539, "eval_runtime": 2.0487, "eval_samples_per_second": 2590.457, "eval_steps_per_second": 40.514, "step": 1000 }, { "epoch": 1.6566265060240963, "grad_norm": 0.46684613823890686, "learning_rate": 1.7912509593246355e-05, "loss": 0.0367, "step": 1100 }, { "epoch": 1.8072289156626506, "grad_norm": 1.1904711723327637, "learning_rate": 1.6761320030698388e-05, "loss": 0.0452, "step": 1200 }, { "epoch": 1.8072289156626506, "eval_accuracy": 0.979272658752591, "eval_f1": 0.9792632572070812, "eval_loss": 0.07334966957569122, "eval_matthews_correlation": 0.9587606831731992, "eval_precision": 0.9796036311772153, "eval_recall": 0.9791571559531644, "eval_runtime": 2.048, "eval_samples_per_second": 2591.3, "eval_steps_per_second": 40.527, "step": 1200 }, { "epoch": 1.9578313253012047, "grad_norm": 2.6981256008148193, "learning_rate": 1.5610130468150424e-05, "loss": 0.0308, "step": 1300 }, { "epoch": 2.108433734939759, "grad_norm": 0.08918892592191696, "learning_rate": 1.4458940905602456e-05, "loss": 0.0174, "step": 1400 }, { "epoch": 2.108433734939759, "eval_accuracy": 0.978895798002638, "eval_f1": 0.9788908805296799, "eval_loss": 0.08836409449577332, "eval_matthews_correlation": 0.9578287746494739, "eval_precision": 0.9789860333608025, "eval_recall": 0.9788427520053816, "eval_runtime": 2.0489, "eval_samples_per_second": 2590.197, "eval_steps_per_second": 40.51, "step": 1400 }, { "epoch": 2.2590361445783134, "grad_norm": 0.012705490924417973, "learning_rate": 1.3307751343054489e-05, "loss": 0.0106, "step": 1500 }, { "epoch": 2.4096385542168672, "grad_norm": 2.8612990379333496, "learning_rate": 1.2156561780506524e-05, "loss": 0.0073, "step": 1600 }, { "epoch": 2.4096385542168672, "eval_accuracy": 0.9800263802524967, "eval_f1": 0.9800205057769635, "eval_loss": 0.09836893528699875, "eval_matthews_correlation": 0.960131261055992, "eval_precision": 0.9801773617563092, "eval_recall": 0.9799539252981333, "eval_runtime": 2.0486, "eval_samples_per_second": 2590.534, "eval_steps_per_second": 40.515, "step": 1600 }, { "epoch": 2.5602409638554215, "grad_norm": 0.3261496126651764, "learning_rate": 1.1005372217958557e-05, "loss": 0.0107, "step": 1700 }, { "epoch": 2.710843373493976, "grad_norm": 4.168089389801025, "learning_rate": 9.854182655410591e-06, "loss": 0.0058, "step": 1800 }, { "epoch": 2.710843373493976, "eval_accuracy": 0.9787073676276615, "eval_f1": 0.978705903879415, "eval_loss": 0.10657216608524323, "eval_matthews_correlation": 0.9574202184236169, "eval_precision": 0.9786936551154061, "eval_recall": 0.9787265638737861, "eval_runtime": 2.0494, "eval_samples_per_second": 2589.545, "eval_steps_per_second": 40.5, "step": 1800 }, { "epoch": 2.86144578313253, "grad_norm": 0.016729481518268585, "learning_rate": 8.702993092862626e-06, "loss": 0.0088, "step": 1900 }, { "epoch": 3.0120481927710845, "grad_norm": 0.018642086535692215, "learning_rate": 7.5518035303146585e-06, "loss": 0.0061, "step": 2000 }, { "epoch": 3.0120481927710845, "eval_accuracy": 0.9783305068777087, "eval_f1": 0.9783205031761819, "eval_loss": 0.10219753533601761, "eval_matthews_correlation": 0.9568830909886441, "eval_precision": 0.9786702504272362, "eval_recall": 0.9782129498348509, "eval_runtime": 2.0489, "eval_samples_per_second": 2590.175, "eval_steps_per_second": 40.51, "step": 2000 }, { "epoch": 3.1626506024096384, "grad_norm": 0.004093084018677473, "learning_rate": 6.4006139677666924e-06, "loss": 0.001, "step": 2100 }, { "epoch": 3.3132530120481927, "grad_norm": 0.012232447974383831, "learning_rate": 5.249424405218726e-06, "loss": 0.0024, "step": 2200 }, { "epoch": 3.3132530120481927, "eval_accuracy": 0.978895798002638, "eval_f1": 0.9788895910096218, "eval_loss": 0.11258693039417267, "eval_matthews_correlation": 0.957869327344797, "eval_precision": 0.9790461316777106, "eval_recall": 0.9788232216042883, "eval_runtime": 2.0486, "eval_samples_per_second": 2590.571, "eval_steps_per_second": 40.516, "step": 2200 }, { "epoch": 3.463855421686747, "grad_norm": 0.005885094869881868, "learning_rate": 4.09823484267076e-06, "loss": 0.0005, "step": 2300 }, { "epoch": 3.6144578313253013, "grad_norm": 0.00728481262922287, "learning_rate": 2.9470452801227938e-06, "loss": 0.0023, "step": 2400 }, { "epoch": 3.6144578313253013, "eval_accuracy": 0.9790842283776144, "eval_f1": 0.9790764991424794, "eval_loss": 0.11937826871871948, "eval_matthews_correlation": 0.9583067060360234, "eval_precision": 0.9793165728379114, "eval_recall": 0.9789901887787265, "eval_runtime": 2.048, "eval_samples_per_second": 2591.335, "eval_steps_per_second": 40.528, "step": 2400 }, { "epoch": 3.765060240963855, "grad_norm": 0.00713815912604332, "learning_rate": 1.7958557175748275e-06, "loss": 0.0023, "step": 2500 }, { "epoch": 3.9156626506024095, "grad_norm": 0.007936985231935978, "learning_rate": 6.446661550268611e-07, "loss": 0.0058, "step": 2600 }, { "epoch": 3.9156626506024095, "eval_accuracy": 0.979272658752591, "eval_f1": 0.9792660147189736, "eval_loss": 0.11451391130685806, "eval_matthews_correlation": 0.9586433697976791, "eval_precision": 0.979451094051508, "eval_recall": 0.9791923106751326, "eval_runtime": 2.0485, "eval_samples_per_second": 2590.613, "eval_steps_per_second": 40.516, "step": 2600 }, { "epoch": 4.0, "step": 2656, "total_flos": 2.833046111484432e+16, "train_loss": 0.04600991761446538, "train_runtime": 331.0189, "train_samples_per_second": 512.986, "train_steps_per_second": 8.024 } ], "logging_steps": 100, "max_steps": 2656, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.833046111484432e+16, "train_batch_size": 64, "trial_name": null, "trial_params": null }