picth_vision_women_checkpoint_1 / trainer_state.json
miladfa7's picture
Training in progress, epoch 1
7c7a5d9 verified
{
"best_global_step": 3138,
"best_metric": 0.9956178790534619,
"best_model_checkpoint": "/mnt/disk2/users/milad/cricket/PitchVision/models/picth_vision_women_checkpoint_1/checkpoint-3138",
"epoch": 1.5,
"eval_steps": 500,
"global_step": 6276,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012746972594008922,
"grad_norm": 2.4470611606375314e-05,
"learning_rate": 6.289808917197452e-06,
"loss": 0.0,
"step": 80
},
{
"epoch": 0.025493945188017845,
"grad_norm": 1.2009478268737439e-05,
"learning_rate": 1.265923566878981e-05,
"loss": 0.0,
"step": 160
},
{
"epoch": 0.03824091778202677,
"grad_norm": 9.67565028986428e-06,
"learning_rate": 1.902866242038217e-05,
"loss": 0.0,
"step": 240
},
{
"epoch": 0.05098789037603569,
"grad_norm": 1.1691465260810219e-05,
"learning_rate": 2.5398089171974526e-05,
"loss": 0.0,
"step": 320
},
{
"epoch": 0.06373486297004462,
"grad_norm": 1.419114869349869e-05,
"learning_rate": 3.1767515923566885e-05,
"loss": 0.0,
"step": 400
},
{
"epoch": 0.07648183556405354,
"grad_norm": 2.7463556762086228e-06,
"learning_rate": 3.8136942675159234e-05,
"loss": 0.0,
"step": 480
},
{
"epoch": 0.08922880815806246,
"grad_norm": 1.7475674667366548e-06,
"learning_rate": 4.450636942675159e-05,
"loss": 0.0,
"step": 560
},
{
"epoch": 0.10197578075207138,
"grad_norm": 6.684086315544846e-07,
"learning_rate": 4.990262039660057e-05,
"loss": 0.0,
"step": 640
},
{
"epoch": 0.1147227533460803,
"grad_norm": 2.5132344489975367e-06,
"learning_rate": 4.9194405099150144e-05,
"loss": 0.0,
"step": 720
},
{
"epoch": 0.12746972594008923,
"grad_norm": 1.2890516245533945e-06,
"learning_rate": 4.8486189801699724e-05,
"loss": 0.0,
"step": 800
},
{
"epoch": 0.14021669853409816,
"grad_norm": 5.019840045861201e-07,
"learning_rate": 4.7777974504249296e-05,
"loss": 0.0,
"step": 880
},
{
"epoch": 0.15296367112810708,
"grad_norm": 4.736523635529011e-07,
"learning_rate": 4.706975920679887e-05,
"loss": 0.0,
"step": 960
},
{
"epoch": 0.165710643722116,
"grad_norm": 3.833743278391921e-07,
"learning_rate": 4.636154390934844e-05,
"loss": 0.0,
"step": 1040
},
{
"epoch": 0.17845761631612492,
"grad_norm": 4.709651193479658e-07,
"learning_rate": 4.565332861189802e-05,
"loss": 0.0,
"step": 1120
},
{
"epoch": 0.19120458891013384,
"grad_norm": 3.1532042044091213e-07,
"learning_rate": 4.49451133144476e-05,
"loss": 0.0,
"step": 1200
},
{
"epoch": 0.20395156150414276,
"grad_norm": 2.958893503546278e-07,
"learning_rate": 4.423689801699717e-05,
"loss": 0.0,
"step": 1280
},
{
"epoch": 0.21669853409815168,
"grad_norm": 3.109137480805657e-07,
"learning_rate": 4.352868271954674e-05,
"loss": 0.0,
"step": 1360
},
{
"epoch": 0.2294455066921606,
"grad_norm": 5.809162075820495e-07,
"learning_rate": 4.282046742209632e-05,
"loss": 0.0,
"step": 1440
},
{
"epoch": 0.24219247928616955,
"grad_norm": 2.315806000297016e-07,
"learning_rate": 4.2112252124645894e-05,
"loss": 0.0,
"step": 1520
},
{
"epoch": 0.25493945188017847,
"grad_norm": 2.066967539349207e-07,
"learning_rate": 4.1404036827195467e-05,
"loss": 0.0,
"step": 1600
},
{
"epoch": 0.2676864244741874,
"grad_norm": 1.909528322130427e-07,
"learning_rate": 4.0695821529745046e-05,
"loss": 0.0,
"step": 1680
},
{
"epoch": 0.2804333970681963,
"grad_norm": 2.6134313202419435e-07,
"learning_rate": 3.9987606232294625e-05,
"loss": 0.0,
"step": 1760
},
{
"epoch": 0.29318036966220523,
"grad_norm": 1.887009801748718e-07,
"learning_rate": 3.92793909348442e-05,
"loss": 0.0,
"step": 1840
},
{
"epoch": 0.30592734225621415,
"grad_norm": 2.4859255631781707e-07,
"learning_rate": 3.857117563739377e-05,
"loss": 0.0,
"step": 1920
},
{
"epoch": 0.3186743148502231,
"grad_norm": 2.077134269029557e-07,
"learning_rate": 3.786296033994334e-05,
"loss": 0.0,
"step": 2000
},
{
"epoch": 0.331421287444232,
"grad_norm": 1.5502992312121933e-07,
"learning_rate": 3.715474504249292e-05,
"loss": 0.0,
"step": 2080
},
{
"epoch": 0.3441682600382409,
"grad_norm": 1.4760372835098678e-07,
"learning_rate": 3.644652974504249e-05,
"loss": 0.0,
"step": 2160
},
{
"epoch": 0.35691523263224983,
"grad_norm": 1.3052418523784581e-07,
"learning_rate": 3.573831444759207e-05,
"loss": 0.0,
"step": 2240
},
{
"epoch": 0.36966220522625876,
"grad_norm": 1.6187010487556108e-07,
"learning_rate": 3.5030099150141644e-05,
"loss": 0.0,
"step": 2320
},
{
"epoch": 0.3824091778202677,
"grad_norm": 1.5073992187808471e-07,
"learning_rate": 3.432188385269122e-05,
"loss": 0.0,
"step": 2400
},
{
"epoch": 0.3951561504142766,
"grad_norm": 1.3718963032260945e-07,
"learning_rate": 3.3613668555240795e-05,
"loss": 0.0,
"step": 2480
},
{
"epoch": 0.4079031230082855,
"grad_norm": 1.8094301879045815e-07,
"learning_rate": 3.290545325779037e-05,
"loss": 0.0,
"step": 2560
},
{
"epoch": 0.42065009560229444,
"grad_norm": 1.7407593588814052e-07,
"learning_rate": 3.219723796033994e-05,
"loss": 0.0,
"step": 2640
},
{
"epoch": 0.43339706819630336,
"grad_norm": 1.1643874842093282e-07,
"learning_rate": 3.148902266288952e-05,
"loss": 0.0,
"step": 2720
},
{
"epoch": 0.4461440407903123,
"grad_norm": 1.2781330838151916e-07,
"learning_rate": 3.07808073654391e-05,
"loss": 0.0,
"step": 2800
},
{
"epoch": 0.4588910133843212,
"grad_norm": 9.686536373010313e-08,
"learning_rate": 3.007259206798867e-05,
"loss": 0.0,
"step": 2880
},
{
"epoch": 0.4716379859783301,
"grad_norm": 1.1713498793142207e-07,
"learning_rate": 2.9364376770538243e-05,
"loss": 0.0,
"step": 2960
},
{
"epoch": 0.4843849585723391,
"grad_norm": 9.917301468931328e-08,
"learning_rate": 2.8656161473087822e-05,
"loss": 0.0,
"step": 3040
},
{
"epoch": 0.497131931166348,
"grad_norm": 1.1340028294171134e-07,
"learning_rate": 2.7947946175637397e-05,
"loss": 0.0,
"step": 3120
},
{
"epoch": 0.5,
"eval_accuracy": 0.9956178790534619,
"eval_loss": 0.054073479026556015,
"eval_runtime": 661.1375,
"eval_samples_per_second": 1.726,
"eval_steps_per_second": 0.433,
"step": 3138
},
{
"epoch": 1.0098789037603568,
"grad_norm": 1.056812664046447e-07,
"learning_rate": 2.723973087818697e-05,
"loss": 0.0,
"step": 3200
},
{
"epoch": 1.0226258763543659,
"grad_norm": 2.472717142154579e-07,
"learning_rate": 2.6531515580736542e-05,
"loss": 0.0,
"step": 3280
},
{
"epoch": 1.0353728489483747,
"grad_norm": 9.554004520850867e-08,
"learning_rate": 2.582330028328612e-05,
"loss": 0.0,
"step": 3360
},
{
"epoch": 1.0481198215423837,
"grad_norm": 1.1742042715923162e-07,
"learning_rate": 2.5115084985835697e-05,
"loss": 0.0,
"step": 3440
},
{
"epoch": 1.0608667941363925,
"grad_norm": 8.59054054558328e-08,
"learning_rate": 2.440686968838527e-05,
"loss": 0.0,
"step": 3520
},
{
"epoch": 1.0736137667304015,
"grad_norm": 7.50477227029478e-08,
"learning_rate": 2.3698654390934848e-05,
"loss": 0.0,
"step": 3600
},
{
"epoch": 1.0863607393244104,
"grad_norm": 9.754602103839716e-08,
"learning_rate": 2.299043909348442e-05,
"loss": 0.0,
"step": 3680
},
{
"epoch": 1.0991077119184194,
"grad_norm": 6.827690413047094e-08,
"learning_rate": 2.2282223796033996e-05,
"loss": 0.0,
"step": 3760
},
{
"epoch": 1.1118546845124282,
"grad_norm": 7.005213120692133e-08,
"learning_rate": 2.157400849858357e-05,
"loss": 0.0,
"step": 3840
},
{
"epoch": 1.1246016571064372,
"grad_norm": 6.759808712786253e-08,
"learning_rate": 2.0865793201133147e-05,
"loss": 0.0,
"step": 3920
},
{
"epoch": 1.1373486297004463,
"grad_norm": 8.198840362183546e-08,
"learning_rate": 2.015757790368272e-05,
"loss": 0.0,
"step": 4000
},
{
"epoch": 1.150095602294455,
"grad_norm": 9.729581762485395e-08,
"learning_rate": 1.9449362606232295e-05,
"loss": 0.0,
"step": 4080
},
{
"epoch": 1.1628425748884639,
"grad_norm": 6.136054508942834e-08,
"learning_rate": 1.874114730878187e-05,
"loss": 0.0,
"step": 4160
},
{
"epoch": 1.175589547482473,
"grad_norm": 6.17125976987154e-08,
"learning_rate": 1.8032932011331446e-05,
"loss": 0.0,
"step": 4240
},
{
"epoch": 1.188336520076482,
"grad_norm": 5.855752505112832e-08,
"learning_rate": 1.732471671388102e-05,
"loss": 0.0,
"step": 4320
},
{
"epoch": 1.2010834926704907,
"grad_norm": 6.57473364640282e-08,
"learning_rate": 1.6616501416430598e-05,
"loss": 0.0,
"step": 4400
},
{
"epoch": 1.2138304652644996,
"grad_norm": 5.85948498610378e-08,
"learning_rate": 1.590828611898017e-05,
"loss": 0.0,
"step": 4480
},
{
"epoch": 1.2265774378585086,
"grad_norm": 5.247872891800398e-08,
"learning_rate": 1.5200070821529747e-05,
"loss": 0.0,
"step": 4560
},
{
"epoch": 1.2393244104525176,
"grad_norm": 6.339619318396217e-08,
"learning_rate": 1.449185552407932e-05,
"loss": 0.0,
"step": 4640
},
{
"epoch": 1.2520713830465264,
"grad_norm": 5.694401394862325e-08,
"learning_rate": 1.3783640226628897e-05,
"loss": 0.0,
"step": 4720
},
{
"epoch": 1.2648183556405352,
"grad_norm": 8.568391507424167e-08,
"learning_rate": 1.3075424929178471e-05,
"loss": 0.0,
"step": 4800
},
{
"epoch": 1.2775653282345443,
"grad_norm": 6.176901479193475e-08,
"learning_rate": 1.2367209631728045e-05,
"loss": 0.0,
"step": 4880
},
{
"epoch": 1.2903123008285533,
"grad_norm": 4.9299163862315254e-08,
"learning_rate": 1.165899433427762e-05,
"loss": 0.0,
"step": 4960
},
{
"epoch": 1.3030592734225621,
"grad_norm": 8.519159422348821e-08,
"learning_rate": 1.0950779036827195e-05,
"loss": 0.0,
"step": 5040
},
{
"epoch": 1.3158062460165711,
"grad_norm": 5.331121144536155e-08,
"learning_rate": 1.024256373937677e-05,
"loss": 0.0,
"step": 5120
},
{
"epoch": 1.32855321861058,
"grad_norm": 5.610190356719613e-08,
"learning_rate": 9.534348441926346e-06,
"loss": 0.0,
"step": 5200
},
{
"epoch": 1.341300191204589,
"grad_norm": 4.2810885503286045e-08,
"learning_rate": 8.82613314447592e-06,
"loss": 0.0,
"step": 5280
},
{
"epoch": 1.3540471637985978,
"grad_norm": 4.60853151196261e-08,
"learning_rate": 8.117917847025496e-06,
"loss": 0.0,
"step": 5360
},
{
"epoch": 1.3667941363926068,
"grad_norm": 5.44949720904242e-08,
"learning_rate": 7.40970254957507e-06,
"loss": 0.0,
"step": 5440
},
{
"epoch": 1.3795411089866156,
"grad_norm": 4.676035914030763e-08,
"learning_rate": 6.701487252124647e-06,
"loss": 0.0,
"step": 5520
},
{
"epoch": 1.3922880815806247,
"grad_norm": 4.139203113595613e-08,
"learning_rate": 5.993271954674221e-06,
"loss": 0.0,
"step": 5600
},
{
"epoch": 1.4050350541746335,
"grad_norm": 4.362629724141698e-08,
"learning_rate": 5.2850566572237965e-06,
"loss": 0.0,
"step": 5680
},
{
"epoch": 1.4177820267686425,
"grad_norm": 4.268404296681183e-08,
"learning_rate": 4.576841359773371e-06,
"loss": 0.0,
"step": 5760
},
{
"epoch": 1.4305289993626513,
"grad_norm": 4.0364348308230547e-08,
"learning_rate": 3.868626062322946e-06,
"loss": 0.0,
"step": 5840
},
{
"epoch": 1.4432759719566604,
"grad_norm": 5.122909740862269e-08,
"learning_rate": 3.1604107648725214e-06,
"loss": 0.0,
"step": 5920
},
{
"epoch": 1.4560229445506692,
"grad_norm": 4.077623572129596e-08,
"learning_rate": 2.452195467422096e-06,
"loss": 0.0,
"step": 6000
},
{
"epoch": 1.4687699171446782,
"grad_norm": 4.021276112098349e-08,
"learning_rate": 1.7439801699716714e-06,
"loss": 0.0,
"step": 6080
},
{
"epoch": 1.481516889738687,
"grad_norm": 7.456922190840487e-08,
"learning_rate": 1.0357648725212467e-06,
"loss": 0.0,
"step": 6160
},
{
"epoch": 1.494263862332696,
"grad_norm": 4.9845471750131765e-08,
"learning_rate": 3.275495750708216e-07,
"loss": 0.0,
"step": 6240
},
{
"epoch": 1.5,
"eval_accuracy": 0.9956178790534619,
"eval_loss": 0.05542786046862602,
"eval_runtime": 628.2348,
"eval_samples_per_second": 1.816,
"eval_steps_per_second": 0.455,
"step": 6276
},
{
"epoch": 1.5,
"step": 6276,
"total_flos": 3.128120545409866e+19,
"train_loss": 5.207791438765269e-08,
"train_runtime": 12106.0186,
"train_samples_per_second": 2.074,
"train_steps_per_second": 0.518
},
{
"epoch": 1.5,
"eval_accuracy": 0.9956178790534619,
"eval_loss": 0.054073482751846313,
"eval_runtime": 611.7915,
"eval_samples_per_second": 1.865,
"eval_steps_per_second": 0.467,
"step": 6276
}
],
"logging_steps": 80,
"max_steps": 6276,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.128120545409866e+19,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}