{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.9867197875166,
  "global_step": 5640,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 9.411764705882353e-05,
      "loss": 1.501,
      "step": 20
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00018823529411764707,
      "loss": 1.4128,
      "step": 40
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0002823529411764706,
      "loss": 1.325,
      "step": 60
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00037647058823529414,
      "loss": 1.2828,
      "step": 80
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00047058823529411766,
      "loss": 1.2758,
      "step": 100
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0005647058823529412,
      "loss": 1.2667,
      "step": 120
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0006588235294117648,
      "loss": 1.2504,
      "step": 140
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0007529411764705883,
      "loss": 1.2394,
      "step": 160
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.0007999934028874321,
      "loss": 1.228,
      "step": 180
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0007999406272925394,
      "loss": 1.2138,
      "step": 200
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.0007998350830660272,
      "loss": 1.2122,
      "step": 220
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.0007996767841335234,
      "loss": 1.219,
      "step": 240
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0007994657513811737,
      "loss": 1.1998,
      "step": 260
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.0007992020126528848,
      "loss": 1.188,
      "step": 280
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0007988856027466511,
      "loss": 1.1931,
      "step": 300
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.000798516563409964,
      "loss": 1.1753,
      "step": 320
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0007980949433343026,
      "loss": 1.173,
      "step": 340
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.0007976207981487104,
      "loss": 1.1637,
      "step": 360
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.0007970941904124546,
      "loss": 1.1651,
      "step": 380
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.0007965151896067728,
      "loss": 1.1559,
      "step": 400
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.0007958838721257046,
      "loss": 1.1658,
      "step": 420
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.0007952003212660127,
      "loss": 1.1496,
      "step": 440
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.0007944646272161933,
      "loss": 1.1471,
      "step": 460
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.0007936768870445747,
      "loss": 1.1311,
      "step": 480
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.0007928372046865116,
      "loss": 1.1301,
      "step": 500
    },
    {
      "epoch": 0.92,
      "learning_rate": 0.0007919456909306711,
      "loss": 1.134,
      "step": 520
    },
    {
      "epoch": 0.96,
      "learning_rate": 0.0007910024634044154,
      "loss": 1.1235,
      "step": 540
    },
    {
      "epoch": 0.99,
      "learning_rate": 0.0007900076465582816,
      "loss": 1.1239,
      "step": 560
    },
    {
      "epoch": 1.03,
      "learning_rate": 0.0007889613716495616,
      "loss": 1.0878,
      "step": 580
    },
    {
      "epoch": 1.06,
      "learning_rate": 0.0007878637767249839,
      "loss": 1.0879,
      "step": 600
    },
    {
      "epoch": 1.1,
      "learning_rate": 0.0007867150066024996,
      "loss": 1.0671,
      "step": 620
    },
    {
      "epoch": 1.13,
      "learning_rate": 0.0007855152128521754,
      "loss": 1.0689,
      "step": 640
    },
    {
      "epoch": 1.17,
      "learning_rate": 0.0007842645537761941,
      "loss": 1.0794,
      "step": 660
    },
    {
      "epoch": 1.2,
      "learning_rate": 0.0007829631943879694,
      "loss": 1.0653,
      "step": 680
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.0007816113063903726,
      "loss": 1.066,
      "step": 700
    },
    {
      "epoch": 1.27,
      "learning_rate": 0.0007802090681530788,
      "loss": 1.0675,
      "step": 720
    },
    {
      "epoch": 1.31,
      "learning_rate": 0.0007787566646890325,
      "loss": 1.0598,
      "step": 740
    },
    {
      "epoch": 1.35,
      "learning_rate": 0.0007772542876300359,
      "loss": 1.0669,
      "step": 760
    },
    {
      "epoch": 1.38,
      "learning_rate": 0.0007757021352014663,
      "loss": 1.0558,
      "step": 780
    },
    {
      "epoch": 1.42,
      "learning_rate": 0.0007741004121961207,
      "loss": 1.0578,
      "step": 800
    },
    {
      "epoch": 1.45,
      "learning_rate": 0.0007724493299471956,
      "loss": 1.056,
      "step": 820
    },
    {
      "epoch": 1.49,
      "learning_rate": 0.0007707491063004035,
      "loss": 1.0491,
      "step": 840
    },
    {
      "epoch": 1.52,
      "learning_rate": 0.0007689999655852306,
      "loss": 1.0497,
      "step": 860
    },
    {
      "epoch": 1.56,
      "learning_rate": 0.0007672021385853376,
      "loss": 1.0393,
      "step": 880
    },
    {
      "epoch": 1.59,
      "learning_rate": 0.0007653558625081099,
      "loss": 1.0379,
      "step": 900
    },
    {
      "epoch": 1.63,
      "learning_rate": 0.0007634613809533613,
      "loss": 1.049,
      "step": 920
    },
    {
      "epoch": 1.66,
      "learning_rate": 0.0007615189438811918,
      "loss": 1.0594,
      "step": 940
    },
    {
      "epoch": 1.7,
      "learning_rate": 0.0007595288075790085,
      "loss": 1.0375,
      "step": 960
    },
    {
      "epoch": 1.74,
      "learning_rate": 0.0007574912346277103,
      "loss": 1.0453,
      "step": 980
    },
    {
      "epoch": 1.77,
      "learning_rate": 0.0007554064938670426,
      "loss": 1.0348,
      "step": 1000
    },
    {
      "epoch": 1.81,
      "learning_rate": 0.0007532748603601265,
      "loss": 1.0441,
      "step": 1020
    },
    {
      "epoch": 1.84,
      "learning_rate": 0.0007510966153571667,
      "loss": 1.0237,
      "step": 1040
    },
    {
      "epoch": 1.88,
      "learning_rate": 0.000748872046258343,
      "loss": 1.0478,
      "step": 1060
    },
    {
      "epoch": 1.91,
      "learning_rate": 0.0007466014465758899,
      "loss": 1.0289,
      "step": 1080
    },
    {
      "epoch": 1.95,
      "learning_rate": 0.0007442851158953712,
      "loss": 1.026,
      "step": 1100
    },
    {
      "epoch": 1.98,
      "learning_rate": 0.0007419233598361512,
      "loss": 1.0244,
      "step": 1120
    },
    {
      "epoch": 2.02,
      "learning_rate": 0.0007395164900110721,
      "loss": 0.9968,
      "step": 1140
    },
    {
      "epoch": 2.05,
      "learning_rate": 0.0007370648239853385,
      "loss": 0.9798,
      "step": 1160
    },
    {
      "epoch": 2.09,
      "learning_rate": 0.0007345686852346176,
      "loss": 0.9529,
      "step": 1180
    },
    {
      "epoch": 2.12,
      "learning_rate": 0.0007320284031023603,
      "loss": 0.9666,
      "step": 1200
    },
    {
      "epoch": 2.16,
      "learning_rate": 0.000729444312756346,
      "loss": 0.9797,
      "step": 1220
    },
    {
      "epoch": 2.2,
      "learning_rate": 0.0007268167551444611,
      "loss": 0.9674,
      "step": 1240
    },
    {
      "epoch": 2.23,
      "learning_rate": 0.0007241460769497138,
      "loss": 0.9666,
      "step": 1260
    },
    {
      "epoch": 2.27,
      "learning_rate": 0.0007214326305444917,
      "loss": 0.9785,
      "step": 1280
    },
    {
      "epoch": 2.3,
      "learning_rate": 0.0007186767739440701,
      "loss": 0.9629,
      "step": 1300
    },
    {
      "epoch": 2.34,
      "learning_rate": 0.0007158788707593748,
      "loss": 0.973,
      "step": 1320
    },
    {
      "epoch": 2.37,
      "learning_rate": 0.0007130392901490069,
      "loss": 0.9649,
      "step": 1340
    },
    {
      "epoch": 2.41,
      "learning_rate": 0.0007101584067705355,
      "loss": 0.9766,
      "step": 1360
    },
    {
      "epoch": 2.44,
      "learning_rate": 0.0007072366007310646,
      "loss": 0.954,
      "step": 1380
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.0007042742575370822,
      "loss": 0.9576,
      "step": 1400
    },
    {
      "epoch": 2.51,
      "learning_rate": 0.0007012717680435956,
      "loss": 0.9783,
      "step": 1420
    },
    {
      "epoch": 2.55,
      "learning_rate": 0.0006982295284025612,
      "loss": 0.9553,
      "step": 1440
    },
    {
      "epoch": 2.59,
      "learning_rate": 0.0006951479400106161,
      "loss": 0.951,
      "step": 1460
    },
    {
      "epoch": 2.62,
      "learning_rate": 0.000692027409456118,
      "loss": 0.9647,
      "step": 1480
    },
    {
      "epoch": 2.66,
      "learning_rate": 0.0006888683484654981,
      "loss": 0.9656,
      "step": 1500
    },
    {
      "epoch": 2.69,
      "learning_rate": 0.0006856711738489386,
      "loss": 0.9507,
      "step": 1520
    },
    {
      "epoch": 2.73,
      "learning_rate": 0.0006824363074453778,
      "loss": 0.9496,
      "step": 1540
    },
    {
      "epoch": 2.76,
      "learning_rate": 0.0006791641760668519,
      "loss": 0.9571,
      "step": 1560
    },
    {
      "epoch": 2.8,
      "learning_rate": 0.0006758552114421815,
      "loss": 0.9494,
      "step": 1580
    },
    {
      "epoch": 2.83,
      "learning_rate": 0.0006725098501600088,
      "loss": 0.9556,
      "step": 1600
    },
    {
      "epoch": 2.87,
      "learning_rate": 0.0006691285336111928,
      "loss": 0.9411,
      "step": 1620
    },
    {
      "epoch": 2.9,
      "learning_rate": 0.0006657117079305725,
      "loss": 0.9501,
      "step": 1640
    },
    {
      "epoch": 2.94,
      "learning_rate": 0.0006622598239381033,
      "loss": 0.9598,
      "step": 1660
    },
    {
      "epoch": 2.97,
      "learning_rate": 0.0006587733370793743,
      "loss": 0.9599,
      "step": 1680
    },
    {
      "epoch": 3.01,
      "learning_rate": 0.0006552527073655178,
      "loss": 0.9306,
      "step": 1700
    },
    {
      "epoch": 3.05,
      "learning_rate": 0.0006516983993125138,
      "loss": 0.9013,
      "step": 1720
    },
    {
      "epoch": 3.08,
      "learning_rate": 0.0006481108818799015,
      "loss": 0.8798,
      "step": 1740
    },
    {
      "epoch": 3.12,
      "learning_rate": 0.0006444906284089044,
      "loss": 0.903,
      "step": 1760
    },
    {
      "epoch": 3.15,
      "learning_rate": 0.000640838116559977,
      "loss": 0.9017,
      "step": 1780
    },
    {
      "epoch": 3.19,
      "learning_rate": 0.0006371538282497815,
      "loss": 0.9051,
      "step": 1800
    },
    {
      "epoch": 3.22,
      "learning_rate": 0.0006334382495876036,
      "loss": 0.8966,
      "step": 1820
    },
    {
      "epoch": 3.26,
      "learning_rate": 0.0006296918708112143,
      "loss": 0.8863,
      "step": 1840
    },
    {
      "epoch": 3.29,
      "learning_rate": 0.0006259151862221875,
      "loss": 0.8926,
      "step": 1860
    },
    {
      "epoch": 3.33,
      "learning_rate": 0.0006221086941206817,
      "loss": 0.908,
      "step": 1880
    },
    {
      "epoch": 3.36,
      "learning_rate": 0.0006182728967396925,
      "loss": 0.9007,
      "step": 1900
    },
    {
      "epoch": 3.4,
      "learning_rate": 0.0006144083001787886,
      "loss": 0.8872,
      "step": 1920
    },
    {
      "epoch": 3.44,
      "learning_rate": 0.0006105154143373362,
      "loss": 0.8984,
      "step": 1940
    },
    {
      "epoch": 3.47,
      "learning_rate": 0.0006065947528472215,
      "loss": 0.9123,
      "step": 1960
    },
    {
      "epoch": 3.51,
      "learning_rate": 0.0006026468330050827,
      "loss": 0.8929,
      "step": 1980
    },
    {
      "epoch": 3.54,
      "learning_rate": 0.0005986721757040564,
      "loss": 0.9145,
      "step": 2000
    },
    {
      "epoch": 3.58,
      "learning_rate": 0.0005946713053650507,
      "loss": 0.8867,
      "step": 2020
    },
    {
      "epoch": 3.61,
      "learning_rate": 0.0005906447498675521,
      "loss": 0.8914,
      "step": 2040
    },
    {
      "epoch": 3.65,
      "learning_rate": 0.0005865930404799774,
      "loss": 0.8946,
      "step": 2060
    },
    {
      "epoch": 3.68,
      "learning_rate": 0.0005825167117895765,
      "loss": 0.892,
      "step": 2080
    },
    {
      "epoch": 3.72,
      "learning_rate": 0.0005784163016318987,
      "loss": 0.8875,
      "step": 2100
    },
    {
      "epoch": 3.75,
      "learning_rate": 0.0005742923510198303,
      "loss": 0.888,
      "step": 2120
    },
    {
      "epoch": 3.79,
      "learning_rate": 0.0005701454040722124,
      "loss": 0.9078,
      "step": 2140
    },
    {
      "epoch": 3.82,
      "learning_rate": 0.0005659760079420498,
      "loss": 0.9027,
      "step": 2160
    },
    {
      "epoch": 3.86,
      "learning_rate": 0.000561784712744318,
      "loss": 0.8997,
      "step": 2180
    },
    {
      "epoch": 3.9,
      "learning_rate": 0.0005575720714833808,
      "loss": 0.9053,
      "step": 2200
    },
    {
      "epoch": 3.93,
      "learning_rate": 0.0005533386399800275,
      "loss": 0.9054,
      "step": 2220
    },
    {
      "epoch": 3.97,
      "learning_rate": 0.0005490849767981348,
      "loss": 0.8988,
      "step": 2240
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.0005448116431709716,
      "loss": 0.8903,
      "step": 2260
    },
    {
      "epoch": 4.04,
      "learning_rate": 0.0005405192029271477,
      "loss": 0.8373,
      "step": 2280
    },
    {
      "epoch": 4.07,
      "learning_rate": 0.0005362082224162223,
      "loss": 0.8336,
      "step": 2300
    },
    {
      "epoch": 4.11,
      "learning_rate": 0.0005318792704339792,
      "loss": 0.8483,
      "step": 2320
    },
    {
      "epoch": 4.14,
      "learning_rate": 0.0005275329181473787,
      "loss": 0.8453,
      "step": 2340
    },
    {
      "epoch": 4.18,
      "learning_rate": 0.0005231697390191976,
      "loss": 0.8351,
      "step": 2360
    },
    {
      "epoch": 4.21,
      "learning_rate": 0.000518790308732366,
      "loss": 0.8329,
      "step": 2380
    },
    {
      "epoch": 4.25,
      "learning_rate": 0.0005143952051140103,
      "loss": 0.8394,
      "step": 2400
    },
    {
      "epoch": 4.29,
      "learning_rate": 0.000509985008059215,
      "loss": 0.8526,
      "step": 2420
    },
    {
      "epoch": 4.32,
      "learning_rate": 0.0005055602994545098,
      "loss": 0.826,
      "step": 2440
    },
    {
      "epoch": 4.36,
      "learning_rate": 0.0005011216631010953,
      "loss": 0.849,
      "step": 2460
    },
    {
      "epoch": 4.39,
      "learning_rate": 0.0004966696846378156,
      "loss": 0.8507,
      "step": 2480
    },
    {
      "epoch": 4.43,
      "learning_rate": 0.000492204951463888,
      "loss": 0.8461,
      "step": 2500
    },
    {
      "epoch": 4.46,
      "learning_rate": 0.00048772805266140154,
      "loss": 0.8533,
      "step": 2520
    },
    {
      "epoch": 4.5,
      "learning_rate": 0.00048323957891759203,
      "loss": 0.8384,
      "step": 2540
    },
    {
      "epoch": 4.53,
      "learning_rate": 0.00047874012244690696,
      "loss": 0.842,
      "step": 2560
    },
    {
      "epoch": 4.57,
      "learning_rate": 0.000474230276912867,
      "loss": 0.8608,
      "step": 2580
    },
    {
      "epoch": 4.6,
      "learning_rate": 0.00046971063734973833,
      "loss": 0.8562,
      "step": 2600
    },
    {
      "epoch": 4.64,
      "learning_rate": 0.0004651818000840229,
      "loss": 0.8594,
      "step": 2620
    },
    {
      "epoch": 4.67,
      "learning_rate": 0.0004606443626557778,
      "loss": 0.8608,
      "step": 2640
    },
    {
      "epoch": 4.71,
      "learning_rate": 0.0004560989237397758,
      "loss": 0.8486,
      "step": 2660
    },
    {
      "epoch": 4.75,
      "learning_rate": 0.00045154608306651514,
      "loss": 0.869,
      "step": 2680
    },
    {
      "epoch": 4.78,
      "learning_rate": 0.0004469864413430907,
      "loss": 0.8482,
      "step": 2700
    },
    {
      "epoch": 4.82,
      "learning_rate": 0.00044242060017393573,
      "loss": 0.8583,
      "step": 2720
    },
    {
      "epoch": 4.85,
      "learning_rate": 0.00043784916198144543,
      "loss": 0.8582,
      "step": 2740
    },
    {
      "epoch": 4.89,
      "learning_rate": 0.00043327272992649317,
      "loss": 0.8504,
      "step": 2760
    },
    {
      "epoch": 4.92,
      "learning_rate": 0.00042869190782884794,
      "loss": 0.8592,
      "step": 2780
    },
    {
      "epoch": 4.96,
      "learning_rate": 0.00042410730008750623,
      "loss": 0.8545,
      "step": 2800
    },
    {
      "epoch": 4.99,
      "learning_rate": 0.00041951951160094664,
      "loss": 0.855,
      "step": 2820
    },
    {
      "epoch": 5.03,
      "learning_rate": 0.00041492914768731927,
      "loss": 0.7869,
      "step": 2840
    },
    {
      "epoch": 5.06,
      "learning_rate": 0.0004103368140045789,
      "loss": 0.8083,
      "step": 2860
    },
    {
      "epoch": 5.1,
      "learning_rate": 0.00040574311647057366,
      "loss": 0.8108,
      "step": 2880
    },
    {
      "epoch": 5.14,
      "learning_rate": 0.00040114866118310045,
      "loss": 0.7968,
      "step": 2900
    },
    {
      "epoch": 5.17,
      "learning_rate": 0.0003965540543399344,
      "loss": 0.8016,
      "step": 2920
    },
    {
      "epoch": 5.21,
      "learning_rate": 0.00039195990215884756,
      "loss": 0.7967,
      "step": 2940
    },
    {
      "epoch": 5.24,
      "learning_rate": 0.00038736681079762293,
      "loss": 0.8096,
      "step": 2960
    },
    {
      "epoch": 5.28,
      "learning_rate": 0.0003827753862740779,
      "loss": 0.8073,
      "step": 2980
    },
    {
      "epoch": 5.31,
      "learning_rate": 0.0003781862343861055,
      "loss": 0.804,
      "step": 3000
    },
    {
      "epoch": 5.35,
      "learning_rate": 0.00037359996063174425,
      "loss": 0.8119,
      "step": 3020
    },
    {
      "epoch": 5.38,
      "learning_rate": 0.0003690171701292887,
      "loss": 0.7997,
      "step": 3040
    },
    {
      "epoch": 5.42,
      "learning_rate": 0.0003644384675374489,
      "loss": 0.8202,
      "step": 3060
    },
    {
      "epoch": 5.45,
      "learning_rate": 0.0003598644569755713,
      "loss": 0.815,
      "step": 3080
    },
    {
      "epoch": 5.49,
      "learning_rate": 0.00035529574194393033,
      "loss": 0.825,
      "step": 3100
    },
    {
      "epoch": 5.52,
      "learning_rate": 0.00035073292524410207,
      "loss": 0.8171,
      "step": 3120
    },
    {
      "epoch": 5.56,
      "learning_rate": 0.00034617660889943,
      "loss": 0.7921,
      "step": 3140
    },
    {
      "epoch": 5.6,
      "learning_rate": 0.00034162739407559285,
      "loss": 0.8299,
      "step": 3160
    },
    {
      "epoch": 5.63,
      "learning_rate": 0.0003370858810012869,
      "loss": 0.811,
      "step": 3180
    },
    {
      "epoch": 5.67,
      "learning_rate": 0.00033255266888903006,
      "loss": 0.8093,
      "step": 3200
    },
    {
      "epoch": 5.7,
      "learning_rate": 0.00032802835585610225,
      "loss": 0.8106,
      "step": 3220
    },
    {
      "epoch": 5.74,
      "learning_rate": 0.00032351353884562783,
      "loss": 0.8053,
      "step": 3240
    },
    {
      "epoch": 5.77,
      "learning_rate": 0.00031900881354781556,
      "loss": 0.8161,
      "step": 3260
    },
    {
      "epoch": 5.81,
      "learning_rate": 0.00031451477432136154,
      "loss": 0.8186,
      "step": 3280
    },
    {
      "epoch": 5.84,
      "learning_rate": 0.0003100320141150293,
      "loss": 0.8046,
      "step": 3300
    },
    {
      "epoch": 5.88,
      "learning_rate": 0.00030556112438941526,
      "loss": 0.8236,
      "step": 3320
    },
    {
      "epoch": 5.91,
      "learning_rate": 0.00030110269503891084,
      "loss": 0.8057,
      "step": 3340
    },
    {
      "epoch": 5.95,
      "learning_rate": 0.0002966573143138713,
      "loss": 0.8109,
      "step": 3360
    },
    {
      "epoch": 5.98,
      "learning_rate": 0.00029222556874300036,
      "loss": 0.8163,
      "step": 3380
    },
    {
      "epoch": 6.02,
      "learning_rate": 0.0002878080430559646,
      "loss": 0.7901,
      "step": 3400
    },
    {
      "epoch": 6.06,
      "learning_rate": 0.0002834053201062417,
      "loss": 0.7749,
      "step": 3420
    },
    {
      "epoch": 6.09,
      "learning_rate": 0.00027901798079421977,
      "loss": 0.7775,
      "step": 3440
    },
    {
      "epoch": 6.13,
      "learning_rate": 0.0002746466039905513,
      "loss": 0.7589,
      "step": 3460
    },
    {
      "epoch": 6.16,
      "learning_rate": 0.000270291766459777,
      "loss": 0.7742,
      "step": 3480
    },
    {
      "epoch": 6.2,
      "learning_rate": 0.00026595404278422684,
      "loss": 0.7914,
      "step": 3500
    },
    {
      "epoch": 6.23,
      "learning_rate": 0.00026163400528820836,
      "loss": 0.7909,
      "step": 3520
    },
    {
      "epoch": 6.27,
      "learning_rate": 0.0002573322239624947,
      "loss": 0.7653,
      "step": 3540
    },
    {
      "epoch": 6.3,
      "learning_rate": 0.000253049266389118,
      "loss": 0.7779,
      "step": 3560
    },
    {
      "epoch": 6.34,
      "learning_rate": 0.0002487856976664831,
      "loss": 0.7771,
      "step": 3580
    },
    {
      "epoch": 6.37,
      "learning_rate": 0.00024454208033480683,
      "loss": 0.771,
      "step": 3600
    },
    {
      "epoch": 6.41,
      "learning_rate": 0.00024031897430189695,
      "loss": 0.7788,
      "step": 3620
    },
    {
      "epoch": 6.45,
      "learning_rate": 0.00023611693676927606,
      "loss": 0.7798,
      "step": 3640
    },
    {
      "epoch": 6.48,
      "learning_rate": 0.00023193652215866429,
      "loss": 0.7779,
      "step": 3660
    },
    {
      "epoch": 6.52,
      "learning_rate": 0.00022777828203882875,
      "loss": 0.7784,
      "step": 3680
    },
    {
      "epoch": 6.55,
      "learning_rate": 0.00022364276505280794,
      "loss": 0.7946,
      "step": 3700
    },
    {
      "epoch": 6.59,
      "learning_rate": 0.0002195305168455239,
      "loss": 0.774,
      "step": 3720
    },
    {
      "epoch": 6.62,
      "learning_rate": 0.00021544207999178917,
      "loss": 0.7823,
      "step": 3740
    },
    {
      "epoch": 6.66,
      "learning_rate": 0.00021137799392471814,
      "loss": 0.7779,
      "step": 3760
    },
    {
      "epoch": 6.69,
      "learning_rate": 0.00020733879486455433,
      "loss": 0.7867,
      "step": 3780
    },
    {
      "epoch": 6.73,
      "learning_rate": 0.0002033250157479206,
      "loss": 0.7738,
      "step": 3800
    },
    {
      "epoch": 6.76,
      "learning_rate": 0.0001993371861575028,
      "loss": 0.7814,
      "step": 3820
    },
    {
      "epoch": 6.8,
      "learning_rate": 0.00019537583225217605,
      "loss": 0.7695,
      "step": 3840
    },
    {
      "epoch": 6.83,
      "learning_rate": 0.00019144147669758322,
      "loss": 0.7846,
      "step": 3860
    },
    {
      "epoch": 6.87,
      "learning_rate": 0.00018753463859717283,
      "loss": 0.7779,
      "step": 3880
    },
    {
      "epoch": 6.91,
      "learning_rate": 0.0001836558334237088,
      "loss": 0.7769,
      "step": 3900
    },
    {
      "epoch": 6.94,
      "learning_rate": 0.0001798055729512579,
      "loss": 0.782,
      "step": 3920
    },
    {
      "epoch": 6.98,
      "learning_rate": 0.00017598436518766596,
      "loss": 0.7683,
      "step": 3940
    },
    {
      "epoch": 7.01,
      "learning_rate": 0.0001721927143075305,
      "loss": 0.7674,
      "step": 3960
    },
    {
      "epoch": 7.05,
      "learning_rate": 0.00016843112058567935,
      "loss": 0.7427,
      "step": 3980
    },
    {
      "epoch": 7.08,
      "learning_rate": 0.00016470008033116443,
      "loss": 0.7627,
      "step": 4000
    },
    {
      "epoch": 7.12,
      "learning_rate": 0.00016100008582177705,
      "loss": 0.7541,
      "step": 4020
    },
    {
      "epoch": 7.15,
      "learning_rate": 0.00015733162523909707,
      "loss": 0.7654,
      "step": 4040
    },
    {
      "epoch": 7.19,
      "learning_rate": 0.0001536951826040813,
      "loss": 0.7382,
      "step": 4060
    },
    {
      "epoch": 7.22,
      "learning_rate": 0.0001500912377132013,
      "loss": 0.7555,
      "step": 4080
    },
    {
      "epoch": 7.26,
      "learning_rate": 0.00014652026607513848,
      "loss": 0.748,
      "step": 4100
    },
    {
      "epoch": 7.3,
      "learning_rate": 0.00014298273884804478,
      "loss": 0.753,
      "step": 4120
    },
    {
      "epoch": 7.33,
      "learning_rate": 0.00013947912277737808,
      "loss": 0.7594,
      "step": 4140
    },
    {
      "epoch": 7.37,
      "learning_rate": 0.00013600988013431832,
      "loss": 0.7387,
      "step": 4160
    },
    {
      "epoch": 7.4,
      "learning_rate": 0.00013257546865477572,
      "loss": 0.7551,
      "step": 4180
    },
    {
      "epoch": 7.44,
      "learning_rate": 0.00012917634147899607,
      "loss": 0.7609,
      "step": 4200
    },
    {
      "epoch": 7.47,
      "learning_rate": 0.00012581294709177327,
      "loss": 0.762,
      "step": 4220
    },
    {
      "epoch": 7.51,
      "learning_rate": 0.00012248572926327537,
      "loss": 0.755,
      "step": 4240
    },
    {
      "epoch": 7.54,
      "learning_rate": 0.00011919512699049314,
      "loss": 0.753,
      "step": 4260
    },
    {
      "epoch": 7.58,
      "learning_rate": 0.00011594157443931872,
      "loss": 0.7603,
      "step": 4280
    },
    {
      "epoch": 7.61,
      "learning_rate": 0.0001127255008872604,
      "loss": 0.7565,
      "step": 4300
    },
    {
      "epoch": 7.65,
      "learning_rate": 0.00010954733066680401,
      "loss": 0.7542,
      "step": 4320
    },
    {
      "epoch": 7.68,
      "learning_rate": 0.00010640748310942559,
      "loss": 0.7674,
      "step": 4340
    },
    {
      "epoch": 7.72,
      "learning_rate": 0.00010330637249026445,
      "loss": 0.7447,
      "step": 4360
    },
    {
      "epoch": 7.76,
      "learning_rate": 0.00010024440797346324,
      "loss": 0.7582,
      "step": 4380
    },
    {
      "epoch": 7.79,
      "learning_rate": 9.722199355818227e-05,
      "loss": 0.7598,
      "step": 4400
    },
    {
      "epoch": 7.83,
      "learning_rate": 9.423952802529564e-05,
      "loss": 0.7389,
      "step": 4420
    },
    {
      "epoch": 7.86,
      "learning_rate": 9.129740488477518e-05,
      "loss": 0.759,
      "step": 4440
    },
    {
      "epoch": 7.9,
      "learning_rate": 8.83960123237706e-05,
      "loss": 0.7442,
      "step": 4460
    },
    {
      "epoch": 7.93,
      "learning_rate": 8.553573315539188e-05,
      "loss": 0.7627,
      "step": 4480
    },
    {
      "epoch": 7.97,
      "learning_rate": 8.271694476819956e-05,
      "loss": 0.7531,
      "step": 4500
    },
    {
      "epoch": 8.0,
      "learning_rate": 7.994001907641262e-05,
      "loss": 0.7567,
      "step": 4520
    },
    {
      "epoch": 8.04,
      "learning_rate": 7.720532247083743e-05,
      "loss": 0.7274,
      "step": 4540
    },
    {
      "epoch": 8.07,
      "learning_rate": 7.451321577052533e-05,
      "loss": 0.7347,
      "step": 4560
    },
    {
      "epoch": 8.11,
      "learning_rate": 7.18640541751661e-05,
      "loss": 0.752,
      "step": 4580
    },
    {
      "epoch": 8.15,
      "learning_rate": 6.925818721822239e-05,
      "loss": 0.7293,
      "step": 4600
    },
    {
      "epoch": 8.18,
      "learning_rate": 6.669595872081211e-05,
      "loss": 0.7213,
      "step": 4620
    },
    {
      "epoch": 8.22,
      "learning_rate": 6.417770674634365e-05,
      "loss": 0.7246,
      "step": 4640
    },
    {
      "epoch": 8.25,
      "learning_rate": 6.170376355591204e-05,
      "loss": 0.7353,
      "step": 4660
    },
    {
      "epoch": 8.29,
      "learning_rate": 5.9274455564459896e-05,
      "loss": 0.7396,
      "step": 4680
    },
    {
      "epoch": 8.32,
      "learning_rate": 5.689010329770965e-05,
      "loss": 0.7401,
      "step": 4700
    },
    {
      "epoch": 8.36,
      "learning_rate": 5.455102134987304e-05,
      "loss": 0.7399,
      "step": 4720
    },
    {
      "epoch": 8.39,
      "learning_rate": 5.225751834214339e-05,
      "loss": 0.7427,
      "step": 4740
    },
    {
      "epoch": 8.43,
      "learning_rate": 5.000989688197555e-05,
      "loss": 0.7289,
      "step": 4760
    },
    {
      "epoch": 8.46,
      "learning_rate": 4.780845352315968e-05,
      "loss": 0.7398,
      "step": 4780
    },
    {
      "epoch": 8.5,
      "learning_rate": 4.565347872669339e-05,
      "loss": 0.7332,
      "step": 4800
    },
    {
      "epoch": 8.53,
      "learning_rate": 4.3545256822458445e-05,
      "loss": 0.7365,
      "step": 4820
    },
    {
      "epoch": 8.57,
      "learning_rate": 4.148406597170529e-05,
      "loss": 0.7347,
      "step": 4840
    },
    {
      "epoch": 8.61,
      "learning_rate": 3.947017813035254e-05,
      "loss": 0.7515,
      "step": 4860
    },
    {
      "epoch": 8.64,
      "learning_rate": 3.7503859013104806e-05,
      "loss": 0.7397,
      "step": 4880
    },
    {
      "epoch": 8.68,
      "learning_rate": 3.5585368058393834e-05,
      "loss": 0.7597,
      "step": 4900
    },
    {
      "epoch": 8.71,
      "learning_rate": 3.3714958394147975e-05,
      "loss": 0.7391,
      "step": 4920
    },
    {
      "epoch": 8.75,
      "learning_rate": 3.1892876804394144e-05,
      "loss": 0.7359,
      "step": 4940
    },
    {
      "epoch": 8.78,
      "learning_rate": 3.0119363696697078e-05,
      "loss": 0.7412,
      "step": 4960
    },
    {
      "epoch": 8.82,
      "learning_rate": 2.839465307043927e-05,
      "loss": 0.7485,
      "step": 4980
    },
    {
      "epoch": 8.85,
      "learning_rate": 2.6718972485947037e-05,
      "loss": 0.7534,
      "step": 5000
    },
    {
      "epoch": 8.89,
      "learning_rate": 2.5092543034466264e-05,
      "loss": 0.7405,
      "step": 5020
    },
    {
      "epoch": 8.92,
      "learning_rate": 2.3515579308990597e-05,
      "loss": 0.7415,
      "step": 5040
    },
    {
      "epoch": 8.96,
      "learning_rate": 2.1988289375948524e-05,
      "loss": 0.7309,
      "step": 5060
    },
    {
      "epoch": 9.0,
      "learning_rate": 2.0510874747750575e-05,
      "loss": 0.7418,
      "step": 5080
    },
    {
      "epoch": 9.03,
      "learning_rate": 1.9083530356201407e-05,
      "loss": 0.7469,
      "step": 5100
    },
    {
      "epoch": 9.07,
      "learning_rate": 1.7706444526780585e-05,
      "loss": 0.7385,
      "step": 5120
    },
    {
      "epoch": 9.1,
      "learning_rate": 1.637979895379429e-05,
      "loss": 0.7333,
      "step": 5140
    },
    {
      "epoch": 9.14,
      "learning_rate": 1.5103768676402885e-05,
      "loss": 0.7314,
      "step": 5160
    },
    {
      "epoch": 9.17,
      "learning_rate": 1.38785220555254e-05,
      "loss": 0.74,
      "step": 5180
    },
    {
      "epoch": 9.21,
      "learning_rate": 1.270422075162645e-05,
      "loss": 0.7481,
      "step": 5200
    },
    {
      "epoch": 9.24,
      "learning_rate": 1.1581019703386143e-05,
      "loss": 0.7222,
      "step": 5220
    },
    {
      "epoch": 9.28,
      "learning_rate": 1.0509067107257365e-05,
      "loss": 0.7259,
      "step": 5240
    },
    {
      "epoch": 9.31,
      "learning_rate": 9.488504397912712e-06,
      "loss": 0.7309,
      "step": 5260
    },
    {
      "epoch": 9.35,
      "learning_rate": 8.51946622958324e-06,
      "loss": 0.7212,
      "step": 5280
    },
    {
      "epoch": 9.38,
      "learning_rate": 7.602080458292227e-06,
      "loss": 0.7254,
      "step": 5300
    },
    {
      "epoch": 9.42,
      "learning_rate": 6.7364681249854735e-06,
      "loss": 0.728,
      "step": 5320
    },
    {
      "epoch": 9.46,
      "learning_rate": 5.922743439561229e-06,
      "loss": 0.7341,
      "step": 5340
    },
    {
      "epoch": 9.49,
      "learning_rate": 5.161013765801137e-06,
      "loss": 0.721,
      "step": 5360
    },
    {
      "epoch": 9.53,
      "learning_rate": 4.451379607204453e-06,
      "loss": 0.725,
      "step": 5380
    },
    {
      "epoch": 9.56,
      "learning_rate": 3.7939345937275884e-06,
      "loss": 0.7399,
      "step": 5400
    },
    {
      "epoch": 9.6,
      "learning_rate": 3.1887654694303883e-06,
      "loss": 0.7301,
      "step": 5420
    },
    {
      "epoch": 9.63,
      "learning_rate": 2.635952081031201e-06,
      "loss": 0.7222,
      "step": 5440
    },
    {
      "epoch": 9.67,
      "learning_rate": 2.1355673673715716e-06,
      "loss": 0.7308,
      "step": 5460
    },
    {
      "epoch": 9.7,
      "learning_rate": 1.6876773497926046e-06,
      "loss": 0.7417,
      "step": 5480
    },
    {
      "epoch": 9.74,
      "learning_rate": 1.292341123424423e-06,
      "loss": 0.7348,
      "step": 5500
    },
    {
      "epoch": 9.77,
      "learning_rate": 9.496108493884936e-07,
      "loss": 0.7278,
      "step": 5520
    },
    {
      "epoch": 9.81,
      "learning_rate": 6.595317479159313e-07,
      "loss": 0.7257,
      "step": 5540
    },
    {
      "epoch": 9.85,
      "learning_rate": 4.2214209238085054e-07,
      "loss": 0.7296,
      "step": 5560
    },
    {
      "epoch": 9.88,
      "learning_rate": 2.3747320425053786e-07,
      "loss": 0.7301,
      "step": 5580
    },
    {
      "epoch": 9.92,
      "learning_rate": 1.0554944895293517e-07,
      "loss": 0.739,
      "step": 5600
    },
    {
      "epoch": 9.95,
      "learning_rate": 2.638823266174484e-08,
      "loss": 0.7284,
      "step": 5620
    },
    {
      "epoch": 9.99,
      "learning_rate": 0.0,
      "loss": 0.7294,
      "step": 5640
    },
    {
      "epoch": 9.99,
      "step": 5640,
      "total_flos": 1.4686171215861645e+19,
      "train_loss": 0.8784972819876163,
      "train_runtime": 13393.5014,
      "train_samples_per_second": 53.949,
      "train_steps_per_second": 0.421
    }
  ],
  "max_steps": 5640,
  "num_train_epochs": 10,
  "total_flos": 1.4686171215861645e+19,
  "trial_name": null,
  "trial_params": null
}