mlplayer-top5 / eval.log
davidquarel's picture
Upload folder using huggingface_hub
be4085b verified
type eval | step 0 | loss 79.9867 138.0471 290.0014 391.2957 | checkpoint False | ce_loss 1.5684 | sae_losses 79.7252 0.2616 136.0259 2.0211 285.0749 4.9263 363.8107 27.4848 | ce_loss_increases 2.3840 3.3337 3.3473 2.7219 | compound_ce_loss_increase 4.2002 | l0s 5.0925 5.1237 5.0739 5.0805 5.1016 5.0916 5.0892 5.0852 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 159.6309 530.3308 731.3961 1304.6327 | recon_l2 79.7252 0.2616 136.0259 2.0211 285.0749 4.9263 363.8107 27.4848
type eval | step 0 | loss 89.8302 141.9365 287.0340 410.3918 | checkpoint False | ce_loss 1.5684 | sae_losses 89.5180 0.3122 139.9229 2.0137 282.0403 4.9939 383.9103 26.4814 | ce_loss_increases 2.3990 3.1607 3.3752 2.7120 | compound_ce_loss_increase 4.0803 | l0s 5.0897 5.0897 5.0744 5.0888 5.1042 5.0937 5.0892 5.0860 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 184.2288 516.1461 734.7141 1177.3376 | recon_l2 89.5180 0.3122 139.9229 2.0137 282.0403 4.9939 383.9103 26.4814
type eval | step 250 | loss 21.8788 43.1939 96.7664 145.9026 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 21.8567 0.0222 42.6814 0.5125 95.0570 1.7095 139.4156 6.4870 | ce_loss_increases 0.3521 1.9284 2.2386 1.9695 | compound_ce_loss_increase 3.8136 | l0s 5.0002 5.0000 5.0000 5.0000 5.0000 5.0000 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 124.7379 346.9344 502.8258 712.3646 | recon_l2 21.8567 0.0222 42.6814 0.5125 95.0570 1.7095 139.4156 6.4870
type eval | step 0 | loss 77.0099 157.6908 295.2014 403.0813 | checkpoint False | ce_loss 1.5684 | sae_losses 76.7796 0.2303 155.6699 2.0208 290.5569 4.6444 378.1316 24.9494 | ce_loss_increases 2.1872 3.1606 3.2822 2.6717 | compound_ce_loss_increase 4.0601 | l0s 5.1019 5.0969 5.0706 5.0803 5.1029 5.0851 5.0862 5.0845 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 76.7796 0.2303 155.6699 2.0208 290.5569 4.6444 378.1316 24.9494
type eval | step 250 | loss 20.1699 46.9465 101.3334 144.4482 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 20.1483 0.0216 46.4373 0.5092 99.6890 1.6444 137.7154 6.7328 | ce_loss_increases 0.2556 1.9258 2.1652 1.9052 | compound_ce_loss_increase 3.7999 | l0s 5.0000 5.0000 5.0000 5.0000 5.0000 5.0000 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 20.1483 0.0216 46.4373 0.5092 99.6890 1.6444 137.7154 6.7328
type eval | step 0 | loss 76.8450 150.6551 276.2295 388.9503 | checkpoint False | ce_loss 1.5684 | sae_losses 76.6319 0.2131 148.4654 2.1897 271.3323 4.8971 364.2046 24.7455 | ce_loss_increases 2.3251 3.1292 3.2545 2.7813 | compound_ce_loss_increase 4.0483 | l0s 5.0942 5.1104 5.0719 5.0810 5.1062 5.0873 5.0941 5.0894 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 76.6319 0.2131 148.4654 2.1897 271.3323 4.8971 364.2046 24.7455
type eval | step 0 | loss 81.9714 143.2355 274.1470 400.5002 | checkpoint False | ce_loss 1.5684 | sae_losses 81.6941 0.2772 141.2508 1.9847 269.2685 4.8786 376.8170 23.6829 | ce_loss_increases 2.5396 3.2103 3.1226 2.8084 | compound_ce_loss_increase 4.1972 | l0s 5.1019 5.0852 5.0715 5.0855 5.1055 5.0893 5.0942 5.0889 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 81.6941 0.2772 141.2508 1.9847 269.2685 4.8786 376.8170 23.6829
type eval | step 250 | loss 20.9388 43.1755 95.5738 146.5354 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 20.9168 0.0221 42.6576 0.5179 93.9140 1.6598 139.8276 6.7078 | ce_loss_increases 0.3121 1.9789 2.1396 1.9743 | compound_ce_loss_increase 3.8205 | l0s 5.0000 5.0000 5.0000 5.0000 5.0000 5.0000 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 20.9168 0.0221 42.6576 0.5179 93.9140 1.6598 139.8276 6.7078
type eval | step 500 | loss 3.0430 11.2521 29.9784 54.5921 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 3.0393 0.0037 11.1236 0.1285 29.4139 0.5645 52.7966 1.7954 | ce_loss_increases 0.0434 0.5293 0.9062 0.8325 | compound_ce_loss_increase 1.8755 | l0s 5.0000 5.0000 5.0000 5.0000 5.0000 5.0000 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 3.0393 0.0037 11.1236 0.1285 29.4139 0.5645 52.7966 1.7954
type eval | step 750 | loss 0.4888 4.7263 14.9290 35.8565 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.4879 0.0009 4.6647 0.0616 14.5845 0.3444 34.6596 1.1969 | ce_loss_increases 0.0076 0.2441 0.4962 0.5414 | compound_ce_loss_increase 0.9793 | l0s 5.0000 4.9997 5.0000 5.0000 5.0000 5.0000 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.4879 0.0009 4.6647 0.0616 14.5845 0.3444 34.6596 1.1969
type eval | step 1000 | loss 0.2824 3.2057 11.3992 31.6056 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.2819 0.0006 3.1573 0.0484 11.1048 0.2945 30.5433 1.0623 | ce_loss_increases 0.0039 0.1655 0.3951 0.4617 | compound_ce_loss_increase 0.7783 | l0s 4.9994 4.9988 5.0000 5.0000 5.0000 5.0000 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.2819 0.0006 3.1573 0.0484 11.1048 0.2945 30.5433 1.0623
type eval | step 1250 | loss 0.2355 2.6859 10.1297 29.9274 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.2350 0.0005 2.6422 0.0437 9.8510 0.2787 28.9120 1.0153 | ce_loss_increases 0.0032 0.1478 0.3589 0.4390 | compound_ce_loss_increase 0.7089 | l0s 4.9995 4.9975 5.0000 5.0000 5.0000 5.0000 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.2350 0.0005 2.6422 0.0437 9.8510 0.2787 28.9120 1.0153
type eval | step 1500 | loss 0.2164 2.4320 9.4874 29.0157 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.2160 0.0004 2.3908 0.0412 9.2172 0.2701 28.0263 0.9894 | ce_loss_increases 0.0022 0.1379 0.3421 0.4338 | compound_ce_loss_increase 0.6780 | l0s 4.9995 4.9978 5.0000 4.9999 5.0000 4.9998 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.2160 0.0004 2.3908 0.0412 9.2172 0.2701 28.0263 0.9894
type eval | step 1750 | loss 0.2041 2.2978 9.1596 28.4601 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.2037 0.0004 2.2579 0.0399 8.8945 0.2651 27.4849 0.9752 | ce_loss_increases 0.0016 0.1302 0.3316 0.4299 | compound_ce_loss_increase 0.6692 | l0s 4.9998 4.9960 5.0000 4.9999 5.0000 4.9999 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.2037 0.0004 2.2579 0.0399 8.8945 0.2651 27.4849 0.9752
type eval | step 2000 | loss 0.1929 2.2131 8.9360 28.0944 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1925 0.0004 2.1742 0.0389 8.6739 0.2622 27.1283 0.9661 | ce_loss_increases 0.0019 0.1263 0.3259 0.4196 | compound_ce_loss_increase 0.6564 | l0s 4.9998 4.9945 5.0000 4.9998 5.0000 4.9998 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1925 0.0004 2.1742 0.0389 8.6739 0.2622 27.1283 0.9661
type eval | step 2250 | loss 0.1846 2.1554 8.7693 27.8005 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1843 0.0004 2.1171 0.0383 8.5101 0.2592 26.8419 0.9586 | ce_loss_increases 0.0014 0.1217 0.3200 0.4123 | compound_ce_loss_increase 0.6385 | l0s 4.9998 4.9928 4.9999 4.9997 5.0000 4.9998 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1843 0.0004 2.1171 0.0383 8.5101 0.2592 26.8419 0.9586
type eval | step 2500 | loss 0.1777 2.1061 8.6280 27.5892 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1774 0.0003 2.0685 0.0377 8.3710 0.2570 26.6351 0.9541 | ce_loss_increases 0.0012 0.1205 0.3115 0.4071 | compound_ce_loss_increase 0.6252 | l0s 4.9995 4.9922 4.9999 4.9996 5.0000 4.9999 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1774 0.0003 2.0685 0.0377 8.3710 0.2570 26.6351 0.9541
type eval | step 2750 | loss 0.1713 2.0661 8.5149 27.3472 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1710 0.0003 2.0290 0.0371 8.2604 0.2545 26.4002 0.9470 | ce_loss_increases 0.0014 0.1142 0.3087 0.4081 | compound_ce_loss_increase 0.6250 | l0s 4.9998 4.9894 4.9999 4.9996 5.0000 4.9998 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1710 0.0003 2.0290 0.0371 8.2604 0.2545 26.4002 0.9470
type eval | step 3000 | loss 0.1665 2.0442 8.4670 27.1781 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1661 0.0003 2.0074 0.0369 8.2137 0.2533 26.2332 0.9449 | ce_loss_increases 0.0012 0.1131 0.3075 0.4078 | compound_ce_loss_increase 0.6231 | l0s 4.9998 4.9901 5.0000 4.9996 5.0000 4.9998 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1661 0.0003 2.0074 0.0369 8.2137 0.2533 26.2332 0.9449
type eval | step 3250 | loss 0.1651 2.0251 8.4083 27.0497 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1647 0.0003 1.9886 0.0365 8.1556 0.2527 26.1061 0.9436 | ce_loss_increases 0.0014 0.1156 0.3044 0.4133 | compound_ce_loss_increase 0.6245 | l0s 4.9999 4.9899 4.9999 4.9995 5.0000 4.9998 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1647 0.0003 1.9886 0.0365 8.1556 0.2527 26.1061 0.9436
type eval | step 3500 | loss 0.1617 2.0163 8.3774 26.9780 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1614 0.0003 1.9798 0.0365 8.1252 0.2522 26.0344 0.9437 | ce_loss_increases 0.0013 0.1155 0.3016 0.4163 | compound_ce_loss_increase 0.6268 | l0s 4.9999 4.9897 4.9999 4.9994 5.0000 4.9998 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1614 0.0003 1.9798 0.0365 8.1252 0.2522 26.0344 0.9437
type eval | step 3750 | loss 0.1593 1.9996 8.3404 26.9027 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1590 0.0003 1.9633 0.0364 8.0884 0.2521 25.9596 0.9431 | ce_loss_increases 0.0013 0.1099 0.3041 0.4134 | compound_ce_loss_increase 0.6260 | l0s 4.9999 4.9908 4.9999 4.9995 5.0000 4.9997 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1590 0.0003 1.9633 0.0364 8.0884 0.2521 25.9596 0.9431
type eval | step 4000 | loss 0.1560 1.9862 8.2826 26.7933 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1557 0.0003 1.9499 0.0362 8.0312 0.2514 25.8515 0.9418 | ce_loss_increases 0.0019 0.1107 0.3007 0.4125 | compound_ce_loss_increase 0.6210 | l0s 5.0000 4.9885 4.9999 4.9993 5.0000 4.9995 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1557 0.0003 1.9499 0.0362 8.0312 0.2514 25.8515 0.9418
type eval | step 4250 | loss 0.1547 1.9883 8.2594 26.6967 | checkpoint True False True True | ce_loss 1.5684 | sae_losses 0.1544 0.0003 1.9522 0.0361 8.0085 0.2509 25.7553 0.9414 | ce_loss_increases 0.0012 0.1095 0.2987 0.4123 | compound_ce_loss_increase 0.6143 | l0s 4.9999 4.9897 4.9999 4.9993 5.0000 4.9995 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1544 0.0003 1.9522 0.0361 8.0085 0.2509 25.7553 0.9414
type eval | step 4500 | loss 0.1500 1.9783 8.2385 26.6209 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1497 0.0003 1.9424 0.0359 7.9878 0.2507 25.6793 0.9416 | ce_loss_increases 0.0013 0.1084 0.2979 0.4151 | compound_ce_loss_increase 0.6117 | l0s 4.9999 4.9926 4.9999 4.9994 5.0000 4.9995 5.0000 4.9999 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1497 0.0003 1.9424 0.0359 7.9878 0.2507 25.6793 0.9416
type eval | step 4750 | loss 0.1474 1.9662 8.2109 26.5301 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1471 0.0003 1.9305 0.0358 7.9606 0.2502 25.5890 0.9411 | ce_loss_increases 0.0013 0.1040 0.2983 0.4134 | compound_ce_loss_increase 0.6113 | l0s 4.9999 4.9917 4.9999 4.9992 5.0000 4.9995 5.0000 4.9999 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1471 0.0003 1.9305 0.0358 7.9606 0.2502 25.5890 0.9411
type eval | step 5000 | loss 0.1443 1.9570 8.1880 26.4502 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1440 0.0003 1.9215 0.0355 7.9383 0.2497 25.5096 0.9406 | ce_loss_increases 0.0010 0.1046 0.2989 0.4126 | compound_ce_loss_increase 0.6129 | l0s 4.9999 4.9908 4.9999 4.9994 5.0000 4.9995 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1440 0.0003 1.9215 0.0355 7.9383 0.2497 25.5096 0.9406
type eval | step 5250 | loss 0.1436 1.9451 8.1433 26.3547 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1433 0.0003 1.9098 0.0353 7.8947 0.2486 25.4164 0.9383 | ce_loss_increases 0.0012 0.1029 0.2989 0.4114 | compound_ce_loss_increase 0.6166 | l0s 5.0000 4.9937 4.9999 4.9992 5.0000 4.9994 5.0000 4.9999 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1433 0.0003 1.9098 0.0353 7.8947 0.2486 25.4164 0.9383
type eval | step 5500 | loss 0.1413 1.9412 8.1279 26.3044 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1410 0.0003 1.9058 0.0354 7.8797 0.2482 25.3662 0.9383 | ce_loss_increases 0.0012 0.1026 0.3008 0.4108 | compound_ce_loss_increase 0.6166 | l0s 5.0000 4.9937 4.9999 4.9993 5.0000 4.9996 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1410 0.0003 1.9058 0.0354 7.8797 0.2482 25.3662 0.9383
type eval | step 5750 | loss 0.1410 1.9367 8.1214 26.2819 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1408 0.0003 1.9015 0.0352 7.8733 0.2481 25.3434 0.9385 | ce_loss_increases 0.0011 0.1008 0.2988 0.4092 | compound_ce_loss_increase 0.6113 | l0s 5.0000 4.9901 4.9999 4.9993 5.0000 4.9995 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1408 0.0003 1.9015 0.0352 7.8733 0.2481 25.3434 0.9385
type eval | step 6000 | loss 0.1394 1.9343 8.1171 26.2747 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1391 0.0003 1.8992 0.0352 7.8689 0.2482 25.3356 0.9391 | ce_loss_increases 0.0010 0.1021 0.2978 0.4082 | compound_ce_loss_increase 0.6150 | l0s 4.9999 4.9912 4.9998 4.9992 5.0000 4.9994 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1391 0.0003 1.8992 0.0352 7.8689 0.2482 25.3356 0.9391
type eval | step 6250 | loss 0.1386 1.9308 8.1225 26.3005 | checkpoint True True False False | ce_loss 1.5684 | sae_losses 0.1383 0.0003 1.8957 0.0351 7.8740 0.2486 25.3609 0.9396 | ce_loss_increases 0.0011 0.1014 0.2992 0.4100 | compound_ce_loss_increase 0.6231 | l0s 5.0000 4.9920 4.9998 4.9994 4.9999 4.9995 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1383 0.0003 1.8957 0.0351 7.8740 0.2486 25.3609 0.9396
type eval | step 6500 | loss 0.1373 1.9269 8.1005 26.2908 | checkpoint True True True False | ce_loss 1.5684 | sae_losses 0.1370 0.0003 1.8920 0.0350 7.8521 0.2484 25.3513 0.9395 | ce_loss_increases 0.0012 0.1020 0.2979 0.4085 | compound_ce_loss_increase 0.6255 | l0s 5.0000 4.9910 4.9999 4.9993 4.9999 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1370 0.0003 1.8920 0.0350 7.8521 0.2484 25.3513 0.9395
type eval | step 6750 | loss 0.1372 1.9257 8.0876 26.2662 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1369 0.0003 1.8908 0.0348 7.8394 0.2482 25.3269 0.9393 | ce_loss_increases 0.0011 0.1042 0.2987 0.4092 | compound_ce_loss_increase 0.6244 | l0s 5.0000 4.9915 4.9998 4.9992 4.9999 4.9995 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1369 0.0003 1.8908 0.0348 7.8394 0.2482 25.3269 0.9393
type eval | step 7000 | loss 0.1349 1.9137 8.0717 26.2603 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1346 0.0003 1.8790 0.0347 7.8234 0.2483 25.3207 0.9396 | ce_loss_increases 0.0011 0.1044 0.3010 0.4124 | compound_ce_loss_increase 0.6252 | l0s 5.0000 4.9913 4.9999 4.9993 4.9999 4.9994 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1346 0.0003 1.8790 0.0347 7.8234 0.2483 25.3207 0.9396
type eval | step 7250 | loss 0.1356 1.9036 8.0576 26.2398 | checkpoint False True True True | ce_loss 1.5684 | sae_losses 0.1353 0.0003 1.8690 0.0346 7.8096 0.2480 25.3001 0.9397 | ce_loss_increases 0.0012 0.1039 0.3012 0.4140 | compound_ce_loss_increase 0.6249 | l0s 5.0000 4.9903 4.9998 4.9993 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1353 0.0003 1.8690 0.0346 7.8096 0.2480 25.3001 0.9397
type eval | step 7500 | loss 0.1323 1.9015 8.0413 26.2110 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1320 0.0003 1.8671 0.0344 7.7936 0.2477 25.2717 0.9393 | ce_loss_increases 0.0013 0.1044 0.2993 0.4146 | compound_ce_loss_increase 0.6254 | l0s 5.0000 4.9906 4.9998 4.9992 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1320 0.0003 1.8671 0.0344 7.7936 0.2477 25.2717 0.9393
type eval | step 7750 | loss 0.1309 1.8967 8.0128 26.1701 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1306 0.0003 1.8626 0.0341 7.7659 0.2469 25.2327 0.9374 | ce_loss_increases 0.0010 0.1037 0.3001 0.4150 | compound_ce_loss_increase 0.6225 | l0s 4.9999 4.9934 4.9999 4.9993 5.0000 4.9994 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1306 0.0003 1.8626 0.0341 7.7659 0.2469 25.2327 0.9374
type eval | step 8000 | loss 0.1297 1.8952 8.0003 26.1492 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1295 0.0003 1.8611 0.0341 7.7538 0.2465 25.2125 0.9368 | ce_loss_increases 0.0008 0.1033 0.3018 0.4133 | compound_ce_loss_increase 0.6205 | l0s 4.9999 4.9914 4.9999 4.9993 5.0000 4.9994 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1295 0.0003 1.8611 0.0341 7.7538 0.2465 25.2125 0.9368
type eval | step 8250 | loss 0.1294 1.8945 8.0000 26.1505 | checkpoint True True True False | ce_loss 1.5684 | sae_losses 0.1292 0.0003 1.8604 0.0341 7.7537 0.2464 25.2139 0.9366 | ce_loss_increases 0.0012 0.1029 0.3028 0.4125 | compound_ce_loss_increase 0.6206 | l0s 5.0000 4.9902 4.9999 4.9991 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1292 0.0003 1.8604 0.0341 7.7537 0.2464 25.2139 0.9366
type eval | step 8500 | loss 0.1276 1.8954 7.9997 26.1539 | checkpoint True False True False | ce_loss 1.5684 | sae_losses 0.1274 0.0003 1.8613 0.0341 7.7533 0.2464 25.2169 0.9370 | ce_loss_increases 0.0011 0.1025 0.3031 0.4125 | compound_ce_loss_increase 0.6214 | l0s 4.9999 4.9902 4.9999 4.9994 5.0000 4.9994 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1274 0.0003 1.8613 0.0341 7.7533 0.2464 25.2169 0.9370
type eval | step 8750 | loss 0.1264 1.8976 8.0112 26.1851 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.1261 0.0003 1.8635 0.0340 7.7647 0.2465 25.2475 0.9376 | ce_loss_increases 0.0010 0.1015 0.3050 0.4141 | compound_ce_loss_increase 0.6225 | l0s 4.9999 4.9914 4.9999 4.9993 5.0000 4.9994 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1261 0.0003 1.8635 0.0340 7.7647 0.2465 25.2475 0.9376
type eval | step 9000 | loss 0.1247 1.8844 7.9968 26.1939 | checkpoint True True True False | ce_loss 1.5684 | sae_losses 0.1245 0.0003 1.8505 0.0339 7.7504 0.2464 25.2564 0.9374 | ce_loss_increases 0.0012 0.1019 0.3049 0.4130 | compound_ce_loss_increase 0.6267 | l0s 4.9998 4.9916 4.9999 4.9992 5.0000 4.9992 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1245 0.0003 1.8505 0.0339 7.7504 0.2464 25.2564 0.9374
type eval | step 9250 | loss 0.1244 1.8804 7.9828 26.1805 | checkpoint True True True False | ce_loss 1.5684 | sae_losses 0.1241 0.0003 1.8465 0.0339 7.7365 0.2463 25.2435 0.9370 | ce_loss_increases 0.0004 0.1028 0.3061 0.4135 | compound_ce_loss_increase 0.6273 | l0s 4.9999 4.9928 4.9999 4.9992 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1241 0.0003 1.8465 0.0339 7.7365 0.2463 25.2435 0.9370
type eval | step 9500 | loss 0.1233 1.8797 7.9892 26.1791 | checkpoint True True False False | ce_loss 1.5684 | sae_losses 0.1231 0.0003 1.8458 0.0339 7.7428 0.2464 25.2421 0.9370 | ce_loss_increases 0.0007 0.1020 0.3061 0.4134 | compound_ce_loss_increase 0.6268 | l0s 4.9998 4.9919 4.9999 4.9993 5.0000 4.9994 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1231 0.0003 1.8458 0.0339 7.7428 0.2464 25.2421 0.9370
type eval | step 9750 | loss 0.1221 1.8775 7.9815 26.1708 | checkpoint True True True False | ce_loss 1.5684 | sae_losses 0.1218 0.0003 1.8437 0.0338 7.7351 0.2463 25.2341 0.9367 | ce_loss_increases 0.0010 0.1024 0.3062 0.4132 | compound_ce_loss_increase 0.6266 | l0s 4.9999 4.9921 4.9999 4.9993 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1218 0.0003 1.8437 0.0338 7.7351 0.2463 25.2341 0.9367
type eval | step 10000 | loss 0.1209 1.8747 7.9793 26.1571 | checkpoint True True True False | ce_loss 1.5684 | sae_losses 0.1207 0.0003 1.8409 0.0337 7.7333 0.2461 25.2212 0.9359 | ce_loss_increases 0.0010 0.1023 0.3046 0.4153 | compound_ce_loss_increase 0.6267 | l0s 4.9999 4.9921 4.9999 4.9993 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1207 0.0003 1.8409 0.0337 7.7333 0.2461 25.2212 0.9359
type eval | step 10250 | loss 0.1198 1.8741 7.9649 26.1285 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1195 0.0003 1.8405 0.0336 7.7195 0.2454 25.1940 0.9344 | ce_loss_increases 0.0007 0.1022 0.3049 0.4152 | compound_ce_loss_increase 0.6251 | l0s 4.9999 4.9916 4.9999 4.9993 5.0000 4.9992 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1195 0.0003 1.8405 0.0336 7.7195 0.2454 25.1940 0.9344
type eval | step 10500 | loss 0.1194 1.8786 7.9605 26.1128 | checkpoint True False True True | ce_loss 1.5684 | sae_losses 0.1191 0.0003 1.8450 0.0336 7.7153 0.2452 25.1793 0.9334 | ce_loss_increases 0.0009 0.1028 0.3069 0.4147 | compound_ce_loss_increase 0.6200 | l0s 4.9999 4.9923 4.9999 4.9993 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1191 0.0003 1.8450 0.0336 7.7153 0.2452 25.1793 0.9334
type eval | step 10750 | loss 0.1199 1.8929 7.9608 26.1101 | checkpoint False False False True | ce_loss 1.5684 | sae_losses 0.1196 0.0003 1.8593 0.0336 7.7157 0.2451 25.1764 0.9338 | ce_loss_increases 0.0005 0.1035 0.3081 0.4147 | compound_ce_loss_increase 0.6207 | l0s 4.9999 4.9933 4.9999 4.9993 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1196 0.0003 1.8593 0.0336 7.7157 0.2451 25.1764 0.9338
type eval | step 11000 | loss 0.1202 1.9107 7.9735 26.1109 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.1199 0.0003 1.8770 0.0337 7.7284 0.2452 25.1773 0.9336 | ce_loss_increases 0.0009 0.1032 0.3080 0.4138 | compound_ce_loss_increase 0.6180 | l0s 4.9999 4.9933 4.9999 4.9993 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1199 0.0003 1.8770 0.0337 7.7284 0.2452 25.1773 0.9336
type eval | step 11250 | loss 0.1193 1.9111 7.9839 26.1297 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.1190 0.0003 1.8773 0.0337 7.7385 0.2454 25.1959 0.9338 | ce_loss_increases 0.0010 0.1012 0.3077 0.4141 | compound_ce_loss_increase 0.6196 | l0s 4.9998 4.9932 4.9999 4.9993 5.0000 4.9994 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1190 0.0003 1.8773 0.0337 7.7385 0.2454 25.1959 0.9338
type eval | step 11500 | loss 0.1190 1.8912 7.9842 26.1405 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.1188 0.0003 1.8575 0.0336 7.7387 0.2455 25.2066 0.9339 | ce_loss_increases 0.0010 0.0989 0.3076 0.4140 | compound_ce_loss_increase 0.6179 | l0s 4.9999 4.9936 4.9999 4.9991 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1188 0.0003 1.8575 0.0336 7.7387 0.2455 25.2066 0.9339
type eval | step 11750 | loss 0.1185 1.8744 7.9748 26.1369 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.1182 0.0003 1.8408 0.0336 7.7296 0.2453 25.2032 0.9337 | ce_loss_increases 0.0009 0.1006 0.3070 0.4135 | compound_ce_loss_increase 0.6163 | l0s 4.9999 4.9918 4.9999 4.9992 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1182 0.0003 1.8408 0.0336 7.7296 0.2453 25.2032 0.9337
type eval | step 12000 | loss 0.1188 1.8713 7.9806 26.1412 | checkpoint False True False False | ce_loss 1.5684 | sae_losses 0.1186 0.0003 1.8377 0.0336 7.7352 0.2454 25.2073 0.9339 | ce_loss_increases 0.0010 0.1004 0.3049 0.4126 | compound_ce_loss_increase 0.6149 | l0s 4.9999 4.9900 4.9999 4.9992 5.0000 4.9994 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1186 0.0003 1.8377 0.0336 7.7352 0.2454 25.2073 0.9339
type eval | step 12250 | loss 0.1179 1.8648 7.9712 26.1332 | checkpoint True True False False | ce_loss 1.5684 | sae_losses 0.1177 0.0003 1.8312 0.0336 7.7258 0.2454 25.1994 0.9337 | ce_loss_increases 0.0009 0.0993 0.3040 0.4120 | compound_ce_loss_increase 0.6154 | l0s 4.9999 4.9932 4.9999 4.9993 4.9999 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1177 0.0003 1.8312 0.0336 7.7258 0.2454 25.1994 0.9337
type eval | step 12500 | loss 0.1174 1.8605 7.9629 26.1252 | checkpoint True True False False | ce_loss 1.5684 | sae_losses 0.1172 0.0003 1.8269 0.0336 7.7176 0.2453 25.1919 0.9334 | ce_loss_increases 0.0008 0.0980 0.3035 0.4120 | compound_ce_loss_increase 0.6168 | l0s 4.9999 4.9900 4.9999 4.9992 4.9999 4.9992 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1172 0.0003 1.8269 0.0336 7.7176 0.2453 25.1919 0.9334
type eval | step 12750 | loss 0.1168 1.8556 7.9536 26.1115 | checkpoint True True True False | ce_loss 1.5684 | sae_losses 0.1166 0.0003 1.8221 0.0335 7.7085 0.2450 25.1789 0.9326 | ce_loss_increases 0.0008 0.0983 0.3037 0.4121 | compound_ce_loss_increase 0.6135 | l0s 4.9999 4.9915 4.9999 4.9993 4.9999 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1166 0.0003 1.8221 0.0335 7.7085 0.2450 25.1789 0.9326
type eval | step 13000 | loss 0.1164 1.8540 7.9546 26.0991 | checkpoint True True False True | ce_loss 1.5684 | sae_losses 0.1161 0.0003 1.8205 0.0334 7.7098 0.2448 25.1671 0.9321 | ce_loss_increases 0.0008 0.0964 0.3049 0.4112 | compound_ce_loss_increase 0.6120 | l0s 4.9999 4.9918 4.9999 4.9993 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1161 0.0003 1.8205 0.0334 7.7098 0.2448 25.1671 0.9321
type eval | step 13250 | loss 0.1164 1.8511 7.9392 26.0982 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1161 0.0003 1.8177 0.0335 7.6945 0.2447 25.1662 0.9320 | ce_loss_increases 0.0008 0.0964 0.3066 0.4105 | compound_ce_loss_increase 0.6168 | l0s 4.9999 4.9929 4.9999 4.9993 4.9999 4.9992 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1161 0.0003 1.8177 0.0335 7.6945 0.2447 25.1662 0.9320
type eval | step 13500 | loss 0.1172 1.8505 7.9382 26.0977 | checkpoint False True True True | ce_loss 1.5684 | sae_losses 0.1169 0.0003 1.8171 0.0335 7.6935 0.2447 25.1656 0.9321 | ce_loss_increases 0.0008 0.0956 0.3050 0.4105 | compound_ce_loss_increase 0.6167 | l0s 4.9999 4.9925 4.9999 4.9995 4.9999 4.9992 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1169 0.0003 1.8171 0.0335 7.6935 0.2447 25.1656 0.9321
type eval | step 13750 | loss 0.1175 1.8508 7.9431 26.1005 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.1173 0.0003 1.8173 0.0335 7.6983 0.2448 25.1678 0.9327 | ce_loss_increases 0.0009 0.0947 0.3040 0.4104 | compound_ce_loss_increase 0.6162 | l0s 4.9999 4.9931 4.9999 4.9994 4.9999 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1173 0.0003 1.8173 0.0335 7.6983 0.2448 25.1678 0.9327
type eval | step 14000 | loss 0.1167 1.8470 7.9418 26.0973 | checkpoint False True False True | ce_loss 1.5684 | sae_losses 0.1165 0.0003 1.8136 0.0335 7.6969 0.2449 25.1643 0.9329 | ce_loss_increases 0.0009 0.0943 0.3035 0.4104 | compound_ce_loss_increase 0.6170 | l0s 4.9999 4.9923 4.9999 4.9992 4.9999 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1165 0.0003 1.8136 0.0335 7.6969 0.2449 25.1643 0.9329
type eval | step 14250 | loss 0.1161 1.8398 7.9342 26.0952 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1159 0.0003 1.8064 0.0334 7.6893 0.2449 25.1624 0.9328 | ce_loss_increases 0.0008 0.0943 0.3040 0.4106 | compound_ce_loss_increase 0.6170 | l0s 4.9999 4.9904 4.9999 4.9991 4.9999 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1159 0.0003 1.8064 0.0334 7.6893 0.2449 25.1624 0.9328
type eval | step 14500 | loss 0.1165 1.8391 7.9361 26.0976 | checkpoint False True False False | ce_loss 1.5684 | sae_losses 0.1162 0.0003 1.8057 0.0335 7.6911 0.2450 25.1647 0.9329 | ce_loss_increases 0.0010 0.0949 0.3042 0.4100 | compound_ce_loss_increase 0.6138 | l0s 4.9999 4.9903 4.9999 4.9991 4.9999 4.9994 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1162 0.0003 1.8057 0.0335 7.6911 0.2450 25.1647 0.9329
type eval | step 14750 | loss 0.1164 1.8376 7.9324 26.0964 | checkpoint False True True False | ce_loss 1.5684 | sae_losses 0.1161 0.0003 1.8042 0.0334 7.6874 0.2450 25.1635 0.9330 | ce_loss_increases 0.0009 0.0947 0.3049 0.4100 | compound_ce_loss_increase 0.6159 | l0s 4.9999 4.9936 4.9999 4.9992 4.9999 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1161 0.0003 1.8042 0.0334 7.6874 0.2450 25.1635 0.9330
type eval | step 15000 | loss 0.1159 1.8351 7.9296 26.0932 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1157 0.0003 1.8017 0.0334 7.6847 0.2449 25.1604 0.9327 | ce_loss_increases 0.0009 0.0944 0.3053 0.4106 | compound_ce_loss_increase 0.6161 | l0s 4.9999 4.9929 4.9998 4.9991 4.9999 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1157 0.0003 1.8017 0.0334 7.6847 0.2449 25.1604 0.9327
type eval | step 15250 | loss 0.1156 1.8333 7.9219 26.0797 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1153 0.0002 1.7999 0.0334 7.6771 0.2448 25.1471 0.9326 | ce_loss_increases 0.0009 0.0935 0.3050 0.4105 | compound_ce_loss_increase 0.6151 | l0s 4.9999 4.9923 4.9999 4.9992 4.9999 4.9994 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1153 0.0002 1.7999 0.0334 7.6771 0.2448 25.1471 0.9326
type eval | step 15500 | loss 0.1153 1.8326 7.9151 26.0696 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1150 0.0002 1.7993 0.0333 7.6705 0.2446 25.1377 0.9318 | ce_loss_increases 0.0010 0.0937 0.3056 0.4098 | compound_ce_loss_increase 0.6145 | l0s 4.9999 4.9916 4.9999 4.9992 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1150 0.0002 1.7993 0.0333 7.6705 0.2446 25.1377 0.9318
type eval | step 15750 | loss 0.1153 1.8336 7.9113 26.0682 | checkpoint True False True True | ce_loss 1.5684 | sae_losses 0.1150 0.0002 1.8003 0.0333 7.6667 0.2446 25.1365 0.9316 | ce_loss_increases 0.0007 0.0942 0.3062 0.4104 | compound_ce_loss_increase 0.6179 | l0s 4.9999 4.9914 4.9999 4.9991 4.9999 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1150 0.0002 1.8003 0.0333 7.6667 0.2446 25.1365 0.9316
type eval | step 16000 | loss 0.1153 1.8348 7.9123 26.0712 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.1150 0.0003 1.8015 0.0333 7.6677 0.2446 25.1394 0.9318 | ce_loss_increases 0.0008 0.0950 0.3068 0.4103 | compound_ce_loss_increase 0.6148 | l0s 4.9999 4.9899 4.9999 4.9991 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1150 0.0003 1.8015 0.0333 7.6677 0.2446 25.1394 0.9318
type eval | step 16250 | loss 0.1157 1.8370 7.9154 26.0749 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.1155 0.0003 1.8036 0.0333 7.6707 0.2447 25.1428 0.9320 | ce_loss_increases 0.0009 0.0946 0.3063 0.4103 | compound_ce_loss_increase 0.6150 | l0s 4.9999 4.9916 4.9999 4.9993 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1155 0.0003 1.8036 0.0333 7.6707 0.2447 25.1428 0.9320
type eval | step 16500 | loss 0.1153 1.8380 7.9141 26.0774 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.1150 0.0002 1.8046 0.0333 7.6694 0.2447 25.1451 0.9323 | ce_loss_increases 0.0010 0.0944 0.3054 0.4102 | compound_ce_loss_increase 0.6152 | l0s 4.9999 4.9914 4.9999 4.9992 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1150 0.0002 1.8046 0.0333 7.6694 0.2447 25.1451 0.9323
type eval | step 16750 | loss 0.1148 1.8362 7.9107 26.0749 | checkpoint True False True False | ce_loss 1.5684 | sae_losses 0.1145 0.0002 1.8029 0.0333 7.6660 0.2447 25.1427 0.9322 | ce_loss_increases 0.0010 0.0938 0.3054 0.4102 | compound_ce_loss_increase 0.6148 | l0s 4.9999 4.9919 4.9999 4.9991 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1145 0.0002 1.8029 0.0333 7.6660 0.2447 25.1427 0.9322
type eval | step 17000 | loss 0.1149 1.8379 7.9095 26.0768 | checkpoint False False True False | ce_loss 1.5684 | sae_losses 0.1146 0.0002 1.8046 0.0333 7.6648 0.2447 25.1444 0.9324 | ce_loss_increases 0.0009 0.0933 0.3058 0.4096 | compound_ce_loss_increase 0.6138 | l0s 4.9999 4.9897 4.9999 4.9991 4.9999 4.9994 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1146 0.0002 1.8046 0.0333 7.6648 0.2447 25.1444 0.9324
type eval | step 17250 | loss 0.1147 1.8395 7.9093 26.0781 | checkpoint True False True False | ce_loss 1.5684 | sae_losses 0.1145 0.0002 1.8062 0.0333 7.6646 0.2448 25.1457 0.9323 | ce_loss_increases 0.0010 0.0938 0.3072 0.4099 | compound_ce_loss_increase 0.6151 | l0s 4.9999 4.9917 4.9999 4.9992 4.9999 4.9994 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1145 0.0002 1.8062 0.0333 7.6646 0.2448 25.1457 0.9323
type eval | step 17500 | loss 0.1145 1.8370 7.9067 26.0743 | checkpoint True False True False | ce_loss 1.5684 | sae_losses 0.1142 0.0002 1.8037 0.0333 7.6620 0.2448 25.1420 0.9324 | ce_loss_increases 0.0008 0.0941 0.3076 0.4100 | compound_ce_loss_increase 0.6158 | l0s 5.0000 4.9928 4.9998 4.9991 4.9999 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1142 0.0002 1.8037 0.0333 7.6620 0.2448 25.1420 0.9324
type eval | step 17750 | loss 0.1144 1.8360 7.9033 26.0709 | checkpoint True False True False | ce_loss 1.5684 | sae_losses 0.1141 0.0002 1.8027 0.0333 7.6586 0.2447 25.1385 0.9324 | ce_loss_increases 0.0008 0.0937 0.3075 0.4105 | compound_ce_loss_increase 0.6123 | l0s 5.0000 4.9918 4.9998 4.9992 4.9999 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1141 0.0002 1.8027 0.0333 7.6586 0.2447 25.1385 0.9324
type eval | step 18000 | loss 0.1143 1.8334 7.8990 26.0602 | checkpoint True False True True | ce_loss 1.5684 | sae_losses 0.1141 0.0002 1.8001 0.0332 7.6545 0.2445 25.1284 0.9318 | ce_loss_increases 0.0009 0.0934 0.3068 0.4107 | compound_ce_loss_increase 0.6141 | l0s 5.0000 4.9915 4.9999 4.9991 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1141 0.0002 1.8001 0.0332 7.6545 0.2445 25.1284 0.9318
type eval | step 18250 | loss 0.1142 1.8330 7.8960 26.0580 | checkpoint True False True True | ce_loss 1.5684 | sae_losses 0.1139 0.0002 1.7998 0.0332 7.6515 0.2445 25.1264 0.9316 | ce_loss_increases 0.0009 0.0938 0.3066 0.4103 | compound_ce_loss_increase 0.6113 | l0s 4.9999 4.9909 4.9998 4.9991 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1139 0.0002 1.7998 0.0332 7.6515 0.2445 25.1264 0.9316
type eval | step 18500 | loss 0.1141 1.8340 7.8969 26.0556 | checkpoint True False False True | ce_loss 1.5684 | sae_losses 0.1138 0.0002 1.8008 0.0332 7.6524 0.2445 25.1238 0.9318 | ce_loss_increases 0.0009 0.0946 0.3067 0.4103 | compound_ce_loss_increase 0.6134 | l0s 4.9999 4.9913 4.9999 4.9991 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1138 0.0002 1.8008 0.0332 7.6524 0.2445 25.1238 0.9318
type eval | step 18750 | loss 0.1144 1.8345 7.9002 26.0595 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.1142 0.0002 1.8012 0.0333 7.6557 0.2445 25.1275 0.9320 | ce_loss_increases 0.0009 0.0953 0.3065 0.4102 | compound_ce_loss_increase 0.6144 | l0s 4.9999 4.9929 4.9999 4.9992 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1142 0.0002 1.8012 0.0333 7.6557 0.2445 25.1275 0.9320
type eval | step 19000 | loss 0.1146 1.8357 7.9024 26.0635 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.1143 0.0002 1.8024 0.0333 7.6578 0.2445 25.1314 0.9322 | ce_loss_increases 0.0008 0.0961 0.3063 0.4105 | compound_ce_loss_increase 0.6150 | l0s 4.9999 4.9930 4.9999 4.9993 5.0000 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1143 0.0002 1.8024 0.0333 7.6578 0.2445 25.1314 0.9322
type eval | step 19250 | loss 0.1143 1.8348 7.9030 26.0656 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.1140 0.0002 1.8016 0.0333 7.6585 0.2445 25.1335 0.9321 | ce_loss_increases 0.0011 0.0958 0.3064 0.4104 | compound_ce_loss_increase 0.6136 | l0s 4.9999 4.9918 4.9999 4.9992 4.9999 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1140 0.0002 1.8016 0.0333 7.6585 0.2445 25.1335 0.9321
type eval | step 19500 | loss 0.1142 1.8353 7.9017 26.0649 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.1140 0.0002 1.8020 0.0333 7.6571 0.2446 25.1327 0.9322 | ce_loss_increases 0.0008 0.0951 0.3060 0.4104 | compound_ce_loss_increase 0.6150 | l0s 4.9999 4.9914 4.9999 4.9992 4.9999 4.9994 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1140 0.0002 1.8020 0.0333 7.6571 0.2446 25.1327 0.9322
type eval | step 19750 | loss 0.1142 1.8357 7.9034 26.0671 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.1140 0.0002 1.8025 0.0332 7.6588 0.2446 25.1347 0.9324 | ce_loss_increases 0.0009 0.0953 0.3065 0.4102 | compound_ce_loss_increase 0.6127 | l0s 4.9999 4.9920 4.9999 4.9992 4.9999 4.9994 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1140 0.0002 1.8025 0.0332 7.6588 0.2446 25.1347 0.9324
type eval | step 20000 | loss 0.1141 1.8356 7.9025 26.0671 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.1139 0.0002 1.8024 0.0332 7.6579 0.2446 25.1345 0.9326 | ce_loss_increases 0.0009 0.0947 0.3070 0.4102 | compound_ce_loss_increase 0.6130 | l0s 4.9999 4.9921 4.9999 4.9992 4.9999 4.9993 5.0000 5.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1139 0.0002 1.8024 0.0332 7.6579 0.2446 25.1345 0.9326