Text Generation
Transformers
Safetensors
English
code
llama
smallcoder
code-llm
code-generation
sft
pretraining
tpu
303m
trc
text-generation-inference
Instructions to use Beebey/smallcoder-303m with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Beebey/smallcoder-303m with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="Beebey/smallcoder-303m")

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("Beebey/smallcoder-303m")
model = AutoModelForCausalLM.from_pretrained("Beebey/smallcoder-303m")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use Beebey/smallcoder-303m with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "Beebey/smallcoder-303m"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Beebey/smallcoder-303m",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/Beebey/smallcoder-303m
- SGLang
How to use Beebey/smallcoder-303m with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "Beebey/smallcoder-303m" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Beebey/smallcoder-303m",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "Beebey/smallcoder-303m" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Beebey/smallcoder-303m",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use Beebey/smallcoder-303m with Docker Model Runner:
docker model run hf.co/Beebey/smallcoder-303m
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.25599901577555206, | |
| "eval_steps": 500, | |
| "global_step": 22889, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00011184368726268168, | |
| "grad_norm": 0.6328383684158325, | |
| "learning_rate": 4.5e-06, | |
| "loss": 1.734, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.00022368737452536336, | |
| "grad_norm": 0.566952109336853, | |
| "learning_rate": 9.5e-06, | |
| "loss": 1.6903, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.00033553106178804503, | |
| "grad_norm": 0.5359939932823181, | |
| "learning_rate": 1.4500000000000002e-05, | |
| "loss": 1.6266, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0004473747490507267, | |
| "grad_norm": 0.4729914367198944, | |
| "learning_rate": 1.95e-05, | |
| "loss": 1.5731, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0005592184363134084, | |
| "grad_norm": 0.42020025849342346, | |
| "learning_rate": 2.4500000000000003e-05, | |
| "loss": 1.5335, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0006710621235760901, | |
| "grad_norm": 0.4461672604084015, | |
| "learning_rate": 2.95e-05, | |
| "loss": 1.4851, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0007829058108387717, | |
| "grad_norm": 0.4443751275539398, | |
| "learning_rate": 3.4500000000000005e-05, | |
| "loss": 1.4431, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0008947494981014534, | |
| "grad_norm": 0.4204632639884949, | |
| "learning_rate": 3.95e-05, | |
| "loss": 1.4036, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.0010065931853641351, | |
| "grad_norm": 0.3985028862953186, | |
| "learning_rate": 4.45e-05, | |
| "loss": 1.3725, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0011184368726268167, | |
| "grad_norm": 0.4111650586128235, | |
| "learning_rate": 4.9500000000000004e-05, | |
| "loss": 1.3527, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0012302805598894985, | |
| "grad_norm": 0.4175569713115692, | |
| "learning_rate": 5.45e-05, | |
| "loss": 1.3431, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0013421242471521801, | |
| "grad_norm": 0.3871678411960602, | |
| "learning_rate": 5.9499999999999996e-05, | |
| "loss": 1.3322, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.0014539679344148617, | |
| "grad_norm": 0.39584827423095703, | |
| "learning_rate": 6.450000000000001e-05, | |
| "loss": 1.3075, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.0015658116216775435, | |
| "grad_norm": 0.4165605902671814, | |
| "learning_rate": 6.950000000000001e-05, | |
| "loss": 1.286, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.001677655308940225, | |
| "grad_norm": 0.3985513150691986, | |
| "learning_rate": 7.45e-05, | |
| "loss": 1.2567, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0017894989962029069, | |
| "grad_norm": 0.39112743735313416, | |
| "learning_rate": 7.950000000000001e-05, | |
| "loss": 1.2448, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.0019013426834655885, | |
| "grad_norm": 0.3867124915122986, | |
| "learning_rate": 8.450000000000001e-05, | |
| "loss": 1.2405, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.0020131863707282703, | |
| "grad_norm": 0.3955863416194916, | |
| "learning_rate": 8.95e-05, | |
| "loss": 1.2123, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.002125030057990952, | |
| "grad_norm": 0.40293410420417786, | |
| "learning_rate": 9.45e-05, | |
| "loss": 1.2081, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.0022368737452536334, | |
| "grad_norm": 0.3828902542591095, | |
| "learning_rate": 9.95e-05, | |
| "loss": 1.2049, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.002348717432516315, | |
| "grad_norm": 0.3969178795814514, | |
| "learning_rate": 0.00010449999999999999, | |
| "loss": 1.1892, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.002460561119778997, | |
| "grad_norm": 0.4122287929058075, | |
| "learning_rate": 0.0001095, | |
| "loss": 1.184, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.0025724048070416786, | |
| "grad_norm": 0.3793940246105194, | |
| "learning_rate": 0.0001145, | |
| "loss": 1.1809, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.0026842484943043602, | |
| "grad_norm": 0.4132145643234253, | |
| "learning_rate": 0.00011949999999999999, | |
| "loss": 1.1883, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.002796092181567042, | |
| "grad_norm": 0.3900831639766693, | |
| "learning_rate": 0.0001245, | |
| "loss": 1.1818, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.0029079358688297234, | |
| "grad_norm": 0.3898029625415802, | |
| "learning_rate": 0.0001295, | |
| "loss": 1.1693, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.0030197795560924054, | |
| "grad_norm": 0.40828797221183777, | |
| "learning_rate": 0.00013450000000000002, | |
| "loss": 1.1869, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.003131623243355087, | |
| "grad_norm": 0.3976770341396332, | |
| "learning_rate": 0.0001395, | |
| "loss": 1.1841, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.0032434669306177686, | |
| "grad_norm": 0.3902062773704529, | |
| "learning_rate": 0.0001445, | |
| "loss": 1.1843, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.00335531061788045, | |
| "grad_norm": 0.38051125407218933, | |
| "learning_rate": 0.0001495, | |
| "loss": 1.1662, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0034671543051431318, | |
| "grad_norm": 0.3628483712673187, | |
| "learning_rate": 0.00015450000000000001, | |
| "loss": 1.1638, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.0035789979924058138, | |
| "grad_norm": 0.3693360388278961, | |
| "learning_rate": 0.0001595, | |
| "loss": 1.1606, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.0036908416796684954, | |
| "grad_norm": 0.38896557688713074, | |
| "learning_rate": 0.00016450000000000001, | |
| "loss": 1.1448, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.003802685366931177, | |
| "grad_norm": 0.40257108211517334, | |
| "learning_rate": 0.00016950000000000003, | |
| "loss": 1.143, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.0039145290541938585, | |
| "grad_norm": 0.38656994700431824, | |
| "learning_rate": 0.00017449999999999999, | |
| "loss": 1.141, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.0040263727414565405, | |
| "grad_norm": 0.3700025677680969, | |
| "learning_rate": 0.0001795, | |
| "loss": 1.136, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.004138216428719222, | |
| "grad_norm": 0.37222161889076233, | |
| "learning_rate": 0.0001845, | |
| "loss": 1.1292, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.004250060115981904, | |
| "grad_norm": 0.39386317133903503, | |
| "learning_rate": 0.0001895, | |
| "loss": 1.1139, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.004361903803244586, | |
| "grad_norm": 0.3776305913925171, | |
| "learning_rate": 0.0001945, | |
| "loss": 1.1125, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.004473747490507267, | |
| "grad_norm": 0.40314197540283203, | |
| "learning_rate": 0.00019950000000000002, | |
| "loss": 1.0962, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.004585591177769949, | |
| "grad_norm": 0.37841472029685974, | |
| "learning_rate": 0.00020449999999999998, | |
| "loss": 1.0987, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.00469743486503263, | |
| "grad_norm": 0.3678649365901947, | |
| "learning_rate": 0.0002095, | |
| "loss": 1.0826, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.004809278552295312, | |
| "grad_norm": 0.37902751564979553, | |
| "learning_rate": 0.0002145, | |
| "loss": 1.0973, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.004921122239557994, | |
| "grad_norm": 0.3776302635669708, | |
| "learning_rate": 0.0002195, | |
| "loss": 1.112, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.005032965926820675, | |
| "grad_norm": 0.43771493434906006, | |
| "learning_rate": 0.0002245, | |
| "loss": 1.1005, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.005144809614083357, | |
| "grad_norm": 0.3662595748901367, | |
| "learning_rate": 0.00022950000000000002, | |
| "loss": 1.0899, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.005256653301346038, | |
| "grad_norm": 0.37473002076148987, | |
| "learning_rate": 0.00023449999999999998, | |
| "loss": 1.0982, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.0053684969886087204, | |
| "grad_norm": 0.35591790080070496, | |
| "learning_rate": 0.0002395, | |
| "loss": 1.1005, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.0054803406758714025, | |
| "grad_norm": 0.3825643062591553, | |
| "learning_rate": 0.0002445, | |
| "loss": 1.0896, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.005592184363134084, | |
| "grad_norm": 0.3784261643886566, | |
| "learning_rate": 0.0002495, | |
| "loss": 1.1039, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.005704028050396766, | |
| "grad_norm": 0.35387158393859863, | |
| "learning_rate": 0.0002545, | |
| "loss": 1.1038, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.005815871737659447, | |
| "grad_norm": 0.3992142975330353, | |
| "learning_rate": 0.0002595, | |
| "loss": 1.088, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.005927715424922129, | |
| "grad_norm": 0.36795270442962646, | |
| "learning_rate": 0.00026450000000000003, | |
| "loss": 1.0888, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.006039559112184811, | |
| "grad_norm": 0.4007701575756073, | |
| "learning_rate": 0.00026950000000000005, | |
| "loss": 1.0838, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.006151402799447492, | |
| "grad_norm": 0.34527722001075745, | |
| "learning_rate": 0.0002745, | |
| "loss": 1.0892, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.006263246486710174, | |
| "grad_norm": 0.37232115864753723, | |
| "learning_rate": 0.0002795, | |
| "loss": 1.0939, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.006375090173972855, | |
| "grad_norm": 0.4048405885696411, | |
| "learning_rate": 0.0002845, | |
| "loss": 1.0863, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.006486933861235537, | |
| "grad_norm": 0.37317511439323425, | |
| "learning_rate": 0.0002895, | |
| "loss": 1.0711, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.006598777548498219, | |
| "grad_norm": 0.38564008474349976, | |
| "learning_rate": 0.0002945, | |
| "loss": 1.091, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.0067106212357609, | |
| "grad_norm": 0.3639361262321472, | |
| "learning_rate": 0.0002995, | |
| "loss": 1.0682, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.006822464923023582, | |
| "grad_norm": 0.35907182097435, | |
| "learning_rate": 0.0003045, | |
| "loss": 1.0755, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.0069343086102862635, | |
| "grad_norm": 0.35199785232543945, | |
| "learning_rate": 0.0003095, | |
| "loss": 1.0581, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.0070461522975489455, | |
| "grad_norm": 0.35156381130218506, | |
| "learning_rate": 0.0003145, | |
| "loss": 1.0651, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.0071579959848116275, | |
| "grad_norm": 0.3742520213127136, | |
| "learning_rate": 0.0003195, | |
| "loss": 1.0555, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.007269839672074309, | |
| "grad_norm": 0.3587191700935364, | |
| "learning_rate": 0.00032450000000000003, | |
| "loss": 1.0548, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.007381683359336991, | |
| "grad_norm": 0.37587791681289673, | |
| "learning_rate": 0.00032950000000000004, | |
| "loss": 1.0437, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.007493527046599672, | |
| "grad_norm": 0.3410298526287079, | |
| "learning_rate": 0.00033450000000000005, | |
| "loss": 1.0426, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.007605370733862354, | |
| "grad_norm": 0.3450978696346283, | |
| "learning_rate": 0.0003395, | |
| "loss": 1.0487, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.007717214421125036, | |
| "grad_norm": 0.3445068299770355, | |
| "learning_rate": 0.00034449999999999997, | |
| "loss": 1.0411, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.007829058108387717, | |
| "grad_norm": 0.34611567854881287, | |
| "learning_rate": 0.0003495, | |
| "loss": 1.0404, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.007940901795650398, | |
| "grad_norm": 0.3339330852031708, | |
| "learning_rate": 0.0003545, | |
| "loss": 1.0361, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.008052745482913081, | |
| "grad_norm": 0.33232080936431885, | |
| "learning_rate": 0.0003595, | |
| "loss": 1.0271, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.008164589170175762, | |
| "grad_norm": 0.33050498366355896, | |
| "learning_rate": 0.0003645, | |
| "loss": 1.0316, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.008276432857438443, | |
| "grad_norm": 0.3449972867965698, | |
| "learning_rate": 0.0003695, | |
| "loss": 1.0426, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.008388276544701126, | |
| "grad_norm": 0.3543892502784729, | |
| "learning_rate": 0.0003745, | |
| "loss": 1.0475, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.008500120231963807, | |
| "grad_norm": 0.3447831869125366, | |
| "learning_rate": 0.0003795, | |
| "loss": 1.0482, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.008611963919226489, | |
| "grad_norm": 0.33845630288124084, | |
| "learning_rate": 0.0003845, | |
| "loss": 1.0533, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.008723807606489171, | |
| "grad_norm": 0.3394622802734375, | |
| "learning_rate": 0.00038950000000000003, | |
| "loss": 1.0803, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.008835651293751853, | |
| "grad_norm": 0.33649975061416626, | |
| "learning_rate": 0.00039450000000000005, | |
| "loss": 1.0461, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.008947494981014534, | |
| "grad_norm": 0.3265191912651062, | |
| "learning_rate": 0.0003995, | |
| "loss": 1.0714, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.009059338668277215, | |
| "grad_norm": 0.34960776567459106, | |
| "learning_rate": 0.0004045, | |
| "loss": 1.0542, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.009171182355539898, | |
| "grad_norm": 0.3353814482688904, | |
| "learning_rate": 0.0004095, | |
| "loss": 1.0625, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.009283026042802579, | |
| "grad_norm": 0.3499109148979187, | |
| "learning_rate": 0.0004145, | |
| "loss": 1.0679, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.00939486973006526, | |
| "grad_norm": 0.33906084299087524, | |
| "learning_rate": 0.0004195, | |
| "loss": 1.0659, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.009506713417327943, | |
| "grad_norm": 0.3245256543159485, | |
| "learning_rate": 0.0004245, | |
| "loss": 1.078, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.009618557104590624, | |
| "grad_norm": 0.3364386260509491, | |
| "learning_rate": 0.0004295, | |
| "loss": 1.0771, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.009730400791853305, | |
| "grad_norm": 0.348718523979187, | |
| "learning_rate": 0.0004345, | |
| "loss": 1.0751, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.009842244479115988, | |
| "grad_norm": 0.31124839186668396, | |
| "learning_rate": 0.0004395, | |
| "loss": 1.0693, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.00995408816637867, | |
| "grad_norm": 0.3478352129459381, | |
| "learning_rate": 0.0004445, | |
| "loss": 1.0682, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.01006593185364135, | |
| "grad_norm": 0.31189802289009094, | |
| "learning_rate": 0.00044950000000000003, | |
| "loss": 1.0608, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.010177775540904033, | |
| "grad_norm": 0.34715884923934937, | |
| "learning_rate": 0.00045450000000000004, | |
| "loss": 1.0698, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.010289619228166715, | |
| "grad_norm": 0.3279336988925934, | |
| "learning_rate": 0.00045950000000000006, | |
| "loss": 1.0728, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.010401462915429396, | |
| "grad_norm": 0.32010868191719055, | |
| "learning_rate": 0.0004645, | |
| "loss": 1.0765, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.010513306602692077, | |
| "grad_norm": 0.3618028163909912, | |
| "learning_rate": 0.0004695, | |
| "loss": 1.0815, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.01062515028995476, | |
| "grad_norm": 0.3403186798095703, | |
| "learning_rate": 0.0004745, | |
| "loss": 1.0713, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.010736993977217441, | |
| "grad_norm": 0.347687691450119, | |
| "learning_rate": 0.0004795, | |
| "loss": 1.0844, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.010848837664480122, | |
| "grad_norm": 0.3537987768650055, | |
| "learning_rate": 0.0004845, | |
| "loss": 1.0762, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.010960681351742805, | |
| "grad_norm": 0.42015892267227173, | |
| "learning_rate": 0.0004895, | |
| "loss": 1.0832, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.011072525039005486, | |
| "grad_norm": 0.35781368613243103, | |
| "learning_rate": 0.0004945, | |
| "loss": 1.0606, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.011184368726268167, | |
| "grad_norm": 0.3361358344554901, | |
| "learning_rate": 0.0004995, | |
| "loss": 1.0717, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01129621241353085, | |
| "grad_norm": 0.36569204926490784, | |
| "learning_rate": 0.0004997944172872219, | |
| "loss": 1.0602, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.011408056100793531, | |
| "grad_norm": 0.31979477405548096, | |
| "learning_rate": 0.0004995659920508017, | |
| "loss": 1.0531, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.011519899788056212, | |
| "grad_norm": 0.3295707404613495, | |
| "learning_rate": 0.0004993375668143817, | |
| "loss": 1.0346, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.011631743475318894, | |
| "grad_norm": 0.3207838833332062, | |
| "learning_rate": 0.0004991091415779616, | |
| "loss": 1.059, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.011743587162581576, | |
| "grad_norm": 0.33032119274139404, | |
| "learning_rate": 0.0004988807163415415, | |
| "loss": 1.0573, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.011855430849844258, | |
| "grad_norm": 0.3566173017024994, | |
| "learning_rate": 0.0004986522911051213, | |
| "loss": 1.0501, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.011967274537106939, | |
| "grad_norm": 0.31658655405044556, | |
| "learning_rate": 0.0004984238658687012, | |
| "loss": 1.0706, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.012079118224369622, | |
| "grad_norm": 0.3438680171966553, | |
| "learning_rate": 0.0004981954406322811, | |
| "loss": 1.0765, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.012190961911632303, | |
| "grad_norm": 0.3130144774913788, | |
| "learning_rate": 0.0004979670153958609, | |
| "loss": 1.0588, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.012302805598894984, | |
| "grad_norm": 0.31765422224998474, | |
| "learning_rate": 0.0004977385901594408, | |
| "loss": 1.0703, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.012414649286157667, | |
| "grad_norm": 0.36112868785858154, | |
| "learning_rate": 0.0004975101649230207, | |
| "loss": 1.0642, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.012526492973420348, | |
| "grad_norm": 0.33418065309524536, | |
| "learning_rate": 0.0004972817396866005, | |
| "loss": 1.0572, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.01263833666068303, | |
| "grad_norm": 0.34439629316329956, | |
| "learning_rate": 0.0004970533144501805, | |
| "loss": 1.0473, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.01275018034794571, | |
| "grad_norm": 0.32954639196395874, | |
| "learning_rate": 0.0004968248892137603, | |
| "loss": 1.054, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.012862024035208393, | |
| "grad_norm": 0.3351511061191559, | |
| "learning_rate": 0.0004965964639773402, | |
| "loss": 1.0444, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.012973867722471074, | |
| "grad_norm": 0.3065156638622284, | |
| "learning_rate": 0.0004963680387409202, | |
| "loss": 1.0546, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.013085711409733755, | |
| "grad_norm": 0.36450672149658203, | |
| "learning_rate": 0.0004961396135045, | |
| "loss": 1.0501, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.013197555096996438, | |
| "grad_norm": 0.3020591735839844, | |
| "learning_rate": 0.0004959111882680799, | |
| "loss": 1.052, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.01330939878425912, | |
| "grad_norm": 0.3097701966762543, | |
| "learning_rate": 0.0004956827630316598, | |
| "loss": 1.0695, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.0134212424715218, | |
| "grad_norm": 0.3410932719707489, | |
| "learning_rate": 0.0004954543377952396, | |
| "loss": 1.0692, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.013533086158784484, | |
| "grad_norm": 0.38478952646255493, | |
| "learning_rate": 0.0004952259125588195, | |
| "loss": 1.0592, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.013644929846047165, | |
| "grad_norm": 0.3737089931964874, | |
| "learning_rate": 0.0004949974873223994, | |
| "loss": 1.0808, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.013756773533309846, | |
| "grad_norm": 0.3264448940753937, | |
| "learning_rate": 0.0004947690620859793, | |
| "loss": 1.0759, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.013868617220572527, | |
| "grad_norm": 0.3922732472419739, | |
| "learning_rate": 0.0004945406368495591, | |
| "loss": 1.0634, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.01398046090783521, | |
| "grad_norm": 0.36068034172058105, | |
| "learning_rate": 0.000494312211613139, | |
| "loss": 1.0683, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.014092304595097891, | |
| "grad_norm": 0.3544798791408539, | |
| "learning_rate": 0.0004940837863767189, | |
| "loss": 1.0687, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.014204148282360572, | |
| "grad_norm": 0.31447795033454895, | |
| "learning_rate": 0.0004938553611402987, | |
| "loss": 1.0549, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.014315991969623255, | |
| "grad_norm": 0.37639158964157104, | |
| "learning_rate": 0.0004936269359038786, | |
| "loss": 1.0698, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.014427835656885936, | |
| "grad_norm": 0.32416418194770813, | |
| "learning_rate": 0.0004933985106674586, | |
| "loss": 1.0617, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.014539679344148617, | |
| "grad_norm": 0.3122979998588562, | |
| "learning_rate": 0.0004931700854310385, | |
| "loss": 1.0553, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.0146515230314113, | |
| "grad_norm": 0.3574884533882141, | |
| "learning_rate": 0.0004929416601946184, | |
| "loss": 1.0598, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.014763366718673981, | |
| "grad_norm": 0.30762428045272827, | |
| "learning_rate": 0.0004927132349581982, | |
| "loss": 1.0642, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.014875210405936663, | |
| "grad_norm": 0.34350454807281494, | |
| "learning_rate": 0.0004924848097217781, | |
| "loss": 1.0663, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.014987054093199344, | |
| "grad_norm": 0.33486828207969666, | |
| "learning_rate": 0.000492256384485358, | |
| "loss": 1.0479, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.015098897780462027, | |
| "grad_norm": 0.3025324046611786, | |
| "learning_rate": 0.0004920279592489378, | |
| "loss": 1.0705, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.015210741467724708, | |
| "grad_norm": 0.35260385274887085, | |
| "learning_rate": 0.0004917995340125177, | |
| "loss": 1.0762, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.015322585154987389, | |
| "grad_norm": 0.3188925087451935, | |
| "learning_rate": 0.0004915711087760976, | |
| "loss": 1.069, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.015434428842250072, | |
| "grad_norm": 0.332660436630249, | |
| "learning_rate": 0.0004913426835396775, | |
| "loss": 1.0749, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.015546272529512753, | |
| "grad_norm": 0.31745171546936035, | |
| "learning_rate": 0.0004911142583032573, | |
| "loss": 1.0811, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.015658116216775434, | |
| "grad_norm": 0.3237819969654083, | |
| "learning_rate": 0.0004908858330668372, | |
| "loss": 1.0634, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.015769959904038115, | |
| "grad_norm": 0.3300880789756775, | |
| "learning_rate": 0.0004906574078304171, | |
| "loss": 1.0554, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.015881803591300796, | |
| "grad_norm": 0.32475635409355164, | |
| "learning_rate": 0.0004904289825939969, | |
| "loss": 1.0598, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.01599364727856348, | |
| "grad_norm": 0.31278952956199646, | |
| "learning_rate": 0.0004902005573575769, | |
| "loss": 1.0498, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.016105490965826162, | |
| "grad_norm": 0.308680921792984, | |
| "learning_rate": 0.0004899721321211568, | |
| "loss": 1.0586, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.016217334653088843, | |
| "grad_norm": 0.34637314081192017, | |
| "learning_rate": 0.0004897437068847367, | |
| "loss": 1.0535, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.016329178340351524, | |
| "grad_norm": 0.3220643401145935, | |
| "learning_rate": 0.0004895152816483165, | |
| "loss": 1.0624, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.016441022027614206, | |
| "grad_norm": 0.31472912430763245, | |
| "learning_rate": 0.0004892868564118964, | |
| "loss": 1.0748, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.016552865714876887, | |
| "grad_norm": 0.3416632115840912, | |
| "learning_rate": 0.0004890584311754763, | |
| "loss": 1.0715, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.01666470940213957, | |
| "grad_norm": 0.3463667631149292, | |
| "learning_rate": 0.0004888300059390561, | |
| "loss": 1.0914, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.016776553089402253, | |
| "grad_norm": 0.3322199881076813, | |
| "learning_rate": 0.000488601580702636, | |
| "loss": 1.0707, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.016888396776664934, | |
| "grad_norm": 0.3899800479412079, | |
| "learning_rate": 0.0004883731554662159, | |
| "loss": 1.0883, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.017000240463927615, | |
| "grad_norm": 0.3409605324268341, | |
| "learning_rate": 0.0004881447302297958, | |
| "loss": 1.0982, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.017112084151190296, | |
| "grad_norm": 0.3720357120037079, | |
| "learning_rate": 0.0004879163049933757, | |
| "loss": 1.0674, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.017223927838452977, | |
| "grad_norm": 0.326050728559494, | |
| "learning_rate": 0.00048768787975695554, | |
| "loss": 1.0764, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.01733577152571566, | |
| "grad_norm": 0.3238283395767212, | |
| "learning_rate": 0.0004874594545205354, | |
| "loss": 1.0547, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.017447615212978343, | |
| "grad_norm": 0.3324073553085327, | |
| "learning_rate": 0.00048723102928411536, | |
| "loss": 1.0608, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.017559458900241024, | |
| "grad_norm": 0.3382217586040497, | |
| "learning_rate": 0.0004870026040476952, | |
| "loss": 1.0505, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.017671302587503705, | |
| "grad_norm": 0.3409116566181183, | |
| "learning_rate": 0.00048677417881127507, | |
| "loss": 1.0673, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.017783146274766386, | |
| "grad_norm": 0.3123399019241333, | |
| "learning_rate": 0.000486545753574855, | |
| "loss": 1.0461, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.017894989962029068, | |
| "grad_norm": 0.3178008198738098, | |
| "learning_rate": 0.00048631732833843484, | |
| "loss": 1.0526, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.01800683364929175, | |
| "grad_norm": 0.37002459168434143, | |
| "learning_rate": 0.0004860889031020147, | |
| "loss": 1.0483, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.01811867733655443, | |
| "grad_norm": 0.31036287546157837, | |
| "learning_rate": 0.0004858604778655946, | |
| "loss": 1.0418, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.018230521023817114, | |
| "grad_norm": 0.3027215600013733, | |
| "learning_rate": 0.00048563205262917446, | |
| "loss": 1.0467, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.018342364711079796, | |
| "grad_norm": 0.32144612073898315, | |
| "learning_rate": 0.00048540362739275437, | |
| "loss": 1.0437, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.018454208398342477, | |
| "grad_norm": 0.3156447410583496, | |
| "learning_rate": 0.0004851752021563343, | |
| "loss": 1.0447, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.018566052085605158, | |
| "grad_norm": 0.3228546380996704, | |
| "learning_rate": 0.00048494677691991413, | |
| "loss": 1.056, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.01867789577286784, | |
| "grad_norm": 0.3478510081768036, | |
| "learning_rate": 0.000484718351683494, | |
| "loss": 1.0523, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.01878973946013052, | |
| "grad_norm": 0.3413507342338562, | |
| "learning_rate": 0.0004844899264470739, | |
| "loss": 1.049, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.018901583147393205, | |
| "grad_norm": 0.3277221918106079, | |
| "learning_rate": 0.00048426150121065375, | |
| "loss": 1.0403, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.019013426834655886, | |
| "grad_norm": 0.3044646382331848, | |
| "learning_rate": 0.0004840330759742336, | |
| "loss": 1.0518, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.019125270521918567, | |
| "grad_norm": 0.31599846482276917, | |
| "learning_rate": 0.0004838046507378135, | |
| "loss": 1.0475, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.01923711420918125, | |
| "grad_norm": 0.346741646528244, | |
| "learning_rate": 0.00048357622550139343, | |
| "loss": 1.0515, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.01934895789644393, | |
| "grad_norm": 0.32756108045578003, | |
| "learning_rate": 0.0004833478002649733, | |
| "loss": 1.054, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.01946080158370661, | |
| "grad_norm": 0.3318345546722412, | |
| "learning_rate": 0.0004831193750285532, | |
| "loss": 1.0575, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.019572645270969292, | |
| "grad_norm": 0.3389560282230377, | |
| "learning_rate": 0.00048289094979213305, | |
| "loss": 1.0576, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.019684488958231976, | |
| "grad_norm": 0.31532642245292664, | |
| "learning_rate": 0.0004826625245557129, | |
| "loss": 1.0554, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.019796332645494658, | |
| "grad_norm": 0.3263496160507202, | |
| "learning_rate": 0.0004824340993192928, | |
| "loss": 1.0697, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.01990817633275734, | |
| "grad_norm": 0.328225314617157, | |
| "learning_rate": 0.00048220567408287267, | |
| "loss": 1.0584, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.02002002002002002, | |
| "grad_norm": 0.3030998706817627, | |
| "learning_rate": 0.00048197724884645253, | |
| "loss": 1.0555, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.0201318637072827, | |
| "grad_norm": 0.32594701647758484, | |
| "learning_rate": 0.0004817488236100325, | |
| "loss": 1.0512, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.020243707394545382, | |
| "grad_norm": 0.2882954776287079, | |
| "learning_rate": 0.00048152039837361235, | |
| "loss": 1.0441, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.020355551081808067, | |
| "grad_norm": 0.33917129039764404, | |
| "learning_rate": 0.0004812919731371922, | |
| "loss": 1.048, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.020467394769070748, | |
| "grad_norm": 0.32748523354530334, | |
| "learning_rate": 0.0004810635479007721, | |
| "loss": 1.042, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.02057923845633343, | |
| "grad_norm": 0.32332462072372437, | |
| "learning_rate": 0.00048083512266435197, | |
| "loss": 1.0396, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.02069108214359611, | |
| "grad_norm": 0.36977729201316833, | |
| "learning_rate": 0.0004806066974279318, | |
| "loss": 1.0337, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.02080292583085879, | |
| "grad_norm": 0.33298948407173157, | |
| "learning_rate": 0.00048037827219151174, | |
| "loss": 1.045, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.020914769518121473, | |
| "grad_norm": 0.328861802816391, | |
| "learning_rate": 0.00048014984695509165, | |
| "loss": 1.053, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.021026613205384154, | |
| "grad_norm": 0.3438888490200043, | |
| "learning_rate": 0.0004799214217186715, | |
| "loss": 1.0385, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.02113845689264684, | |
| "grad_norm": 0.3251883387565613, | |
| "learning_rate": 0.00047969299648225136, | |
| "loss": 1.0436, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.02125030057990952, | |
| "grad_norm": 0.3300330340862274, | |
| "learning_rate": 0.00047946457124583127, | |
| "loss": 1.0627, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.0213621442671722, | |
| "grad_norm": 0.31774377822875977, | |
| "learning_rate": 0.0004792361460094111, | |
| "loss": 1.0491, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.021473987954434882, | |
| "grad_norm": 0.36171990633010864, | |
| "learning_rate": 0.000479007720772991, | |
| "loss": 1.0536, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.021585831641697563, | |
| "grad_norm": 0.33032888174057007, | |
| "learning_rate": 0.0004787792955365709, | |
| "loss": 1.0327, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.021697675328960244, | |
| "grad_norm": 0.34056538343429565, | |
| "learning_rate": 0.00047855087030015074, | |
| "loss": 1.0354, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.021809519016222925, | |
| "grad_norm": 0.31768256425857544, | |
| "learning_rate": 0.00047832244506373065, | |
| "loss": 1.0278, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.02192136270348561, | |
| "grad_norm": 0.33165955543518066, | |
| "learning_rate": 0.00047809401982731056, | |
| "loss": 1.057, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.02203320639074829, | |
| "grad_norm": 0.34456339478492737, | |
| "learning_rate": 0.0004778655945908904, | |
| "loss": 1.0465, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.022145050078010972, | |
| "grad_norm": 0.35331544280052185, | |
| "learning_rate": 0.0004776371693544703, | |
| "loss": 1.0509, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.022256893765273653, | |
| "grad_norm": 0.3497447669506073, | |
| "learning_rate": 0.0004774087441180502, | |
| "loss": 1.0579, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.022368737452536334, | |
| "grad_norm": 0.31631171703338623, | |
| "learning_rate": 0.00047718031888163004, | |
| "loss": 1.0747, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.022480581139799016, | |
| "grad_norm": 0.34811535477638245, | |
| "learning_rate": 0.0004769518936452099, | |
| "loss": 1.0443, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.0225924248270617, | |
| "grad_norm": 0.350975900888443, | |
| "learning_rate": 0.0004767234684087898, | |
| "loss": 1.0721, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.02270426851432438, | |
| "grad_norm": 0.38026875257492065, | |
| "learning_rate": 0.0004764950431723697, | |
| "loss": 1.0502, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.022816112201587063, | |
| "grad_norm": 0.3079335391521454, | |
| "learning_rate": 0.00047626661793594957, | |
| "loss": 1.0325, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.022927955888849744, | |
| "grad_norm": 0.3412174582481384, | |
| "learning_rate": 0.0004760381926995295, | |
| "loss": 1.026, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.023039799576112425, | |
| "grad_norm": 0.31905752420425415, | |
| "learning_rate": 0.00047580976746310934, | |
| "loss": 1.033, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.023151643263375106, | |
| "grad_norm": 0.3110033869743347, | |
| "learning_rate": 0.0004755813422266892, | |
| "loss": 1.026, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.023263486950637787, | |
| "grad_norm": 0.3087383210659027, | |
| "learning_rate": 0.0004753529169902691, | |
| "loss": 1.0285, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.023375330637900472, | |
| "grad_norm": 0.310497522354126, | |
| "learning_rate": 0.00047512449175384896, | |
| "loss": 1.012, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.023487174325163153, | |
| "grad_norm": 0.35822993516921997, | |
| "learning_rate": 0.0004748960665174288, | |
| "loss": 1.0124, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.023599018012425834, | |
| "grad_norm": 0.3355759084224701, | |
| "learning_rate": 0.0004746676412810088, | |
| "loss": 1.0159, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.023710861699688515, | |
| "grad_norm": 0.29633432626724243, | |
| "learning_rate": 0.00047443921604458863, | |
| "loss": 1.0068, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.023822705386951196, | |
| "grad_norm": 0.3268597424030304, | |
| "learning_rate": 0.0004742107908081685, | |
| "loss": 1.0029, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.023934549074213878, | |
| "grad_norm": 0.32010769844055176, | |
| "learning_rate": 0.0004739823655717484, | |
| "loss": 1.0081, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.02404639276147656, | |
| "grad_norm": 0.30638498067855835, | |
| "learning_rate": 0.00047375394033532826, | |
| "loss": 0.9955, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.024158236448739243, | |
| "grad_norm": 0.32299259305000305, | |
| "learning_rate": 0.0004735255150989081, | |
| "loss": 1.0028, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.024270080136001924, | |
| "grad_norm": 0.30714213848114014, | |
| "learning_rate": 0.000473297089862488, | |
| "loss": 1.0163, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.024381923823264606, | |
| "grad_norm": 0.3207940459251404, | |
| "learning_rate": 0.0004730686646260679, | |
| "loss": 1.0053, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.024493767510527287, | |
| "grad_norm": 0.3073663115501404, | |
| "learning_rate": 0.0004728402393896478, | |
| "loss": 1.0007, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.024605611197789968, | |
| "grad_norm": 0.3209913671016693, | |
| "learning_rate": 0.0004726118141532277, | |
| "loss": 1.0065, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.02471745488505265, | |
| "grad_norm": 0.2987804114818573, | |
| "learning_rate": 0.00047238338891680755, | |
| "loss": 1.0015, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.024829298572315334, | |
| "grad_norm": 0.31511807441711426, | |
| "learning_rate": 0.0004721549636803874, | |
| "loss": 0.9892, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.024941142259578015, | |
| "grad_norm": 0.2840864956378937, | |
| "learning_rate": 0.0004719265384439673, | |
| "loss": 1.0084, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.025052985946840696, | |
| "grad_norm": 0.3094743490219116, | |
| "learning_rate": 0.0004716981132075472, | |
| "loss": 1.0169, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.025164829634103377, | |
| "grad_norm": 0.2905067205429077, | |
| "learning_rate": 0.00047146968797112703, | |
| "loss": 0.9991, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.02527667332136606, | |
| "grad_norm": 0.31322264671325684, | |
| "learning_rate": 0.00047124126273470694, | |
| "loss": 1.0169, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.02538851700862874, | |
| "grad_norm": 0.29053428769111633, | |
| "learning_rate": 0.00047101283749828685, | |
| "loss": 0.9942, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.02550036069589142, | |
| "grad_norm": 0.2863853871822357, | |
| "learning_rate": 0.0004707844122618667, | |
| "loss": 1.002, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.025612204383154105, | |
| "grad_norm": 0.3087761104106903, | |
| "learning_rate": 0.0004705559870254466, | |
| "loss": 1.0025, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.025724048070416786, | |
| "grad_norm": 0.3308629095554352, | |
| "learning_rate": 0.00047032756178902647, | |
| "loss": 1.0078, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.025835891757679467, | |
| "grad_norm": 0.29703134298324585, | |
| "learning_rate": 0.0004700991365526063, | |
| "loss": 1.006, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.02594773544494215, | |
| "grad_norm": 0.27238258719444275, | |
| "learning_rate": 0.0004698707113161862, | |
| "loss": 0.9963, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.02605957913220483, | |
| "grad_norm": 0.2795617878437042, | |
| "learning_rate": 0.0004696422860797661, | |
| "loss": 0.9876, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.02617142281946751, | |
| "grad_norm": 0.2989327013492584, | |
| "learning_rate": 0.000469413860843346, | |
| "loss": 0.9864, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.026283266506730196, | |
| "grad_norm": 0.3229614794254303, | |
| "learning_rate": 0.00046918543560692586, | |
| "loss": 0.9849, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.026395110193992877, | |
| "grad_norm": 0.2921406328678131, | |
| "learning_rate": 0.00046895701037050577, | |
| "loss": 0.9764, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.026506953881255558, | |
| "grad_norm": 0.2955220639705658, | |
| "learning_rate": 0.0004687285851340856, | |
| "loss": 0.9883, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.02661879756851824, | |
| "grad_norm": 0.31378960609436035, | |
| "learning_rate": 0.0004685001598976655, | |
| "loss": 0.9978, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.02673064125578092, | |
| "grad_norm": 0.30504587292671204, | |
| "learning_rate": 0.0004682717346612454, | |
| "loss": 0.9912, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.0268424849430436, | |
| "grad_norm": 0.3066459000110626, | |
| "learning_rate": 0.00046804330942482524, | |
| "loss": 0.9877, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.026954328630306282, | |
| "grad_norm": 0.3198714256286621, | |
| "learning_rate": 0.0004678148841884051, | |
| "loss": 0.98, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.027066172317568967, | |
| "grad_norm": 0.27119094133377075, | |
| "learning_rate": 0.00046758645895198506, | |
| "loss": 1.001, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.027178016004831648, | |
| "grad_norm": 0.28178098797798157, | |
| "learning_rate": 0.0004673580337155649, | |
| "loss": 0.9605, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.02728985969209433, | |
| "grad_norm": 0.29373088479042053, | |
| "learning_rate": 0.0004671296084791448, | |
| "loss": 0.9834, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.02740170337935701, | |
| "grad_norm": 0.2861827313899994, | |
| "learning_rate": 0.0004669011832427247, | |
| "loss": 0.9797, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.02751354706661969, | |
| "grad_norm": 0.3488409221172333, | |
| "learning_rate": 0.00046667275800630454, | |
| "loss": 0.9682, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.027625390753882373, | |
| "grad_norm": 0.29631665349006653, | |
| "learning_rate": 0.0004664443327698844, | |
| "loss": 0.9751, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.027737234441145054, | |
| "grad_norm": 0.27299416065216064, | |
| "learning_rate": 0.0004662159075334643, | |
| "loss": 0.9571, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.02784907812840774, | |
| "grad_norm": 0.30409684777259827, | |
| "learning_rate": 0.00046598748229704416, | |
| "loss": 0.968, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.02796092181567042, | |
| "grad_norm": 0.2957991063594818, | |
| "learning_rate": 0.00046575905706062407, | |
| "loss": 0.9814, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.0280727655029331, | |
| "grad_norm": 0.28328225016593933, | |
| "learning_rate": 0.000465530631824204, | |
| "loss": 0.9816, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.028184609190195782, | |
| "grad_norm": 0.40670067071914673, | |
| "learning_rate": 0.00046530220658778384, | |
| "loss": 0.9737, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.028296452877458463, | |
| "grad_norm": 0.2818649411201477, | |
| "learning_rate": 0.0004650737813513637, | |
| "loss": 0.9891, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.028408296564721144, | |
| "grad_norm": 0.3054118752479553, | |
| "learning_rate": 0.0004648453561149436, | |
| "loss": 0.9976, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.02852014025198383, | |
| "grad_norm": 0.31439468264579773, | |
| "learning_rate": 0.00046461693087852346, | |
| "loss": 0.9928, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.02863198393924651, | |
| "grad_norm": 0.3173445761203766, | |
| "learning_rate": 0.0004643885056421033, | |
| "loss": 1.0002, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.02874382762650919, | |
| "grad_norm": 0.32495757937431335, | |
| "learning_rate": 0.0004641600804056832, | |
| "loss": 0.9981, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.028855671313771872, | |
| "grad_norm": 0.35957351326942444, | |
| "learning_rate": 0.00046393165516926313, | |
| "loss": 1.0112, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.028967515001034554, | |
| "grad_norm": 0.3070557713508606, | |
| "learning_rate": 0.000463703229932843, | |
| "loss": 1.0047, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.029079358688297235, | |
| "grad_norm": 0.3227770924568176, | |
| "learning_rate": 0.0004634748046964229, | |
| "loss": 1.0115, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.029191202375559916, | |
| "grad_norm": 0.34345880150794983, | |
| "learning_rate": 0.00046324637946000276, | |
| "loss": 0.9984, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.0293030460628226, | |
| "grad_norm": 0.34459254145622253, | |
| "learning_rate": 0.0004630179542235826, | |
| "loss": 0.9965, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.02941488975008528, | |
| "grad_norm": 0.3396269679069519, | |
| "learning_rate": 0.0004627895289871625, | |
| "loss": 0.9986, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.029526733437347963, | |
| "grad_norm": 0.3370846211910248, | |
| "learning_rate": 0.0004625611037507424, | |
| "loss": 0.9987, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.029638577124610644, | |
| "grad_norm": 0.30689191818237305, | |
| "learning_rate": 0.00046233267851432223, | |
| "loss": 1.0081, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.029750420811873325, | |
| "grad_norm": 0.35536935925483704, | |
| "learning_rate": 0.0004621042532779022, | |
| "loss": 0.9948, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.029862264499136006, | |
| "grad_norm": 0.3295105993747711, | |
| "learning_rate": 0.00046187582804148205, | |
| "loss": 1.0115, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.029974108186398687, | |
| "grad_norm": 0.34881895780563354, | |
| "learning_rate": 0.0004616474028050619, | |
| "loss": 1.0024, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.030085951873661372, | |
| "grad_norm": 0.379261314868927, | |
| "learning_rate": 0.0004614189775686418, | |
| "loss": 0.9965, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.030197795560924053, | |
| "grad_norm": 0.34729093313217163, | |
| "learning_rate": 0.0004611905523322217, | |
| "loss": 1.0026, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.030309639248186734, | |
| "grad_norm": 0.34687525033950806, | |
| "learning_rate": 0.00046096212709580153, | |
| "loss": 0.9992, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.030421482935449416, | |
| "grad_norm": 0.3564583659172058, | |
| "learning_rate": 0.00046073370185938144, | |
| "loss": 0.9859, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.030533326622712097, | |
| "grad_norm": 0.3762670159339905, | |
| "learning_rate": 0.0004605052766229613, | |
| "loss": 1.0059, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.030645170309974778, | |
| "grad_norm": 0.3470481038093567, | |
| "learning_rate": 0.0004602768513865412, | |
| "loss": 1.0044, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.030757013997237462, | |
| "grad_norm": 0.3322189450263977, | |
| "learning_rate": 0.0004600484261501211, | |
| "loss": 0.9811, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.030868857684500144, | |
| "grad_norm": 0.3248903751373291, | |
| "learning_rate": 0.00045982000091370097, | |
| "loss": 0.9721, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.030980701371762825, | |
| "grad_norm": 0.32881951332092285, | |
| "learning_rate": 0.0004595915756772808, | |
| "loss": 0.9821, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.031092545059025506, | |
| "grad_norm": 0.35410797595977783, | |
| "learning_rate": 0.0004593631504408607, | |
| "loss": 0.9786, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.031204388746288187, | |
| "grad_norm": 0.3307279050350189, | |
| "learning_rate": 0.0004591347252044406, | |
| "loss": 0.9759, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.03131623243355087, | |
| "grad_norm": 0.3207128643989563, | |
| "learning_rate": 0.00045890629996802045, | |
| "loss": 0.9812, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.03142807612081355, | |
| "grad_norm": 0.3065459728240967, | |
| "learning_rate": 0.0004586778747316003, | |
| "loss": 0.9596, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.03153991980807623, | |
| "grad_norm": 0.3115104138851166, | |
| "learning_rate": 0.00045844944949518027, | |
| "loss": 0.9732, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.031651763495338915, | |
| "grad_norm": 0.3136879801750183, | |
| "learning_rate": 0.0004582210242587601, | |
| "loss": 0.9818, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.03176360718260159, | |
| "grad_norm": 0.3240731656551361, | |
| "learning_rate": 0.00045799259902234, | |
| "loss": 0.9836, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.03187545086986428, | |
| "grad_norm": 0.31390219926834106, | |
| "learning_rate": 0.0004577641737859199, | |
| "loss": 0.9837, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.03198729455712696, | |
| "grad_norm": 0.3056069612503052, | |
| "learning_rate": 0.00045753574854949975, | |
| "loss": 0.995, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.03209913824438964, | |
| "grad_norm": 0.29556363821029663, | |
| "learning_rate": 0.0004573073233130796, | |
| "loss": 1.0018, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.032210981931652324, | |
| "grad_norm": 0.2931666374206543, | |
| "learning_rate": 0.0004570788980766595, | |
| "loss": 1.0124, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.032322825618915, | |
| "grad_norm": 0.31029924750328064, | |
| "learning_rate": 0.0004568504728402394, | |
| "loss": 1.0115, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.03243466930617769, | |
| "grad_norm": 0.3164144456386566, | |
| "learning_rate": 0.0004566220476038193, | |
| "loss": 0.9966, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.032546512993440364, | |
| "grad_norm": 0.31638383865356445, | |
| "learning_rate": 0.0004563936223673992, | |
| "loss": 0.989, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.03265835668070305, | |
| "grad_norm": 0.28559473156929016, | |
| "learning_rate": 0.00045616519713097904, | |
| "loss": 1.0038, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.032770200367965734, | |
| "grad_norm": 0.285154789686203, | |
| "learning_rate": 0.0004559367718945589, | |
| "loss": 1.0009, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.03288204405522841, | |
| "grad_norm": 0.2722555100917816, | |
| "learning_rate": 0.0004557083466581388, | |
| "loss": 0.9977, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.032993887742491096, | |
| "grad_norm": 0.2854909896850586, | |
| "learning_rate": 0.00045547992142171866, | |
| "loss": 0.9996, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.033105731429753774, | |
| "grad_norm": 0.2726607620716095, | |
| "learning_rate": 0.0004552514961852985, | |
| "loss": 0.9925, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.03321757511701646, | |
| "grad_norm": 0.30692654848098755, | |
| "learning_rate": 0.0004550230709488785, | |
| "loss": 0.9776, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.03332941880427914, | |
| "grad_norm": 0.2921067774295807, | |
| "learning_rate": 0.00045479464571245834, | |
| "loss": 0.9831, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.03344126249154182, | |
| "grad_norm": 0.30490297079086304, | |
| "learning_rate": 0.0004545662204760382, | |
| "loss": 0.9835, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.033553106178804505, | |
| "grad_norm": 0.2823980450630188, | |
| "learning_rate": 0.0004543377952396181, | |
| "loss": 0.9859, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.03366494986606718, | |
| "grad_norm": 0.31844133138656616, | |
| "learning_rate": 0.00045410937000319796, | |
| "loss": 1.0007, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.03377679355332987, | |
| "grad_norm": 0.30595019459724426, | |
| "learning_rate": 0.0004538809447667778, | |
| "loss": 1.0069, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.033888637240592545, | |
| "grad_norm": 0.31177419424057007, | |
| "learning_rate": 0.0004536525195303577, | |
| "loss": 1.0068, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.03400048092785523, | |
| "grad_norm": 0.33921870589256287, | |
| "learning_rate": 0.0004534240942939376, | |
| "loss": 1.0116, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.034112324615117914, | |
| "grad_norm": 0.29299408197402954, | |
| "learning_rate": 0.0004531956690575175, | |
| "loss": 1.0014, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.03422416830238059, | |
| "grad_norm": 0.28572002053260803, | |
| "learning_rate": 0.0004529672438210974, | |
| "loss": 0.9976, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.03433601198964328, | |
| "grad_norm": 0.30842283368110657, | |
| "learning_rate": 0.00045273881858467726, | |
| "loss": 0.9994, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.034447855676905954, | |
| "grad_norm": 0.29677408933639526, | |
| "learning_rate": 0.0004525103933482571, | |
| "loss": 1.0055, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.03455969936416864, | |
| "grad_norm": 0.388823926448822, | |
| "learning_rate": 0.000452281968111837, | |
| "loss": 1.0062, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.03467154305143132, | |
| "grad_norm": 0.2956707775592804, | |
| "learning_rate": 0.0004520535428754169, | |
| "loss": 0.9794, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.034783386738694, | |
| "grad_norm": 0.3179475665092468, | |
| "learning_rate": 0.00045182511763899673, | |
| "loss": 0.9831, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.034895230425956686, | |
| "grad_norm": 0.29509803652763367, | |
| "learning_rate": 0.00045159669240257664, | |
| "loss": 0.9851, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.035007074113219364, | |
| "grad_norm": 0.31095758080482483, | |
| "learning_rate": 0.00045136826716615655, | |
| "loss": 0.9852, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.03511891780048205, | |
| "grad_norm": 0.27768880128860474, | |
| "learning_rate": 0.0004511398419297364, | |
| "loss": 0.9741, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.035230761487744726, | |
| "grad_norm": 0.3117106854915619, | |
| "learning_rate": 0.0004509114166933163, | |
| "loss": 0.9987, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.03534260517500741, | |
| "grad_norm": 0.30113616585731506, | |
| "learning_rate": 0.0004506829914568962, | |
| "loss": 0.9855, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.03545444886227009, | |
| "grad_norm": 0.2842777967453003, | |
| "learning_rate": 0.00045045456622047603, | |
| "loss": 0.9793, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.03556629254953277, | |
| "grad_norm": 0.30115559697151184, | |
| "learning_rate": 0.00045022614098405594, | |
| "loss": 0.9854, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.03567813623679546, | |
| "grad_norm": 0.3350517153739929, | |
| "learning_rate": 0.0004499977157476358, | |
| "loss": 0.9787, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.035789979924058135, | |
| "grad_norm": 0.2736664414405823, | |
| "learning_rate": 0.00044976929051121565, | |
| "loss": 1.0067, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.03590182361132082, | |
| "grad_norm": 0.2868112027645111, | |
| "learning_rate": 0.0004495408652747956, | |
| "loss": 1.0002, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.0360136672985835, | |
| "grad_norm": 0.27296972274780273, | |
| "learning_rate": 0.00044931244003837547, | |
| "loss": 0.9939, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.03612551098584618, | |
| "grad_norm": 0.2894013226032257, | |
| "learning_rate": 0.00044908401480195533, | |
| "loss": 1.0017, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.03623735467310886, | |
| "grad_norm": 0.26549386978149414, | |
| "learning_rate": 0.0004488555895655352, | |
| "loss": 0.9953, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.036349198360371544, | |
| "grad_norm": 0.27381303906440735, | |
| "learning_rate": 0.0004486271643291151, | |
| "loss": 1.0077, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.03646104204763423, | |
| "grad_norm": 0.2829972505569458, | |
| "learning_rate": 0.00044839873909269495, | |
| "loss": 1.0008, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.03657288573489691, | |
| "grad_norm": 0.29023584723472595, | |
| "learning_rate": 0.0004481703138562748, | |
| "loss": 0.9999, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.03668472942215959, | |
| "grad_norm": 0.29526880383491516, | |
| "learning_rate": 0.00044794188861985477, | |
| "loss": 0.9982, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.03679657310942227, | |
| "grad_norm": 0.27724817395210266, | |
| "learning_rate": 0.0004477134633834346, | |
| "loss": 1.0109, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.036908416796684954, | |
| "grad_norm": 0.2780180275440216, | |
| "learning_rate": 0.0004474850381470145, | |
| "loss": 0.997, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.03702026048394764, | |
| "grad_norm": 0.29814234375953674, | |
| "learning_rate": 0.0004472566129105944, | |
| "loss": 1.0056, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.037132104171210316, | |
| "grad_norm": 0.3131207823753357, | |
| "learning_rate": 0.00044702818767417425, | |
| "loss": 0.999, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.037243947858473, | |
| "grad_norm": 0.2865641415119171, | |
| "learning_rate": 0.0004467997624377541, | |
| "loss": 0.9938, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.03735579154573568, | |
| "grad_norm": 0.31247007846832275, | |
| "learning_rate": 0.000446571337201334, | |
| "loss": 1.0029, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.03746763523299836, | |
| "grad_norm": 0.3432846665382385, | |
| "learning_rate": 0.00044634291196491387, | |
| "loss": 0.9861, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.03757947892026104, | |
| "grad_norm": 0.3200684189796448, | |
| "learning_rate": 0.0004461144867284938, | |
| "loss": 0.9958, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.037691322607523725, | |
| "grad_norm": 0.3280775547027588, | |
| "learning_rate": 0.0004458860614920737, | |
| "loss": 0.9972, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.03780316629478641, | |
| "grad_norm": 0.3129955232143402, | |
| "learning_rate": 0.00044565763625565354, | |
| "loss": 0.9947, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.03791500998204909, | |
| "grad_norm": 0.27574583888053894, | |
| "learning_rate": 0.0004454292110192334, | |
| "loss": 1.0004, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.03802685366931177, | |
| "grad_norm": 0.3088320791721344, | |
| "learning_rate": 0.0004452007857828133, | |
| "loss": 0.9907, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.03813869735657445, | |
| "grad_norm": 0.3232235908508301, | |
| "learning_rate": 0.00044497236054639316, | |
| "loss": 0.9956, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.038250541043837134, | |
| "grad_norm": 0.3009951114654541, | |
| "learning_rate": 0.000444743935309973, | |
| "loss": 0.9899, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.03836238473109981, | |
| "grad_norm": 0.2987104058265686, | |
| "learning_rate": 0.00044451551007355293, | |
| "loss": 0.9852, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.0384742284183625, | |
| "grad_norm": 0.2890870273113251, | |
| "learning_rate": 0.00044428708483713284, | |
| "loss": 0.9775, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.03858607210562518, | |
| "grad_norm": 0.2704969048500061, | |
| "learning_rate": 0.0004440586596007127, | |
| "loss": 0.9745, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.03869791579288786, | |
| "grad_norm": 0.3041844964027405, | |
| "learning_rate": 0.0004438302343642926, | |
| "loss": 0.977, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.038809759480150544, | |
| "grad_norm": 0.2794378995895386, | |
| "learning_rate": 0.00044360180912787246, | |
| "loss": 0.9818, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.03892160316741322, | |
| "grad_norm": 0.2784910798072815, | |
| "learning_rate": 0.0004433733838914523, | |
| "loss": 0.9655, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.039033446854675906, | |
| "grad_norm": 0.2610478103160858, | |
| "learning_rate": 0.0004431449586550322, | |
| "loss": 0.975, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.039145290541938584, | |
| "grad_norm": 0.2646799087524414, | |
| "learning_rate": 0.0004429165334186121, | |
| "loss": 0.9767, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.03925713422920127, | |
| "grad_norm": 0.2622663676738739, | |
| "learning_rate": 0.00044268810818219194, | |
| "loss": 0.98, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.03936897791646395, | |
| "grad_norm": 0.26897987723350525, | |
| "learning_rate": 0.0004424596829457719, | |
| "loss": 0.9718, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.03948082160372663, | |
| "grad_norm": 0.29816752672195435, | |
| "learning_rate": 0.00044223125770935176, | |
| "loss": 1.0074, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.039592665290989315, | |
| "grad_norm": 0.2652198076248169, | |
| "learning_rate": 0.0004420028324729316, | |
| "loss": 0.9789, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.03970450897825199, | |
| "grad_norm": 0.2648336887359619, | |
| "learning_rate": 0.0004417744072365115, | |
| "loss": 0.9794, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.03981635266551468, | |
| "grad_norm": 0.25409677624702454, | |
| "learning_rate": 0.0004415459820000914, | |
| "loss": 0.9868, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.039928196352777355, | |
| "grad_norm": 0.25675469636917114, | |
| "learning_rate": 0.00044131755676367123, | |
| "loss": 0.9827, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.04004004004004004, | |
| "grad_norm": 0.2915634214878082, | |
| "learning_rate": 0.00044108913152725114, | |
| "loss": 0.9833, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.040151883727302724, | |
| "grad_norm": 0.29538393020629883, | |
| "learning_rate": 0.000440860706290831, | |
| "loss": 0.9848, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.0402637274145654, | |
| "grad_norm": 0.3026215732097626, | |
| "learning_rate": 0.0004406322810544109, | |
| "loss": 0.9778, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.04037557110182809, | |
| "grad_norm": 0.30865418910980225, | |
| "learning_rate": 0.0004404038558179908, | |
| "loss": 0.9743, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.040487414789090764, | |
| "grad_norm": 0.28092265129089355, | |
| "learning_rate": 0.0004401754305815707, | |
| "loss": 0.9795, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.04059925847635345, | |
| "grad_norm": 0.27747923135757446, | |
| "learning_rate": 0.00043994700534515053, | |
| "loss": 0.9642, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.040711102163616134, | |
| "grad_norm": 0.28192010521888733, | |
| "learning_rate": 0.00043971858010873044, | |
| "loss": 0.9742, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.04082294585087881, | |
| "grad_norm": 0.2670564651489258, | |
| "learning_rate": 0.0004394901548723103, | |
| "loss": 0.9544, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.040934789538141496, | |
| "grad_norm": 0.3089617192745209, | |
| "learning_rate": 0.00043926172963589015, | |
| "loss": 0.9563, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.041046633225404174, | |
| "grad_norm": 0.26768213510513306, | |
| "learning_rate": 0.00043903330439947, | |
| "loss": 0.9531, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.04115847691266686, | |
| "grad_norm": 0.28865131735801697, | |
| "learning_rate": 0.00043880487916305, | |
| "loss": 0.9579, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.041270320599929536, | |
| "grad_norm": 0.27369582653045654, | |
| "learning_rate": 0.00043857645392662983, | |
| "loss": 0.9679, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.04138216428719222, | |
| "grad_norm": 0.2889108955860138, | |
| "learning_rate": 0.0004383480286902097, | |
| "loss": 0.9561, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.041494007974454905, | |
| "grad_norm": 0.2701929211616516, | |
| "learning_rate": 0.0004381196034537896, | |
| "loss": 0.9642, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.04160585166171758, | |
| "grad_norm": 0.2817586064338684, | |
| "learning_rate": 0.00043789117821736945, | |
| "loss": 0.9701, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.04171769534898027, | |
| "grad_norm": 0.2924664318561554, | |
| "learning_rate": 0.0004376627529809493, | |
| "loss": 0.9617, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.041829539036242945, | |
| "grad_norm": 0.28590497374534607, | |
| "learning_rate": 0.0004374343277445292, | |
| "loss": 0.9646, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.04194138272350563, | |
| "grad_norm": 0.270046591758728, | |
| "learning_rate": 0.0004372059025081091, | |
| "loss": 0.95, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.04205322641076831, | |
| "grad_norm": 0.2508755326271057, | |
| "learning_rate": 0.000436977477271689, | |
| "loss": 0.9525, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.04216507009803099, | |
| "grad_norm": 0.26878127455711365, | |
| "learning_rate": 0.0004367490520352689, | |
| "loss": 0.9609, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.04227691378529368, | |
| "grad_norm": 0.26882994174957275, | |
| "learning_rate": 0.00043652062679884875, | |
| "loss": 0.9671, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.042388757472556354, | |
| "grad_norm": 0.28049325942993164, | |
| "learning_rate": 0.0004362922015624286, | |
| "loss": 0.9492, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.04250060115981904, | |
| "grad_norm": 0.33502647280693054, | |
| "learning_rate": 0.0004360637763260085, | |
| "loss": 0.9537, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.04261244484708172, | |
| "grad_norm": 0.321997731924057, | |
| "learning_rate": 0.00043583535108958837, | |
| "loss": 0.9646, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.0427242885343444, | |
| "grad_norm": 0.29477357864379883, | |
| "learning_rate": 0.0004356069258531682, | |
| "loss": 0.9794, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.04283613222160708, | |
| "grad_norm": 0.2989972233772278, | |
| "learning_rate": 0.0004353785006167482, | |
| "loss": 0.9645, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.042947975908869764, | |
| "grad_norm": 0.33459851145744324, | |
| "learning_rate": 0.00043515007538032804, | |
| "loss": 0.9556, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.04305981959613245, | |
| "grad_norm": 0.2941781282424927, | |
| "learning_rate": 0.0004349216501439079, | |
| "loss": 0.9507, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.043171663283395126, | |
| "grad_norm": 0.27801111340522766, | |
| "learning_rate": 0.0004346932249074878, | |
| "loss": 0.9623, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.04328350697065781, | |
| "grad_norm": 0.2765832841396332, | |
| "learning_rate": 0.00043446479967106767, | |
| "loss": 0.9815, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.04339535065792049, | |
| "grad_norm": 0.303786039352417, | |
| "learning_rate": 0.0004342363744346475, | |
| "loss": 0.9575, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.04350719434518317, | |
| "grad_norm": 0.29517048597335815, | |
| "learning_rate": 0.00043400794919822743, | |
| "loss": 0.9554, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.04361903803244585, | |
| "grad_norm": 0.28657206892967224, | |
| "learning_rate": 0.0004337795239618073, | |
| "loss": 0.9631, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.043730881719708535, | |
| "grad_norm": 0.2933245003223419, | |
| "learning_rate": 0.0004335510987253872, | |
| "loss": 0.987, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.04384272540697122, | |
| "grad_norm": 0.31331002712249756, | |
| "learning_rate": 0.0004333226734889671, | |
| "loss": 0.971, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.0439545690942339, | |
| "grad_norm": 0.32431700825691223, | |
| "learning_rate": 0.00043309424825254696, | |
| "loss": 0.9603, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.04406641278149658, | |
| "grad_norm": 0.3346642851829529, | |
| "learning_rate": 0.0004328658230161268, | |
| "loss": 0.9721, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.04417825646875926, | |
| "grad_norm": 0.33921241760253906, | |
| "learning_rate": 0.00043263739777970673, | |
| "loss": 0.9639, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.044290100156021944, | |
| "grad_norm": 0.3068247139453888, | |
| "learning_rate": 0.0004324089725432866, | |
| "loss": 0.9756, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.04440194384328462, | |
| "grad_norm": 0.3049049973487854, | |
| "learning_rate": 0.00043218054730686644, | |
| "loss": 0.9693, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.04451378753054731, | |
| "grad_norm": 0.30104655027389526, | |
| "learning_rate": 0.00043195212207044635, | |
| "loss": 0.9704, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.04462563121780999, | |
| "grad_norm": 0.36955609917640686, | |
| "learning_rate": 0.00043172369683402626, | |
| "loss": 0.9527, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.04473747490507267, | |
| "grad_norm": 0.318854957818985, | |
| "learning_rate": 0.0004314952715976061, | |
| "loss": 0.9543, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.044849318592335354, | |
| "grad_norm": 0.3166191875934601, | |
| "learning_rate": 0.000431266846361186, | |
| "loss": 0.968, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.04496116227959803, | |
| "grad_norm": 0.2976950407028198, | |
| "learning_rate": 0.0004310384211247659, | |
| "loss": 0.9822, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.045073005966860716, | |
| "grad_norm": 0.2912284731864929, | |
| "learning_rate": 0.00043080999588834574, | |
| "loss": 0.9759, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.0451848496541234, | |
| "grad_norm": 0.31027549505233765, | |
| "learning_rate": 0.00043058157065192565, | |
| "loss": 0.9794, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.04529669334138608, | |
| "grad_norm": 0.3182738721370697, | |
| "learning_rate": 0.0004303531454155055, | |
| "loss": 0.9654, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.04540853702864876, | |
| "grad_norm": 0.3006060719490051, | |
| "learning_rate": 0.00043012472017908536, | |
| "loss": 0.9548, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.04552038071591144, | |
| "grad_norm": 0.2828291654586792, | |
| "learning_rate": 0.0004298962949426653, | |
| "loss": 0.9611, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.045632224403174125, | |
| "grad_norm": 0.30988603830337524, | |
| "learning_rate": 0.0004296678697062452, | |
| "loss": 0.9614, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.0457440680904368, | |
| "grad_norm": 0.29344943165779114, | |
| "learning_rate": 0.00042943944446982503, | |
| "loss": 0.9522, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.04585591177769949, | |
| "grad_norm": 0.29713529348373413, | |
| "learning_rate": 0.00042921101923340494, | |
| "loss": 0.9468, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.04596775546496217, | |
| "grad_norm": 0.2815961539745331, | |
| "learning_rate": 0.0004289825939969848, | |
| "loss": 0.9546, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.04607959915222485, | |
| "grad_norm": 0.25218480825424194, | |
| "learning_rate": 0.00042875416876056465, | |
| "loss": 0.9372, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.046191442839487534, | |
| "grad_norm": 0.2735552191734314, | |
| "learning_rate": 0.0004285257435241445, | |
| "loss": 0.942, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.04630328652675021, | |
| "grad_norm": 0.27451473474502563, | |
| "learning_rate": 0.0004282973182877245, | |
| "loss": 0.931, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.0464151302140129, | |
| "grad_norm": 0.24361196160316467, | |
| "learning_rate": 0.00042806889305130433, | |
| "loss": 0.924, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.046526973901275574, | |
| "grad_norm": 0.25817179679870605, | |
| "learning_rate": 0.0004278404678148842, | |
| "loss": 0.9373, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.04663881758853826, | |
| "grad_norm": 0.28722450137138367, | |
| "learning_rate": 0.0004276120425784641, | |
| "loss": 0.9271, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.046750661275800943, | |
| "grad_norm": 0.25202882289886475, | |
| "learning_rate": 0.00042738361734204395, | |
| "loss": 0.9187, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.04686250496306362, | |
| "grad_norm": 0.2637481391429901, | |
| "learning_rate": 0.0004271551921056238, | |
| "loss": 0.9402, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.046974348650326306, | |
| "grad_norm": 0.2684090733528137, | |
| "learning_rate": 0.0004269267668692037, | |
| "loss": 0.9574, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.047086192337588983, | |
| "grad_norm": 0.28711873292922974, | |
| "learning_rate": 0.00042669834163278357, | |
| "loss": 0.9551, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.04719803602485167, | |
| "grad_norm": 0.2933102250099182, | |
| "learning_rate": 0.0004264699163963635, | |
| "loss": 0.9457, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.047309879712114346, | |
| "grad_norm": 0.2875578701496124, | |
| "learning_rate": 0.0004262414911599434, | |
| "loss": 0.9667, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.04742172339937703, | |
| "grad_norm": 0.3007104694843292, | |
| "learning_rate": 0.00042601306592352325, | |
| "loss": 0.9672, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.047533567086639715, | |
| "grad_norm": 0.30211201310157776, | |
| "learning_rate": 0.0004257846406871031, | |
| "loss": 0.9781, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.04764541077390239, | |
| "grad_norm": 0.29263827204704285, | |
| "learning_rate": 0.000425556215450683, | |
| "loss": 0.9923, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.04775725446116508, | |
| "grad_norm": 0.29569676518440247, | |
| "learning_rate": 0.00042532779021426287, | |
| "loss": 0.9913, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.047869098148427755, | |
| "grad_norm": 0.28223690390586853, | |
| "learning_rate": 0.0004250993649778427, | |
| "loss": 0.9817, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.04798094183569044, | |
| "grad_norm": 0.271419882774353, | |
| "learning_rate": 0.00042487093974142263, | |
| "loss": 0.9977, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.04809278552295312, | |
| "grad_norm": 0.26362791657447815, | |
| "learning_rate": 0.00042464251450500254, | |
| "loss": 0.9859, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.0482046292102158, | |
| "grad_norm": 0.31365934014320374, | |
| "learning_rate": 0.0004244140892685824, | |
| "loss": 0.9862, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.04831647289747849, | |
| "grad_norm": 0.26915237307548523, | |
| "learning_rate": 0.0004241856640321623, | |
| "loss": 0.9693, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.048428316584741164, | |
| "grad_norm": 0.2639203369617462, | |
| "learning_rate": 0.00042395723879574217, | |
| "loss": 0.9691, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.04854016027200385, | |
| "grad_norm": 0.30106601119041443, | |
| "learning_rate": 0.000423728813559322, | |
| "loss": 0.9521, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.04865200395926653, | |
| "grad_norm": 0.2807524800300598, | |
| "learning_rate": 0.00042350038832290193, | |
| "loss": 0.9616, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.04876384764652921, | |
| "grad_norm": 0.27363407611846924, | |
| "learning_rate": 0.0004232719630864818, | |
| "loss": 0.9538, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.048875691333791896, | |
| "grad_norm": 0.29041701555252075, | |
| "learning_rate": 0.00042304353785006164, | |
| "loss": 0.9455, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.048987535021054573, | |
| "grad_norm": 0.28237226605415344, | |
| "learning_rate": 0.0004228151126136416, | |
| "loss": 0.9615, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.04909937870831726, | |
| "grad_norm": 0.30885329842567444, | |
| "learning_rate": 0.00042258668737722146, | |
| "loss": 0.9691, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.049211222395579936, | |
| "grad_norm": 0.2734643220901489, | |
| "learning_rate": 0.0004223582621408013, | |
| "loss": 0.9663, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.04932306608284262, | |
| "grad_norm": 0.2652278244495392, | |
| "learning_rate": 0.00042212983690438123, | |
| "loss": 0.9439, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.0494349097701053, | |
| "grad_norm": 0.27749761939048767, | |
| "learning_rate": 0.0004219014116679611, | |
| "loss": 0.9623, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.04954675345736798, | |
| "grad_norm": 0.2812553942203522, | |
| "learning_rate": 0.00042167298643154094, | |
| "loss": 0.9557, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.04965859714463067, | |
| "grad_norm": 0.2762252688407898, | |
| "learning_rate": 0.00042144456119512085, | |
| "loss": 0.945, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.049770440831893345, | |
| "grad_norm": 0.277118980884552, | |
| "learning_rate": 0.0004212161359587007, | |
| "loss": 0.93, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.04988228451915603, | |
| "grad_norm": 0.2723037004470825, | |
| "learning_rate": 0.0004209877107222806, | |
| "loss": 0.963, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.04999412820641871, | |
| "grad_norm": 0.29789137840270996, | |
| "learning_rate": 0.0004207592854858605, | |
| "loss": 0.954, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.05010597189368139, | |
| "grad_norm": 0.26940014958381653, | |
| "learning_rate": 0.0004205308602494404, | |
| "loss": 0.9443, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.05021781558094407, | |
| "grad_norm": 0.263300359249115, | |
| "learning_rate": 0.00042030243501302024, | |
| "loss": 0.9403, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.050329659268206754, | |
| "grad_norm": 0.27823972702026367, | |
| "learning_rate": 0.00042007400977660015, | |
| "loss": 0.95, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.05044150295546944, | |
| "grad_norm": 0.2782444357872009, | |
| "learning_rate": 0.00041984558454018, | |
| "loss": 0.953, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.05055334664273212, | |
| "grad_norm": 0.277182936668396, | |
| "learning_rate": 0.00041961715930375986, | |
| "loss": 0.9498, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.0506651903299948, | |
| "grad_norm": 0.2942575514316559, | |
| "learning_rate": 0.00041938873406733977, | |
| "loss": 0.957, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.05077703401725748, | |
| "grad_norm": 0.3258327543735504, | |
| "learning_rate": 0.0004191603088309197, | |
| "loss": 0.9626, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.05088887770452016, | |
| "grad_norm": 0.27874353528022766, | |
| "learning_rate": 0.00041893188359449953, | |
| "loss": 0.971, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.05100072139178284, | |
| "grad_norm": 0.2981313169002533, | |
| "learning_rate": 0.00041870345835807944, | |
| "loss": 0.965, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.051112565079045526, | |
| "grad_norm": 0.30568984150886536, | |
| "learning_rate": 0.0004184750331216593, | |
| "loss": 0.9566, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.05122440876630821, | |
| "grad_norm": 0.27867600321769714, | |
| "learning_rate": 0.00041824660788523915, | |
| "loss": 0.94, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.05133625245357089, | |
| "grad_norm": 0.30877605080604553, | |
| "learning_rate": 0.000418018182648819, | |
| "loss": 0.9453, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.05144809614083357, | |
| "grad_norm": 0.3018844425678253, | |
| "learning_rate": 0.0004177897574123989, | |
| "loss": 0.9511, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.05155993982809625, | |
| "grad_norm": 0.27943944931030273, | |
| "learning_rate": 0.0004175613321759788, | |
| "loss": 0.9371, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.051671783515358935, | |
| "grad_norm": 0.2654775381088257, | |
| "learning_rate": 0.0004173329069395587, | |
| "loss": 0.9366, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.05178362720262161, | |
| "grad_norm": 0.27594050765037537, | |
| "learning_rate": 0.0004171044817031386, | |
| "loss": 0.9229, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.0518954708898843, | |
| "grad_norm": 0.26856914162635803, | |
| "learning_rate": 0.00041687605646671845, | |
| "loss": 0.9357, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.05200731457714698, | |
| "grad_norm": 0.2956237494945526, | |
| "learning_rate": 0.0004166476312302983, | |
| "loss": 0.9023, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.05211915826440966, | |
| "grad_norm": 0.30004164576530457, | |
| "learning_rate": 0.0004164192059938782, | |
| "loss": 0.9273, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.052231001951672344, | |
| "grad_norm": 0.2691096365451813, | |
| "learning_rate": 0.0004161907807574581, | |
| "loss": 0.9332, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.05234284563893502, | |
| "grad_norm": 0.2551780641078949, | |
| "learning_rate": 0.00041596235552103793, | |
| "loss": 0.9327, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.052454689326197707, | |
| "grad_norm": 0.2806546092033386, | |
| "learning_rate": 0.0004157339302846179, | |
| "loss": 0.9355, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.05256653301346039, | |
| "grad_norm": 0.27648645639419556, | |
| "learning_rate": 0.00041550550504819775, | |
| "loss": 0.9348, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.05267837670072307, | |
| "grad_norm": 0.2816336750984192, | |
| "learning_rate": 0.0004152770798117776, | |
| "loss": 0.9294, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.05279022038798575, | |
| "grad_norm": 0.29570698738098145, | |
| "learning_rate": 0.0004150486545753575, | |
| "loss": 0.9317, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.05290206407524843, | |
| "grad_norm": 0.26981687545776367, | |
| "learning_rate": 0.00041482022933893737, | |
| "loss": 0.9317, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.053013907762511116, | |
| "grad_norm": 0.2586159110069275, | |
| "learning_rate": 0.0004145918041025172, | |
| "loss": 0.9162, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.05312575144977379, | |
| "grad_norm": 0.24129503965377808, | |
| "learning_rate": 0.00041436337886609714, | |
| "loss": 0.934, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.05323759513703648, | |
| "grad_norm": 0.28072717785835266, | |
| "learning_rate": 0.000414134953629677, | |
| "loss": 0.9089, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.05334943882429916, | |
| "grad_norm": 0.2760024964809418, | |
| "learning_rate": 0.0004139065283932569, | |
| "loss": 0.9115, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.05346128251156184, | |
| "grad_norm": 0.28894710540771484, | |
| "learning_rate": 0.0004136781031568368, | |
| "loss": 0.9108, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.053573126198824525, | |
| "grad_norm": 0.27882319688796997, | |
| "learning_rate": 0.00041344967792041667, | |
| "loss": 0.9184, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.0536849698860872, | |
| "grad_norm": 0.27242934703826904, | |
| "learning_rate": 0.0004132212526839965, | |
| "loss": 0.9498, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.05379681357334989, | |
| "grad_norm": 0.2809596359729767, | |
| "learning_rate": 0.00041299282744757643, | |
| "loss": 0.9365, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.053908657260612565, | |
| "grad_norm": 0.3026556074619293, | |
| "learning_rate": 0.0004127644022111563, | |
| "loss": 0.9433, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.05402050094787525, | |
| "grad_norm": 0.2933846116065979, | |
| "learning_rate": 0.00041253597697473614, | |
| "loss": 0.9351, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.054132344635137934, | |
| "grad_norm": 0.2774868309497833, | |
| "learning_rate": 0.00041230755173831605, | |
| "loss": 0.9285, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.05424418832240061, | |
| "grad_norm": 0.2859903573989868, | |
| "learning_rate": 0.00041207912650189596, | |
| "loss": 0.9344, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.054356032009663297, | |
| "grad_norm": 0.26687270402908325, | |
| "learning_rate": 0.0004118507012654758, | |
| "loss": 0.9281, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.054467875696925974, | |
| "grad_norm": 0.31075340509414673, | |
| "learning_rate": 0.00041162227602905573, | |
| "loss": 0.9418, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.05457971938418866, | |
| "grad_norm": 0.2569184899330139, | |
| "learning_rate": 0.0004113938507926356, | |
| "loss": 0.9394, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.054691563071451336, | |
| "grad_norm": 0.26250478625297546, | |
| "learning_rate": 0.00041116542555621544, | |
| "loss": 0.9499, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.05480340675871402, | |
| "grad_norm": 0.27604004740715027, | |
| "learning_rate": 0.00041093700031979535, | |
| "loss": 0.9268, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.054915250445976706, | |
| "grad_norm": 0.26279163360595703, | |
| "learning_rate": 0.0004107085750833752, | |
| "loss": 0.9313, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.05502709413323938, | |
| "grad_norm": 0.29265978932380676, | |
| "learning_rate": 0.00041048014984695506, | |
| "loss": 0.9498, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.05513893782050207, | |
| "grad_norm": 0.32107868790626526, | |
| "learning_rate": 0.000410251724610535, | |
| "loss": 0.9708, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.055250781507764746, | |
| "grad_norm": 0.32804161310195923, | |
| "learning_rate": 0.0004100232993741149, | |
| "loss": 0.9624, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.05536262519502743, | |
| "grad_norm": 0.3207037150859833, | |
| "learning_rate": 0.00040979487413769474, | |
| "loss": 0.9538, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.05547446888229011, | |
| "grad_norm": 0.29660555720329285, | |
| "learning_rate": 0.00040956644890127465, | |
| "loss": 0.9677, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.05558631256955279, | |
| "grad_norm": 0.34930771589279175, | |
| "learning_rate": 0.0004093380236648545, | |
| "loss": 0.9777, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.05569815625681548, | |
| "grad_norm": 0.3037464916706085, | |
| "learning_rate": 0.00040910959842843436, | |
| "loss": 0.9826, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.055809999944078155, | |
| "grad_norm": 0.31435292959213257, | |
| "learning_rate": 0.00040888117319201427, | |
| "loss": 0.9677, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.05592184363134084, | |
| "grad_norm": 0.29182785749435425, | |
| "learning_rate": 0.0004086527479555941, | |
| "loss": 0.9563, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.05603368731860352, | |
| "grad_norm": 0.34796231985092163, | |
| "learning_rate": 0.00040842432271917403, | |
| "loss": 0.957, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.0561455310058662, | |
| "grad_norm": 0.3027050495147705, | |
| "learning_rate": 0.00040819589748275394, | |
| "loss": 0.967, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.056257374693128887, | |
| "grad_norm": 0.3419332802295685, | |
| "learning_rate": 0.0004079674722463338, | |
| "loss": 0.9654, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.056369218380391564, | |
| "grad_norm": 0.29381224513053894, | |
| "learning_rate": 0.00040773904700991366, | |
| "loss": 0.9647, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.05648106206765425, | |
| "grad_norm": 0.29206860065460205, | |
| "learning_rate": 0.0004075106217734935, | |
| "loss": 0.9637, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.056592905754916926, | |
| "grad_norm": 0.3169795274734497, | |
| "learning_rate": 0.0004072821965370734, | |
| "loss": 0.963, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.05670474944217961, | |
| "grad_norm": 0.30713772773742676, | |
| "learning_rate": 0.0004070537713006533, | |
| "loss": 0.9766, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.05681659312944229, | |
| "grad_norm": 0.29805994033813477, | |
| "learning_rate": 0.00040682534606423313, | |
| "loss": 0.9597, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.05692843681670497, | |
| "grad_norm": 0.33419644832611084, | |
| "learning_rate": 0.0004065969208278131, | |
| "loss": 0.9598, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.05704028050396766, | |
| "grad_norm": 0.31769025325775146, | |
| "learning_rate": 0.00040636849559139295, | |
| "loss": 0.942, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.057152124191230336, | |
| "grad_norm": 0.3017726242542267, | |
| "learning_rate": 0.0004061400703549728, | |
| "loss": 0.9627, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.05726396787849302, | |
| "grad_norm": 0.32213470339775085, | |
| "learning_rate": 0.0004059116451185527, | |
| "loss": 0.9518, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.0573758115657557, | |
| "grad_norm": 0.29069948196411133, | |
| "learning_rate": 0.0004056832198821326, | |
| "loss": 0.9337, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.05748765525301838, | |
| "grad_norm": 0.32283100485801697, | |
| "learning_rate": 0.00040545479464571243, | |
| "loss": 0.959, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.05759949894028106, | |
| "grad_norm": 0.3191847801208496, | |
| "learning_rate": 0.00040522636940929234, | |
| "loss": 0.9439, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.057711342627543745, | |
| "grad_norm": 0.565864622592926, | |
| "learning_rate": 0.00040499794417287225, | |
| "loss": 0.9587, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.05782318631480643, | |
| "grad_norm": 0.3419003188610077, | |
| "learning_rate": 0.0004047695189364521, | |
| "loss": 0.9466, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.05793503000206911, | |
| "grad_norm": 0.28331097960472107, | |
| "learning_rate": 0.000404541093700032, | |
| "loss": 0.9472, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.05804687368933179, | |
| "grad_norm": 0.2994554042816162, | |
| "learning_rate": 0.00040431266846361187, | |
| "loss": 0.9434, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.05815871737659447, | |
| "grad_norm": 0.30070000886917114, | |
| "learning_rate": 0.0004040842432271917, | |
| "loss": 0.9408, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.058270561063857154, | |
| "grad_norm": 0.29924333095550537, | |
| "learning_rate": 0.00040385581799077164, | |
| "loss": 0.9484, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.05838240475111983, | |
| "grad_norm": 0.2905283272266388, | |
| "learning_rate": 0.0004036273927543515, | |
| "loss": 0.9636, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.058494248438382516, | |
| "grad_norm": 0.3290540874004364, | |
| "learning_rate": 0.00040339896751793135, | |
| "loss": 0.9396, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.0586060921256452, | |
| "grad_norm": 0.29686272144317627, | |
| "learning_rate": 0.0004031705422815113, | |
| "loss": 0.9408, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.05871793581290788, | |
| "grad_norm": 0.2768057882785797, | |
| "learning_rate": 0.00040294211704509117, | |
| "loss": 0.9328, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.05882977950017056, | |
| "grad_norm": 0.2614899277687073, | |
| "learning_rate": 0.000402713691808671, | |
| "loss": 0.9483, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.05894162318743324, | |
| "grad_norm": 0.2692766487598419, | |
| "learning_rate": 0.00040248526657225093, | |
| "loss": 0.9479, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.059053466874695926, | |
| "grad_norm": 0.3009514808654785, | |
| "learning_rate": 0.0004022568413358308, | |
| "loss": 0.9681, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.0591653105619586, | |
| "grad_norm": 0.27767086029052734, | |
| "learning_rate": 0.00040202841609941064, | |
| "loss": 0.9685, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.05927715424922129, | |
| "grad_norm": 0.2956901788711548, | |
| "learning_rate": 0.00040179999086299055, | |
| "loss": 0.9609, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.05938899793648397, | |
| "grad_norm": 0.3046570420265198, | |
| "learning_rate": 0.0004015715656265704, | |
| "loss": 0.961, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.05950084162374665, | |
| "grad_norm": 0.24477365612983704, | |
| "learning_rate": 0.0004013431403901503, | |
| "loss": 0.9501, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.059612685311009335, | |
| "grad_norm": 0.25505194067955017, | |
| "learning_rate": 0.00040111471515373023, | |
| "loss": 0.946, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.05972452899827201, | |
| "grad_norm": 0.26015251874923706, | |
| "learning_rate": 0.0004008862899173101, | |
| "loss": 0.9372, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.0598363726855347, | |
| "grad_norm": 0.24911250174045563, | |
| "learning_rate": 0.00040065786468088994, | |
| "loss": 0.9487, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.059948216372797375, | |
| "grad_norm": 0.2779735028743744, | |
| "learning_rate": 0.00040042943944446985, | |
| "loss": 0.9316, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.06006006006006006, | |
| "grad_norm": 0.30663251876831055, | |
| "learning_rate": 0.0004002010142080497, | |
| "loss": 0.9461, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.060171903747322744, | |
| "grad_norm": 0.2724740505218506, | |
| "learning_rate": 0.00039997258897162956, | |
| "loss": 0.9214, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.06028374743458542, | |
| "grad_norm": 0.26819276809692383, | |
| "learning_rate": 0.00039974416373520947, | |
| "loss": 0.9368, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.060395591121848106, | |
| "grad_norm": 0.26342320442199707, | |
| "learning_rate": 0.0003995157384987894, | |
| "loss": 0.9332, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.060507434809110784, | |
| "grad_norm": 0.32590556144714355, | |
| "learning_rate": 0.00039928731326236924, | |
| "loss": 0.9286, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.06061927849637347, | |
| "grad_norm": 0.2747272849082947, | |
| "learning_rate": 0.00039905888802594915, | |
| "loss": 0.932, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.06073112218363615, | |
| "grad_norm": 0.23089702427387238, | |
| "learning_rate": 0.000398830462789529, | |
| "loss": 0.9216, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.06084296587089883, | |
| "grad_norm": 0.24383346736431122, | |
| "learning_rate": 0.00039860203755310886, | |
| "loss": 0.9333, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.060954809558161516, | |
| "grad_norm": 0.23999489843845367, | |
| "learning_rate": 0.00039837361231668877, | |
| "loss": 0.9134, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.06106665324542419, | |
| "grad_norm": 0.3041435480117798, | |
| "learning_rate": 0.0003981451870802686, | |
| "loss": 0.9226, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.06117849693268688, | |
| "grad_norm": 0.2667579650878906, | |
| "learning_rate": 0.0003979167618438485, | |
| "loss": 0.9148, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.061290340619949556, | |
| "grad_norm": 0.2730364203453064, | |
| "learning_rate": 0.0003976883366074284, | |
| "loss": 0.9073, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.06140218430721224, | |
| "grad_norm": 0.28175118565559387, | |
| "learning_rate": 0.0003974599113710083, | |
| "loss": 0.9097, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.061514027994474925, | |
| "grad_norm": 0.2826266288757324, | |
| "learning_rate": 0.00039723148613458816, | |
| "loss": 0.8972, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.0616258716817376, | |
| "grad_norm": 0.25821810960769653, | |
| "learning_rate": 0.000397003060898168, | |
| "loss": 0.8898, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.06173771536900029, | |
| "grad_norm": 0.31401073932647705, | |
| "learning_rate": 0.0003967746356617479, | |
| "loss": 0.8986, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.061849559056262965, | |
| "grad_norm": 0.2664715349674225, | |
| "learning_rate": 0.0003965462104253278, | |
| "loss": 0.9178, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.06196140274352565, | |
| "grad_norm": 0.2725924253463745, | |
| "learning_rate": 0.00039631778518890763, | |
| "loss": 0.8941, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.06207324643078833, | |
| "grad_norm": 0.2991993725299835, | |
| "learning_rate": 0.0003960893599524876, | |
| "loss": 0.899, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.06218509011805101, | |
| "grad_norm": 0.2683865427970886, | |
| "learning_rate": 0.00039586093471606745, | |
| "loss": 0.9105, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.062296933805313696, | |
| "grad_norm": 0.29127469658851624, | |
| "learning_rate": 0.0003956325094796473, | |
| "loss": 0.9091, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.062408777492576374, | |
| "grad_norm": 0.28191229701042175, | |
| "learning_rate": 0.0003954040842432272, | |
| "loss": 0.9078, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.06252062117983906, | |
| "grad_norm": 0.28319644927978516, | |
| "learning_rate": 0.0003951756590068071, | |
| "loss": 0.9134, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.06263246486710174, | |
| "grad_norm": 0.2563108205795288, | |
| "learning_rate": 0.00039494723377038693, | |
| "loss": 0.9166, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.06274430855436441, | |
| "grad_norm": 0.29730817675590515, | |
| "learning_rate": 0.00039471880853396684, | |
| "loss": 0.9101, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.0628561522416271, | |
| "grad_norm": 0.25925830006599426, | |
| "learning_rate": 0.0003944903832975467, | |
| "loss": 0.9131, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.06296799592888978, | |
| "grad_norm": 0.2645208537578583, | |
| "learning_rate": 0.0003942619580611266, | |
| "loss": 0.9203, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.06307983961615246, | |
| "grad_norm": 0.2844574749469757, | |
| "learning_rate": 0.0003940335328247065, | |
| "loss": 0.914, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.06319168330341515, | |
| "grad_norm": 0.2687402367591858, | |
| "learning_rate": 0.00039380510758828637, | |
| "loss": 0.9095, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.06330352699067783, | |
| "grad_norm": 0.22893477976322174, | |
| "learning_rate": 0.00039357668235186623, | |
| "loss": 0.8993, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.06341537067794051, | |
| "grad_norm": 0.27271768450737, | |
| "learning_rate": 0.00039334825711544614, | |
| "loss": 0.8989, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.06352721436520319, | |
| "grad_norm": 0.27709853649139404, | |
| "learning_rate": 0.000393119831879026, | |
| "loss": 0.8998, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.06363905805246588, | |
| "grad_norm": 0.24321520328521729, | |
| "learning_rate": 0.00039289140664260585, | |
| "loss": 0.887, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.06375090173972855, | |
| "grad_norm": 0.26779887080192566, | |
| "learning_rate": 0.00039266298140618576, | |
| "loss": 0.9091, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.06386274542699123, | |
| "grad_norm": 0.2612350881099701, | |
| "learning_rate": 0.00039243455616976567, | |
| "loss": 0.9043, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.06397458911425392, | |
| "grad_norm": 0.26247987151145935, | |
| "learning_rate": 0.0003922061309333455, | |
| "loss": 0.9024, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.0640864328015166, | |
| "grad_norm": 0.2605653703212738, | |
| "learning_rate": 0.00039197770569692543, | |
| "loss": 0.9311, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.06419827648877928, | |
| "grad_norm": 0.28249841928482056, | |
| "learning_rate": 0.0003917492804605053, | |
| "loss": 0.9265, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.06431012017604196, | |
| "grad_norm": 0.2880108654499054, | |
| "learning_rate": 0.00039152085522408515, | |
| "loss": 0.9331, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.06442196386330465, | |
| "grad_norm": 0.31626009941101074, | |
| "learning_rate": 0.00039129242998766506, | |
| "loss": 0.9483, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.06453380755056733, | |
| "grad_norm": 0.28972744941711426, | |
| "learning_rate": 0.0003910640047512449, | |
| "loss": 0.9239, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.06464565123783, | |
| "grad_norm": 0.27140864729881287, | |
| "learning_rate": 0.00039083557951482477, | |
| "loss": 0.9259, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.0647574949250927, | |
| "grad_norm": 0.26331818103790283, | |
| "learning_rate": 0.00039060715427840473, | |
| "loss": 0.9383, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.06486933861235537, | |
| "grad_norm": 0.26927000284194946, | |
| "learning_rate": 0.0003903787290419846, | |
| "loss": 0.9236, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.06498118229961805, | |
| "grad_norm": 0.2833601236343384, | |
| "learning_rate": 0.00039015030380556444, | |
| "loss": 0.9257, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.06509302598688073, | |
| "grad_norm": 0.2970174551010132, | |
| "learning_rate": 0.00038992187856914435, | |
| "loss": 0.9164, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.06520486967414342, | |
| "grad_norm": 0.27904263138771057, | |
| "learning_rate": 0.0003896934533327242, | |
| "loss": 0.9045, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.0653167133614061, | |
| "grad_norm": 0.24879537522792816, | |
| "learning_rate": 0.00038946502809630406, | |
| "loss": 0.9, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.06542855704866878, | |
| "grad_norm": 0.2897798717021942, | |
| "learning_rate": 0.000389236602859884, | |
| "loss": 0.919, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.06554040073593147, | |
| "grad_norm": 0.26522865891456604, | |
| "learning_rate": 0.00038900817762346383, | |
| "loss": 0.9168, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.06565224442319414, | |
| "grad_norm": 0.26405441761016846, | |
| "learning_rate": 0.00038877975238704374, | |
| "loss": 0.9169, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.06576408811045682, | |
| "grad_norm": 0.2543514370918274, | |
| "learning_rate": 0.00038855132715062365, | |
| "loss": 0.917, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.06587593179771951, | |
| "grad_norm": 0.2683538794517517, | |
| "learning_rate": 0.0003883229019142035, | |
| "loss": 0.9179, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.06598777548498219, | |
| "grad_norm": 0.24559274315834045, | |
| "learning_rate": 0.00038809447667778336, | |
| "loss": 0.907, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.06609961917224487, | |
| "grad_norm": 0.2604455351829529, | |
| "learning_rate": 0.00038786605144136327, | |
| "loss": 0.9172, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.06621146285950755, | |
| "grad_norm": 0.24329319596290588, | |
| "learning_rate": 0.0003876376262049431, | |
| "loss": 0.9171, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.06632330654677024, | |
| "grad_norm": 0.237509623169899, | |
| "learning_rate": 0.000387409200968523, | |
| "loss": 0.9272, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.06643515023403292, | |
| "grad_norm": 0.2569025754928589, | |
| "learning_rate": 0.00038718077573210284, | |
| "loss": 0.9327, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.0665469939212956, | |
| "grad_norm": 0.2908497750759125, | |
| "learning_rate": 0.0003869523504956828, | |
| "loss": 0.9299, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.06665883760855829, | |
| "grad_norm": 0.24669544398784637, | |
| "learning_rate": 0.00038672392525926266, | |
| "loss": 0.9036, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.06677068129582096, | |
| "grad_norm": 0.23906981945037842, | |
| "learning_rate": 0.0003864955000228425, | |
| "loss": 0.9266, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.06688252498308364, | |
| "grad_norm": 0.2822079658508301, | |
| "learning_rate": 0.0003862670747864224, | |
| "loss": 0.9209, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.06699436867034632, | |
| "grad_norm": 0.27469775080680847, | |
| "learning_rate": 0.0003860386495500023, | |
| "loss": 0.9385, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.06710621235760901, | |
| "grad_norm": 0.24559862911701202, | |
| "learning_rate": 0.00038581022431358213, | |
| "loss": 0.9248, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.06721805604487169, | |
| "grad_norm": 0.24427008628845215, | |
| "learning_rate": 0.00038558179907716204, | |
| "loss": 0.9358, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.06732989973213437, | |
| "grad_norm": 0.2626965641975403, | |
| "learning_rate": 0.00038535337384074195, | |
| "loss": 0.9211, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.06744174341939706, | |
| "grad_norm": 0.226990208029747, | |
| "learning_rate": 0.0003851249486043218, | |
| "loss": 0.9292, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.06755358710665973, | |
| "grad_norm": 0.2762834131717682, | |
| "learning_rate": 0.0003848965233679017, | |
| "loss": 0.932, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.06766543079392241, | |
| "grad_norm": 0.2799958884716034, | |
| "learning_rate": 0.0003846680981314816, | |
| "loss": 0.943, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.06777727448118509, | |
| "grad_norm": 0.26224029064178467, | |
| "learning_rate": 0.00038443967289506143, | |
| "loss": 0.9236, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.06788911816844778, | |
| "grad_norm": 0.2897866368293762, | |
| "learning_rate": 0.00038421124765864134, | |
| "loss": 0.95, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.06800096185571046, | |
| "grad_norm": 0.2899113893508911, | |
| "learning_rate": 0.0003839828224222212, | |
| "loss": 0.9403, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.06811280554297314, | |
| "grad_norm": 0.27765581011772156, | |
| "learning_rate": 0.00038375439718580105, | |
| "loss": 0.9447, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.06822464923023583, | |
| "grad_norm": 0.27683207392692566, | |
| "learning_rate": 0.000383525971949381, | |
| "loss": 0.949, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.0683364929174985, | |
| "grad_norm": 0.2815559506416321, | |
| "learning_rate": 0.00038329754671296087, | |
| "loss": 0.9627, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.06844833660476118, | |
| "grad_norm": 0.2741657793521881, | |
| "learning_rate": 0.00038306912147654073, | |
| "loss": 0.9659, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.06856018029202386, | |
| "grad_norm": 0.4103181064128876, | |
| "learning_rate": 0.00038284069624012064, | |
| "loss": 0.9612, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.06867202397928655, | |
| "grad_norm": 0.2862701416015625, | |
| "learning_rate": 0.0003826122710037005, | |
| "loss": 0.9393, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.06878386766654923, | |
| "grad_norm": 0.2789844274520874, | |
| "learning_rate": 0.00038238384576728035, | |
| "loss": 0.9447, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.06889571135381191, | |
| "grad_norm": 0.590391993522644, | |
| "learning_rate": 0.00038215542053086026, | |
| "loss": 0.9525, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.0690075550410746, | |
| "grad_norm": 0.2721211016178131, | |
| "learning_rate": 0.0003819269952944401, | |
| "loss": 0.9467, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.06911939872833728, | |
| "grad_norm": 0.27576929330825806, | |
| "learning_rate": 0.00038169857005802, | |
| "loss": 0.9428, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.06923124241559996, | |
| "grad_norm": 0.28229111433029175, | |
| "learning_rate": 0.00038147014482159993, | |
| "loss": 0.9418, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.06934308610286263, | |
| "grad_norm": 0.29595518112182617, | |
| "learning_rate": 0.0003812417195851798, | |
| "loss": 0.9178, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.06945492979012532, | |
| "grad_norm": 0.3055596351623535, | |
| "learning_rate": 0.00038101329434875965, | |
| "loss": 0.9464, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.069566773477388, | |
| "grad_norm": 0.29212549328804016, | |
| "learning_rate": 0.00038078486911233956, | |
| "loss": 0.9491, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.06967861716465068, | |
| "grad_norm": 0.288928359746933, | |
| "learning_rate": 0.0003805564438759194, | |
| "loss": 0.9285, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 0.06979046085191337, | |
| "grad_norm": 0.2759207487106323, | |
| "learning_rate": 0.00038032801863949927, | |
| "loss": 0.9336, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.06990230453917605, | |
| "grad_norm": 0.31041648983955383, | |
| "learning_rate": 0.0003800995934030792, | |
| "loss": 0.9317, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.07001414822643873, | |
| "grad_norm": 0.29425299167633057, | |
| "learning_rate": 0.0003798711681666591, | |
| "loss": 0.9212, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 0.0701259919137014, | |
| "grad_norm": 0.278062105178833, | |
| "learning_rate": 0.00037964274293023894, | |
| "loss": 0.9291, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 0.0702378356009641, | |
| "grad_norm": 0.2983698546886444, | |
| "learning_rate": 0.00037941431769381885, | |
| "loss": 0.9169, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 0.07034967928822677, | |
| "grad_norm": 0.29595527052879333, | |
| "learning_rate": 0.0003791858924573987, | |
| "loss": 0.9286, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 0.07046152297548945, | |
| "grad_norm": 0.26365020871162415, | |
| "learning_rate": 0.00037895746722097856, | |
| "loss": 0.9312, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.07057336666275214, | |
| "grad_norm": 0.27807778120040894, | |
| "learning_rate": 0.0003787290419845585, | |
| "loss": 0.9274, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 0.07068521035001482, | |
| "grad_norm": 0.2585415840148926, | |
| "learning_rate": 0.00037850061674813833, | |
| "loss": 0.9513, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 0.0707970540372775, | |
| "grad_norm": 0.2740543484687805, | |
| "learning_rate": 0.0003782721915117182, | |
| "loss": 0.922, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 0.07090889772454018, | |
| "grad_norm": 0.28271788358688354, | |
| "learning_rate": 0.00037804376627529815, | |
| "loss": 0.94, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 0.07102074141180287, | |
| "grad_norm": 0.28767603635787964, | |
| "learning_rate": 0.000377815341038878, | |
| "loss": 0.9295, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.07113258509906555, | |
| "grad_norm": 0.25200092792510986, | |
| "learning_rate": 0.00037758691580245786, | |
| "loss": 0.9219, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.07124442878632822, | |
| "grad_norm": 0.27449852228164673, | |
| "learning_rate": 0.00037735849056603777, | |
| "loss": 0.9227, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 0.07135627247359091, | |
| "grad_norm": 0.27951040863990784, | |
| "learning_rate": 0.0003771300653296176, | |
| "loss": 0.9256, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 0.07146811616085359, | |
| "grad_norm": 0.27883175015449524, | |
| "learning_rate": 0.0003769016400931975, | |
| "loss": 0.9244, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 0.07157995984811627, | |
| "grad_norm": 0.27942216396331787, | |
| "learning_rate": 0.00037667321485677734, | |
| "loss": 0.9287, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.07169180353537895, | |
| "grad_norm": 0.2605076730251312, | |
| "learning_rate": 0.00037644478962035725, | |
| "loss": 0.9213, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 0.07180364722264164, | |
| "grad_norm": 0.25812190771102905, | |
| "learning_rate": 0.00037621636438393716, | |
| "loss": 0.9268, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 0.07191549090990432, | |
| "grad_norm": 0.27478551864624023, | |
| "learning_rate": 0.000375987939147517, | |
| "loss": 0.9341, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 0.072027334597167, | |
| "grad_norm": 0.2799810469150543, | |
| "learning_rate": 0.0003757595139110969, | |
| "loss": 0.9308, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 0.07213917828442969, | |
| "grad_norm": 0.2494313269853592, | |
| "learning_rate": 0.0003755310886746768, | |
| "loss": 0.9389, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.07225102197169236, | |
| "grad_norm": 0.3362772762775421, | |
| "learning_rate": 0.00037530266343825664, | |
| "loss": 0.9362, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 0.07236286565895504, | |
| "grad_norm": 0.28501999378204346, | |
| "learning_rate": 0.00037507423820183655, | |
| "loss": 0.9262, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 0.07247470934621772, | |
| "grad_norm": 0.24787545204162598, | |
| "learning_rate": 0.0003748458129654164, | |
| "loss": 0.9409, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 0.07258655303348041, | |
| "grad_norm": 0.277665913105011, | |
| "learning_rate": 0.0003746173877289963, | |
| "loss": 0.9244, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 0.07269839672074309, | |
| "grad_norm": 0.2613317370414734, | |
| "learning_rate": 0.0003743889624925762, | |
| "loss": 0.9429, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.07281024040800577, | |
| "grad_norm": 0.2740306258201599, | |
| "learning_rate": 0.0003741605372561561, | |
| "loss": 0.9422, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 0.07292208409526846, | |
| "grad_norm": 0.3052440881729126, | |
| "learning_rate": 0.00037393211201973593, | |
| "loss": 0.9346, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 0.07303392778253114, | |
| "grad_norm": 0.27979132533073425, | |
| "learning_rate": 0.00037370368678331584, | |
| "loss": 0.9305, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 0.07314577146979381, | |
| "grad_norm": 0.2834227979183197, | |
| "learning_rate": 0.0003734752615468957, | |
| "loss": 0.9305, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 0.07325761515705649, | |
| "grad_norm": 0.28621387481689453, | |
| "learning_rate": 0.00037324683631047555, | |
| "loss": 0.9505, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.07336945884431918, | |
| "grad_norm": 0.2539358139038086, | |
| "learning_rate": 0.00037301841107405546, | |
| "loss": 0.9491, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 0.07348130253158186, | |
| "grad_norm": 0.29257437586784363, | |
| "learning_rate": 0.0003727899858376354, | |
| "loss": 0.9428, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 0.07359314621884454, | |
| "grad_norm": 0.25158485770225525, | |
| "learning_rate": 0.00037256156060121523, | |
| "loss": 0.9471, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 0.07370498990610723, | |
| "grad_norm": 0.26301345229148865, | |
| "learning_rate": 0.00037233313536479514, | |
| "loss": 0.928, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 0.07381683359336991, | |
| "grad_norm": 0.2519192397594452, | |
| "learning_rate": 0.000372104710128375, | |
| "loss": 0.9189, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.07392867728063258, | |
| "grad_norm": 0.29801836609840393, | |
| "learning_rate": 0.00037187628489195485, | |
| "loss": 0.9218, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 0.07404052096789528, | |
| "grad_norm": 0.30779263377189636, | |
| "learning_rate": 0.00037164785965553476, | |
| "loss": 0.9263, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 0.07415236465515795, | |
| "grad_norm": 0.2758638262748718, | |
| "learning_rate": 0.0003714194344191146, | |
| "loss": 0.904, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 0.07426420834242063, | |
| "grad_norm": 0.26482871174812317, | |
| "learning_rate": 0.00037119100918269447, | |
| "loss": 0.9024, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 0.07437605202968331, | |
| "grad_norm": 0.24001047015190125, | |
| "learning_rate": 0.00037096258394627444, | |
| "loss": 0.914, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.074487895716946, | |
| "grad_norm": 0.2694549560546875, | |
| "learning_rate": 0.0003707341587098543, | |
| "loss": 0.921, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 0.07459973940420868, | |
| "grad_norm": 0.25042393803596497, | |
| "learning_rate": 0.00037050573347343415, | |
| "loss": 0.9108, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 0.07471158309147136, | |
| "grad_norm": 0.25945019721984863, | |
| "learning_rate": 0.00037027730823701406, | |
| "loss": 0.912, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 0.07482342677873405, | |
| "grad_norm": 0.2624742090702057, | |
| "learning_rate": 0.0003700488830005939, | |
| "loss": 0.9108, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 0.07493527046599673, | |
| "grad_norm": 0.27438145875930786, | |
| "learning_rate": 0.00036982045776417377, | |
| "loss": 0.9215, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.0750471141532594, | |
| "grad_norm": 0.27610865235328674, | |
| "learning_rate": 0.0003695920325277537, | |
| "loss": 0.9053, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 0.07515895784052208, | |
| "grad_norm": 0.2616426944732666, | |
| "learning_rate": 0.00036936360729133353, | |
| "loss": 0.9255, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 0.07527080152778477, | |
| "grad_norm": 0.3146522641181946, | |
| "learning_rate": 0.00036913518205491344, | |
| "loss": 0.9105, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 0.07538264521504745, | |
| "grad_norm": 0.29139819741249084, | |
| "learning_rate": 0.00036890675681849335, | |
| "loss": 0.9324, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 0.07549448890231013, | |
| "grad_norm": 0.3176229000091553, | |
| "learning_rate": 0.0003686783315820732, | |
| "loss": 0.9434, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.07560633258957282, | |
| "grad_norm": 0.2786601781845093, | |
| "learning_rate": 0.00036844990634565307, | |
| "loss": 0.9405, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 0.0757181762768355, | |
| "grad_norm": 0.2988050580024719, | |
| "learning_rate": 0.000368221481109233, | |
| "loss": 0.9477, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 0.07583001996409817, | |
| "grad_norm": 0.28120875358581543, | |
| "learning_rate": 0.00036799305587281283, | |
| "loss": 0.9521, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 0.07594186365136085, | |
| "grad_norm": 0.27376359701156616, | |
| "learning_rate": 0.0003677646306363927, | |
| "loss": 0.9405, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 0.07605370733862354, | |
| "grad_norm": 0.2721284329891205, | |
| "learning_rate": 0.0003675362053999726, | |
| "loss": 0.9392, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.07616555102588622, | |
| "grad_norm": 0.31443721055984497, | |
| "learning_rate": 0.0003673077801635525, | |
| "loss": 0.939, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 0.0762773947131489, | |
| "grad_norm": 0.27175766229629517, | |
| "learning_rate": 0.00036707935492713236, | |
| "loss": 0.9262, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 0.07638923840041159, | |
| "grad_norm": 0.2984711527824402, | |
| "learning_rate": 0.00036685092969071227, | |
| "loss": 0.9381, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 0.07650108208767427, | |
| "grad_norm": 0.2773591876029968, | |
| "learning_rate": 0.00036662250445429213, | |
| "loss": 0.9217, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 0.07661292577493695, | |
| "grad_norm": 0.29338565468788147, | |
| "learning_rate": 0.000366394079217872, | |
| "loss": 0.9197, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.07672476946219962, | |
| "grad_norm": 0.2456415593624115, | |
| "learning_rate": 0.00036616565398145184, | |
| "loss": 0.9191, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 0.07683661314946232, | |
| "grad_norm": 0.324935644865036, | |
| "learning_rate": 0.00036593722874503175, | |
| "loss": 0.8975, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 0.076948456836725, | |
| "grad_norm": 0.6967706680297852, | |
| "learning_rate": 0.0003657088035086116, | |
| "loss": 0.9053, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 0.07706030052398767, | |
| "grad_norm": 0.8192552328109741, | |
| "learning_rate": 0.0003654803782721915, | |
| "loss": 0.9129, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 0.07717214421125036, | |
| "grad_norm": 0.4698907136917114, | |
| "learning_rate": 0.0003652519530357714, | |
| "loss": 0.9128, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.07728398789851304, | |
| "grad_norm": 0.3055092990398407, | |
| "learning_rate": 0.0003650235277993513, | |
| "loss": 0.9207, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 0.07739583158577572, | |
| "grad_norm": 0.31879591941833496, | |
| "learning_rate": 0.00036479510256293114, | |
| "loss": 0.9101, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 0.0775076752730384, | |
| "grad_norm": 0.2708083987236023, | |
| "learning_rate": 0.00036456667732651105, | |
| "loss": 0.9036, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 0.07761951896030109, | |
| "grad_norm": 0.2801443040370941, | |
| "learning_rate": 0.0003643382520900909, | |
| "loss": 0.9031, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 0.07773136264756376, | |
| "grad_norm": 0.2481400966644287, | |
| "learning_rate": 0.00036410982685367076, | |
| "loss": 0.8952, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.07784320633482644, | |
| "grad_norm": 0.25424808263778687, | |
| "learning_rate": 0.0003638814016172507, | |
| "loss": 0.8846, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 0.07795505002208913, | |
| "grad_norm": 0.2655096650123596, | |
| "learning_rate": 0.0003636529763808306, | |
| "loss": 0.8922, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 0.07806689370935181, | |
| "grad_norm": 0.281180202960968, | |
| "learning_rate": 0.00036342455114441043, | |
| "loss": 0.8934, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 0.07817873739661449, | |
| "grad_norm": 0.2850550413131714, | |
| "learning_rate": 0.00036319612590799034, | |
| "loss": 0.8856, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 0.07829058108387717, | |
| "grad_norm": 0.24838604032993317, | |
| "learning_rate": 0.0003629677006715702, | |
| "loss": 0.905, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.07840242477113986, | |
| "grad_norm": 0.2703045606613159, | |
| "learning_rate": 0.00036273927543515005, | |
| "loss": 0.8816, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 0.07851426845840254, | |
| "grad_norm": 0.2786656320095062, | |
| "learning_rate": 0.00036251085019872996, | |
| "loss": 0.8997, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 0.07862611214566521, | |
| "grad_norm": 0.2771463692188263, | |
| "learning_rate": 0.0003622824249623098, | |
| "loss": 0.9033, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 0.0787379558329279, | |
| "grad_norm": 0.2721976339817047, | |
| "learning_rate": 0.00036205399972588973, | |
| "loss": 0.9109, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 0.07884979952019058, | |
| "grad_norm": 0.2606031596660614, | |
| "learning_rate": 0.00036182557448946964, | |
| "loss": 0.9221, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.07896164320745326, | |
| "grad_norm": 0.45895281434059143, | |
| "learning_rate": 0.0003615971492530495, | |
| "loss": 0.908, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 0.07907348689471594, | |
| "grad_norm": 0.30524522066116333, | |
| "learning_rate": 0.00036136872401662935, | |
| "loss": 0.9234, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 0.07918533058197863, | |
| "grad_norm": 0.2704319953918457, | |
| "learning_rate": 0.00036114029878020926, | |
| "loss": 0.9003, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 0.07929717426924131, | |
| "grad_norm": 0.2770727872848511, | |
| "learning_rate": 0.0003609118735437891, | |
| "loss": 0.9253, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 0.07940901795650399, | |
| "grad_norm": 0.25288262963294983, | |
| "learning_rate": 0.00036068344830736897, | |
| "loss": 0.9089, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.07952086164376668, | |
| "grad_norm": 0.27105236053466797, | |
| "learning_rate": 0.0003604550230709489, | |
| "loss": 0.9138, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 0.07963270533102935, | |
| "grad_norm": 0.2631518840789795, | |
| "learning_rate": 0.0003602265978345288, | |
| "loss": 0.9226, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 0.07974454901829203, | |
| "grad_norm": 0.25269970297813416, | |
| "learning_rate": 0.00035999817259810865, | |
| "loss": 0.9102, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 0.07985639270555471, | |
| "grad_norm": 0.2576468586921692, | |
| "learning_rate": 0.00035976974736168856, | |
| "loss": 0.9075, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 0.0799682363928174, | |
| "grad_norm": 0.26297688484191895, | |
| "learning_rate": 0.0003595413221252684, | |
| "loss": 0.9004, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.08008008008008008, | |
| "grad_norm": 0.3029099702835083, | |
| "learning_rate": 0.00035931289688884827, | |
| "loss": 0.9165, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 0.08019192376734276, | |
| "grad_norm": 0.2699684798717499, | |
| "learning_rate": 0.0003590844716524282, | |
| "loss": 0.9232, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 0.08030376745460545, | |
| "grad_norm": 0.26480093598365784, | |
| "learning_rate": 0.00035885604641600804, | |
| "loss": 0.9319, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 0.08041561114186813, | |
| "grad_norm": 0.27503007650375366, | |
| "learning_rate": 0.0003586276211795879, | |
| "loss": 0.9398, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 0.0805274548291308, | |
| "grad_norm": 0.2715147137641907, | |
| "learning_rate": 0.00035839919594316785, | |
| "loss": 0.9307, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.08063929851639348, | |
| "grad_norm": 0.2697315812110901, | |
| "learning_rate": 0.0003581707707067477, | |
| "loss": 0.9342, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 0.08075114220365617, | |
| "grad_norm": 0.2833189070224762, | |
| "learning_rate": 0.00035794234547032757, | |
| "loss": 0.9604, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 0.08086298589091885, | |
| "grad_norm": 0.3069300353527069, | |
| "learning_rate": 0.0003577139202339075, | |
| "loss": 0.9397, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 0.08097482957818153, | |
| "grad_norm": 0.28459593653678894, | |
| "learning_rate": 0.00035748549499748733, | |
| "loss": 0.925, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 0.08108667326544422, | |
| "grad_norm": 0.28896769881248474, | |
| "learning_rate": 0.0003572570697610672, | |
| "loss": 0.9245, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.0811985169527069, | |
| "grad_norm": 0.2574586272239685, | |
| "learning_rate": 0.0003570286445246471, | |
| "loss": 0.9326, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 0.08131036063996958, | |
| "grad_norm": 0.2965002954006195, | |
| "learning_rate": 0.00035680021928822695, | |
| "loss": 0.9221, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 0.08142220432723227, | |
| "grad_norm": 0.2657724618911743, | |
| "learning_rate": 0.00035657179405180686, | |
| "loss": 0.9143, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 0.08153404801449494, | |
| "grad_norm": 0.2973329424858093, | |
| "learning_rate": 0.0003563433688153867, | |
| "loss": 0.9164, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 0.08164589170175762, | |
| "grad_norm": 0.3032989501953125, | |
| "learning_rate": 0.00035611494357896663, | |
| "loss": 0.9254, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.0817577353890203, | |
| "grad_norm": 0.28107839822769165, | |
| "learning_rate": 0.0003558865183425465, | |
| "loss": 0.9155, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 0.08186957907628299, | |
| "grad_norm": 0.30296218395233154, | |
| "learning_rate": 0.00035565809310612634, | |
| "loss": 0.9218, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 0.08198142276354567, | |
| "grad_norm": 0.28191155195236206, | |
| "learning_rate": 0.00035542966786970625, | |
| "loss": 0.9197, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 0.08209326645080835, | |
| "grad_norm": 0.3113023638725281, | |
| "learning_rate": 0.0003552012426332861, | |
| "loss": 0.9228, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 0.08220511013807104, | |
| "grad_norm": 0.3066212832927704, | |
| "learning_rate": 0.00035497281739686596, | |
| "loss": 0.9191, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.08231695382533372, | |
| "grad_norm": 0.2658233940601349, | |
| "learning_rate": 0.0003547443921604459, | |
| "loss": 0.918, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 0.0824287975125964, | |
| "grad_norm": 0.28222033381462097, | |
| "learning_rate": 0.0003545159669240258, | |
| "loss": 0.9253, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 0.08254064119985907, | |
| "grad_norm": 0.2917843461036682, | |
| "learning_rate": 0.00035428754168760564, | |
| "loss": 0.9059, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 0.08265248488712176, | |
| "grad_norm": 0.290404349565506, | |
| "learning_rate": 0.00035405911645118555, | |
| "loss": 0.9044, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 0.08276432857438444, | |
| "grad_norm": 0.28990834951400757, | |
| "learning_rate": 0.0003538306912147654, | |
| "loss": 0.9078, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.08287617226164712, | |
| "grad_norm": 0.27296292781829834, | |
| "learning_rate": 0.00035360226597834526, | |
| "loss": 0.9081, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 0.08298801594890981, | |
| "grad_norm": 0.25443321466445923, | |
| "learning_rate": 0.00035337384074192517, | |
| "loss": 0.9019, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 0.08309985963617249, | |
| "grad_norm": 0.25014832615852356, | |
| "learning_rate": 0.0003531454155055051, | |
| "loss": 0.8976, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 0.08321170332343517, | |
| "grad_norm": 0.2844237983226776, | |
| "learning_rate": 0.00035291699026908493, | |
| "loss": 0.9039, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 0.08332354701069784, | |
| "grad_norm": 0.26745542883872986, | |
| "learning_rate": 0.00035268856503266484, | |
| "loss": 0.8813, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.08343539069796053, | |
| "grad_norm": 0.30750566720962524, | |
| "learning_rate": 0.0003524601397962447, | |
| "loss": 0.8988, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 0.08354723438522321, | |
| "grad_norm": 0.2960536777973175, | |
| "learning_rate": 0.00035223171455982456, | |
| "loss": 0.8966, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 0.08365907807248589, | |
| "grad_norm": 0.28923213481903076, | |
| "learning_rate": 0.00035200328932340447, | |
| "loss": 0.8872, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 0.08377092175974858, | |
| "grad_norm": 0.2762465476989746, | |
| "learning_rate": 0.0003517748640869843, | |
| "loss": 0.8655, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 0.08388276544701126, | |
| "grad_norm": 0.2870965301990509, | |
| "learning_rate": 0.0003515464388505642, | |
| "loss": 0.889, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.08399460913427394, | |
| "grad_norm": 0.3135611116886139, | |
| "learning_rate": 0.00035131801361414414, | |
| "loss": 0.8898, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 0.08410645282153661, | |
| "grad_norm": 0.29541128873825073, | |
| "learning_rate": 0.000351089588377724, | |
| "loss": 0.8884, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 0.0842182965087993, | |
| "grad_norm": 0.2667001485824585, | |
| "learning_rate": 0.00035086116314130385, | |
| "loss": 0.8923, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 0.08433014019606198, | |
| "grad_norm": 0.28677645325660706, | |
| "learning_rate": 0.00035063273790488376, | |
| "loss": 0.8862, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 0.08444198388332466, | |
| "grad_norm": 0.26973757147789, | |
| "learning_rate": 0.0003504043126684636, | |
| "loss": 0.8739, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.08455382757058735, | |
| "grad_norm": 0.2670735716819763, | |
| "learning_rate": 0.0003501758874320435, | |
| "loss": 0.8843, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 0.08466567125785003, | |
| "grad_norm": 0.2678844928741455, | |
| "learning_rate": 0.0003499474621956234, | |
| "loss": 0.8855, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 0.08477751494511271, | |
| "grad_norm": 0.26894411444664, | |
| "learning_rate": 0.00034971903695920324, | |
| "loss": 0.8828, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 0.08488935863237539, | |
| "grad_norm": 0.28703927993774414, | |
| "learning_rate": 0.00034949061172278315, | |
| "loss": 0.885, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 0.08500120231963808, | |
| "grad_norm": 0.2618086636066437, | |
| "learning_rate": 0.00034926218648636306, | |
| "loss": 0.8777, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.08511304600690076, | |
| "grad_norm": 0.28816747665405273, | |
| "learning_rate": 0.0003490337612499429, | |
| "loss": 0.8836, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 0.08522488969416343, | |
| "grad_norm": 0.29172763228416443, | |
| "learning_rate": 0.00034880533601352277, | |
| "loss": 0.8835, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 0.08533673338142612, | |
| "grad_norm": 0.2613106667995453, | |
| "learning_rate": 0.0003485769107771027, | |
| "loss": 0.8736, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 0.0854485770686888, | |
| "grad_norm": 0.2737283408641815, | |
| "learning_rate": 0.00034834848554068254, | |
| "loss": 0.8589, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 0.08556042075595148, | |
| "grad_norm": 0.2709786295890808, | |
| "learning_rate": 0.0003481200603042624, | |
| "loss": 0.8675, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.08567226444321416, | |
| "grad_norm": 0.2982759177684784, | |
| "learning_rate": 0.0003478916350678423, | |
| "loss": 0.8827, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 0.08578410813047685, | |
| "grad_norm": 0.21551093459129333, | |
| "learning_rate": 0.0003476632098314222, | |
| "loss": 0.8663, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 0.08589595181773953, | |
| "grad_norm": 0.26418018341064453, | |
| "learning_rate": 0.00034743478459500207, | |
| "loss": 0.8845, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 0.0860077955050022, | |
| "grad_norm": 0.2310175597667694, | |
| "learning_rate": 0.000347206359358582, | |
| "loss": 0.8874, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 0.0861196391922649, | |
| "grad_norm": 0.25112512707710266, | |
| "learning_rate": 0.00034697793412216183, | |
| "loss": 0.8896, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.08623148287952757, | |
| "grad_norm": 0.33391082286834717, | |
| "learning_rate": 0.0003467495088857417, | |
| "loss": 0.8765, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 0.08634332656679025, | |
| "grad_norm": 0.24641484022140503, | |
| "learning_rate": 0.0003465210836493216, | |
| "loss": 0.8572, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 0.08645517025405293, | |
| "grad_norm": 0.26017534732818604, | |
| "learning_rate": 0.00034629265841290145, | |
| "loss": 0.8585, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 0.08656701394131562, | |
| "grad_norm": 0.23500847816467285, | |
| "learning_rate": 0.0003460642331764813, | |
| "loss": 0.8797, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 0.0866788576285783, | |
| "grad_norm": 0.25485488772392273, | |
| "learning_rate": 0.0003458358079400612, | |
| "loss": 0.8796, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.08679070131584098, | |
| "grad_norm": 0.27644404768943787, | |
| "learning_rate": 0.00034560738270364113, | |
| "loss": 0.8708, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 0.08690254500310367, | |
| "grad_norm": 0.233077734708786, | |
| "learning_rate": 0.000345378957467221, | |
| "loss": 0.8652, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 0.08701438869036635, | |
| "grad_norm": 0.24039144814014435, | |
| "learning_rate": 0.00034515053223080084, | |
| "loss": 0.8723, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 0.08712623237762902, | |
| "grad_norm": 0.23007874190807343, | |
| "learning_rate": 0.00034492210699438075, | |
| "loss": 0.8644, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 0.0872380760648917, | |
| "grad_norm": 0.27570798993110657, | |
| "learning_rate": 0.0003446936817579606, | |
| "loss": 0.872, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.08734991975215439, | |
| "grad_norm": 0.24157382547855377, | |
| "learning_rate": 0.00034446525652154046, | |
| "loss": 0.8846, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 0.08746176343941707, | |
| "grad_norm": 0.2703733742237091, | |
| "learning_rate": 0.0003442368312851204, | |
| "loss": 0.889, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 0.08757360712667975, | |
| "grad_norm": 0.26786255836486816, | |
| "learning_rate": 0.0003440084060487003, | |
| "loss": 0.8933, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 0.08768545081394244, | |
| "grad_norm": 0.2595812976360321, | |
| "learning_rate": 0.00034377998081228014, | |
| "loss": 0.9156, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 0.08779729450120512, | |
| "grad_norm": 0.24396800994873047, | |
| "learning_rate": 0.00034355155557586005, | |
| "loss": 0.8849, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 0.0879091381884678, | |
| "grad_norm": 0.24363452196121216, | |
| "learning_rate": 0.0003433231303394399, | |
| "loss": 0.9011, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 0.08802098187573047, | |
| "grad_norm": 0.2666647434234619, | |
| "learning_rate": 0.00034309470510301976, | |
| "loss": 0.8952, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 0.08813282556299316, | |
| "grad_norm": 0.267863005399704, | |
| "learning_rate": 0.00034286627986659967, | |
| "loss": 0.9113, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 0.08824466925025584, | |
| "grad_norm": 0.24397262930870056, | |
| "learning_rate": 0.0003426378546301795, | |
| "loss": 0.8762, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 0.08835651293751852, | |
| "grad_norm": 0.23912496864795685, | |
| "learning_rate": 0.00034240942939375943, | |
| "loss": 0.8865, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.08846835662478121, | |
| "grad_norm": 0.2737523913383484, | |
| "learning_rate": 0.00034218100415733934, | |
| "loss": 0.8732, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 0.08858020031204389, | |
| "grad_norm": 0.24978673458099365, | |
| "learning_rate": 0.0003419525789209192, | |
| "loss": 0.8832, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 0.08869204399930657, | |
| "grad_norm": 0.25200751423835754, | |
| "learning_rate": 0.00034172415368449906, | |
| "loss": 0.8952, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 0.08880388768656924, | |
| "grad_norm": 0.7863819003105164, | |
| "learning_rate": 0.00034149572844807897, | |
| "loss": 0.8708, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 0.08891573137383194, | |
| "grad_norm": 0.2560253441333771, | |
| "learning_rate": 0.0003412673032116588, | |
| "loss": 0.8681, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.08902757506109461, | |
| "grad_norm": 0.2669181823730469, | |
| "learning_rate": 0.0003410388779752387, | |
| "loss": 0.9007, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 0.08913941874835729, | |
| "grad_norm": 0.27906209230422974, | |
| "learning_rate": 0.0003408104527388186, | |
| "loss": 0.8988, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 0.08925126243561998, | |
| "grad_norm": 0.2506297826766968, | |
| "learning_rate": 0.0003405820275023985, | |
| "loss": 0.8997, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 0.08936310612288266, | |
| "grad_norm": 0.2513269782066345, | |
| "learning_rate": 0.00034035360226597835, | |
| "loss": 0.9215, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 0.08947494981014534, | |
| "grad_norm": 0.2672421634197235, | |
| "learning_rate": 0.00034012517702955826, | |
| "loss": 0.9112, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.08958679349740803, | |
| "grad_norm": 0.2553747296333313, | |
| "learning_rate": 0.0003398967517931381, | |
| "loss": 0.9255, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 0.08969863718467071, | |
| "grad_norm": 0.2325398176908493, | |
| "learning_rate": 0.000339668326556718, | |
| "loss": 0.9173, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 0.08981048087193338, | |
| "grad_norm": 0.23461295664310455, | |
| "learning_rate": 0.0003394399013202979, | |
| "loss": 0.9183, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 0.08992232455919606, | |
| "grad_norm": 0.26092031598091125, | |
| "learning_rate": 0.00033921147608387774, | |
| "loss": 0.9106, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 0.09003416824645875, | |
| "grad_norm": 0.26250872015953064, | |
| "learning_rate": 0.0003389830508474576, | |
| "loss": 0.8893, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 0.09014601193372143, | |
| "grad_norm": 0.2501981556415558, | |
| "learning_rate": 0.00033875462561103756, | |
| "loss": 0.8934, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 0.09025785562098411, | |
| "grad_norm": 0.26185476779937744, | |
| "learning_rate": 0.0003385262003746174, | |
| "loss": 0.8855, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 0.0903696993082468, | |
| "grad_norm": 0.26889827847480774, | |
| "learning_rate": 0.00033829777513819727, | |
| "loss": 0.8944, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 0.09048154299550948, | |
| "grad_norm": 0.2473451793193817, | |
| "learning_rate": 0.0003380693499017772, | |
| "loss": 0.8937, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 0.09059338668277216, | |
| "grad_norm": 0.24157559871673584, | |
| "learning_rate": 0.00033784092466535704, | |
| "loss": 0.8903, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.09070523037003483, | |
| "grad_norm": 0.2701563239097595, | |
| "learning_rate": 0.0003376124994289369, | |
| "loss": 0.9109, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 0.09081707405729753, | |
| "grad_norm": 0.28706929087638855, | |
| "learning_rate": 0.0003373840741925168, | |
| "loss": 0.8956, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 0.0909289177445602, | |
| "grad_norm": 0.27120909094810486, | |
| "learning_rate": 0.00033715564895609666, | |
| "loss": 0.8947, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 0.09104076143182288, | |
| "grad_norm": 0.2504216432571411, | |
| "learning_rate": 0.00033692722371967657, | |
| "loss": 0.8814, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 0.09115260511908557, | |
| "grad_norm": 0.2921849489212036, | |
| "learning_rate": 0.0003366987984832565, | |
| "loss": 0.8856, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 0.09126444880634825, | |
| "grad_norm": 0.2587922513484955, | |
| "learning_rate": 0.00033647037324683633, | |
| "loss": 0.8778, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 0.09137629249361093, | |
| "grad_norm": 0.2399989813566208, | |
| "learning_rate": 0.0003362419480104162, | |
| "loss": 0.883, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 0.0914881361808736, | |
| "grad_norm": 0.24794407188892365, | |
| "learning_rate": 0.0003360135227739961, | |
| "loss": 0.8935, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 0.0915999798681363, | |
| "grad_norm": 0.26669082045555115, | |
| "learning_rate": 0.00033578509753757595, | |
| "loss": 0.863, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 0.09171182355539897, | |
| "grad_norm": 0.25162795186042786, | |
| "learning_rate": 0.0003355566723011558, | |
| "loss": 0.8887, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.09182366724266165, | |
| "grad_norm": 0.28969621658325195, | |
| "learning_rate": 0.00033532824706473567, | |
| "loss": 0.9066, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 0.09193551092992434, | |
| "grad_norm": 0.25944870710372925, | |
| "learning_rate": 0.00033509982182831563, | |
| "loss": 0.8875, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 0.09204735461718702, | |
| "grad_norm": 0.27627986669540405, | |
| "learning_rate": 0.0003348713965918955, | |
| "loss": 0.8895, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 0.0921591983044497, | |
| "grad_norm": 0.2673914134502411, | |
| "learning_rate": 0.00033464297135547534, | |
| "loss": 0.8937, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 0.09227104199171238, | |
| "grad_norm": 0.2810732126235962, | |
| "learning_rate": 0.00033441454611905525, | |
| "loss": 0.9007, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.09238288567897507, | |
| "grad_norm": 0.2671091556549072, | |
| "learning_rate": 0.0003341861208826351, | |
| "loss": 0.905, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 0.09249472936623775, | |
| "grad_norm": 0.25006943941116333, | |
| "learning_rate": 0.00033395769564621496, | |
| "loss": 0.8981, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 0.09260657305350042, | |
| "grad_norm": 0.2891542613506317, | |
| "learning_rate": 0.0003337292704097949, | |
| "loss": 0.8978, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 0.09271841674076312, | |
| "grad_norm": 0.29497236013412476, | |
| "learning_rate": 0.0003335008451733748, | |
| "loss": 0.9044, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 0.0928302604280258, | |
| "grad_norm": 0.29290974140167236, | |
| "learning_rate": 0.00033327241993695464, | |
| "loss": 0.9081, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.09294210411528847, | |
| "grad_norm": 0.27077415585517883, | |
| "learning_rate": 0.00033304399470053455, | |
| "loss": 0.9184, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 0.09305394780255115, | |
| "grad_norm": 0.26410186290740967, | |
| "learning_rate": 0.0003328155694641144, | |
| "loss": 0.8912, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 0.09316579148981384, | |
| "grad_norm": 0.2818413972854614, | |
| "learning_rate": 0.00033258714422769426, | |
| "loss": 0.9096, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 0.09327763517707652, | |
| "grad_norm": 0.265286386013031, | |
| "learning_rate": 0.00033235871899127417, | |
| "loss": 0.9192, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 0.0933894788643392, | |
| "grad_norm": 0.2714836597442627, | |
| "learning_rate": 0.000332130293754854, | |
| "loss": 0.9122, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 0.09350132255160189, | |
| "grad_norm": 0.2858263850212097, | |
| "learning_rate": 0.0003319018685184339, | |
| "loss": 0.9143, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 0.09361316623886456, | |
| "grad_norm": 0.27788257598876953, | |
| "learning_rate": 0.00033167344328201385, | |
| "loss": 0.9116, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 0.09372500992612724, | |
| "grad_norm": 0.27748674154281616, | |
| "learning_rate": 0.0003314450180455937, | |
| "loss": 0.8934, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 0.09383685361338992, | |
| "grad_norm": 0.4757048785686493, | |
| "learning_rate": 0.00033121659280917356, | |
| "loss": 0.9097, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 0.09394869730065261, | |
| "grad_norm": 0.3016970157623291, | |
| "learning_rate": 0.00033098816757275347, | |
| "loss": 0.8973, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.09406054098791529, | |
| "grad_norm": 0.2640211880207062, | |
| "learning_rate": 0.0003307597423363333, | |
| "loss": 0.8914, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 0.09417238467517797, | |
| "grad_norm": 0.2608022391796112, | |
| "learning_rate": 0.0003305313170999132, | |
| "loss": 0.9138, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 0.09428422836244066, | |
| "grad_norm": 0.23691967129707336, | |
| "learning_rate": 0.0003303028918634931, | |
| "loss": 0.9149, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 0.09439607204970334, | |
| "grad_norm": 0.28734761476516724, | |
| "learning_rate": 0.00033007446662707294, | |
| "loss": 0.9056, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 0.09450791573696601, | |
| "grad_norm": 0.2846873700618744, | |
| "learning_rate": 0.00032984604139065285, | |
| "loss": 0.9052, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 0.09461975942422869, | |
| "grad_norm": 0.2613682448863983, | |
| "learning_rate": 0.00032961761615423276, | |
| "loss": 0.9129, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 0.09473160311149138, | |
| "grad_norm": 0.25336501002311707, | |
| "learning_rate": 0.0003293891909178126, | |
| "loss": 0.9048, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 0.09484344679875406, | |
| "grad_norm": 0.2662324905395508, | |
| "learning_rate": 0.0003291607656813925, | |
| "loss": 0.9181, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 0.09495529048601674, | |
| "grad_norm": 0.2482605278491974, | |
| "learning_rate": 0.0003289323404449724, | |
| "loss": 0.8978, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 0.09506713417327943, | |
| "grad_norm": 0.24181032180786133, | |
| "learning_rate": 0.00032870391520855224, | |
| "loss": 0.9121, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.09517897786054211, | |
| "grad_norm": 0.276621013879776, | |
| "learning_rate": 0.0003284754899721321, | |
| "loss": 0.9106, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 0.09529082154780479, | |
| "grad_norm": 0.2788410186767578, | |
| "learning_rate": 0.000328247064735712, | |
| "loss": 0.9062, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 0.09540266523506746, | |
| "grad_norm": 0.28387385606765747, | |
| "learning_rate": 0.0003280186394992919, | |
| "loss": 0.9309, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 0.09551450892233015, | |
| "grad_norm": 0.2923261523246765, | |
| "learning_rate": 0.00032779021426287177, | |
| "loss": 0.9278, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 0.09562635260959283, | |
| "grad_norm": 0.3008005917072296, | |
| "learning_rate": 0.0003275617890264517, | |
| "loss": 0.9196, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 0.09573819629685551, | |
| "grad_norm": 0.2849402129650116, | |
| "learning_rate": 0.00032733336379003154, | |
| "loss": 0.9243, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 0.0958500399841182, | |
| "grad_norm": 0.262134313583374, | |
| "learning_rate": 0.0003271049385536114, | |
| "loss": 0.9346, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 0.09596188367138088, | |
| "grad_norm": 0.2891925573348999, | |
| "learning_rate": 0.0003268765133171913, | |
| "loss": 0.9176, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 0.09607372735864356, | |
| "grad_norm": 0.26165837049484253, | |
| "learning_rate": 0.00032664808808077116, | |
| "loss": 0.9229, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 0.09618557104590623, | |
| "grad_norm": 0.2683985233306885, | |
| "learning_rate": 0.000326419662844351, | |
| "loss": 0.9067, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.09629741473316893, | |
| "grad_norm": 0.25300973653793335, | |
| "learning_rate": 0.000326191237607931, | |
| "loss": 0.9037, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 0.0964092584204316, | |
| "grad_norm": 0.30520153045654297, | |
| "learning_rate": 0.00032596281237151083, | |
| "loss": 0.9038, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 0.09652110210769428, | |
| "grad_norm": 0.2573854327201843, | |
| "learning_rate": 0.0003257343871350907, | |
| "loss": 0.9062, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 0.09663294579495697, | |
| "grad_norm": 0.2664088308811188, | |
| "learning_rate": 0.0003255059618986706, | |
| "loss": 0.8864, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 0.09674478948221965, | |
| "grad_norm": 0.26375049352645874, | |
| "learning_rate": 0.00032527753666225046, | |
| "loss": 0.8804, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 0.09685663316948233, | |
| "grad_norm": 0.25367647409439087, | |
| "learning_rate": 0.0003250491114258303, | |
| "loss": 0.8987, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 0.09696847685674502, | |
| "grad_norm": 0.2764420807361603, | |
| "learning_rate": 0.00032482068618941017, | |
| "loss": 0.9078, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 0.0970803205440077, | |
| "grad_norm": 0.2663860023021698, | |
| "learning_rate": 0.0003245922609529901, | |
| "loss": 0.8838, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 0.09719216423127038, | |
| "grad_norm": 0.25380998849868774, | |
| "learning_rate": 0.00032436383571657, | |
| "loss": 0.8949, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 0.09730400791853305, | |
| "grad_norm": 0.29428210854530334, | |
| "learning_rate": 0.00032413541048014984, | |
| "loss": 0.883, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.09741585160579574, | |
| "grad_norm": 0.25604331493377686, | |
| "learning_rate": 0.00032390698524372975, | |
| "loss": 0.8891, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 0.09752769529305842, | |
| "grad_norm": 0.26663005352020264, | |
| "learning_rate": 0.0003236785600073096, | |
| "loss": 0.8763, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 0.0976395389803211, | |
| "grad_norm": 0.27305158972740173, | |
| "learning_rate": 0.00032345013477088946, | |
| "loss": 0.8877, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 0.09775138266758379, | |
| "grad_norm": 0.27395525574684143, | |
| "learning_rate": 0.0003232217095344694, | |
| "loss": 0.871, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 0.09786322635484647, | |
| "grad_norm": 0.26152902841567993, | |
| "learning_rate": 0.00032299328429804923, | |
| "loss": 0.8714, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 0.09797507004210915, | |
| "grad_norm": 0.2872631847858429, | |
| "learning_rate": 0.0003227648590616291, | |
| "loss": 0.8754, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 0.09808691372937182, | |
| "grad_norm": 0.2681150436401367, | |
| "learning_rate": 0.00032253643382520905, | |
| "loss": 0.8699, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 0.09819875741663452, | |
| "grad_norm": 0.27205002307891846, | |
| "learning_rate": 0.0003223080085887889, | |
| "loss": 0.8743, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 0.0983106011038972, | |
| "grad_norm": 0.27747979760169983, | |
| "learning_rate": 0.00032207958335236876, | |
| "loss": 0.8607, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 0.09842244479115987, | |
| "grad_norm": 0.2963927984237671, | |
| "learning_rate": 0.00032185115811594867, | |
| "loss": 0.8676, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.09853428847842256, | |
| "grad_norm": 0.26414602994918823, | |
| "learning_rate": 0.0003216227328795285, | |
| "loss": 0.8556, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 0.09864613216568524, | |
| "grad_norm": 0.3005480170249939, | |
| "learning_rate": 0.0003213943076431084, | |
| "loss": 0.8816, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 0.09875797585294792, | |
| "grad_norm": 0.29625314474105835, | |
| "learning_rate": 0.0003211658824066883, | |
| "loss": 0.8747, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 0.0988698195402106, | |
| "grad_norm": 0.2900589108467102, | |
| "learning_rate": 0.0003209374571702682, | |
| "loss": 0.8697, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 0.09898166322747329, | |
| "grad_norm": 0.2951551675796509, | |
| "learning_rate": 0.00032070903193384806, | |
| "loss": 0.8756, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 0.09909350691473597, | |
| "grad_norm": 0.3049459159374237, | |
| "learning_rate": 0.00032048060669742797, | |
| "loss": 0.8767, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 0.09920535060199864, | |
| "grad_norm": 0.30216872692108154, | |
| "learning_rate": 0.0003202521814610078, | |
| "loss": 0.8687, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 0.09931719428926133, | |
| "grad_norm": 0.2913934290409088, | |
| "learning_rate": 0.0003200237562245877, | |
| "loss": 0.8616, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 0.09942903797652401, | |
| "grad_norm": 0.26879578828811646, | |
| "learning_rate": 0.0003197953309881676, | |
| "loss": 0.8681, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 0.09954088166378669, | |
| "grad_norm": 0.28092971444129944, | |
| "learning_rate": 0.00031956690575174744, | |
| "loss": 0.8765, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.09965272535104937, | |
| "grad_norm": 0.3074035048484802, | |
| "learning_rate": 0.0003193384805153273, | |
| "loss": 0.881, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 0.09976456903831206, | |
| "grad_norm": 0.2945140600204468, | |
| "learning_rate": 0.00031911005527890726, | |
| "loss": 0.8913, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 0.09987641272557474, | |
| "grad_norm": 0.2707176208496094, | |
| "learning_rate": 0.0003188816300424871, | |
| "loss": 0.8822, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 0.09998825641283741, | |
| "grad_norm": 0.2639947235584259, | |
| "learning_rate": 0.000318653204806067, | |
| "loss": 0.8892, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 0.1001001001001001, | |
| "grad_norm": 0.2709505558013916, | |
| "learning_rate": 0.0003184247795696469, | |
| "loss": 0.8654, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 0.10021194378736278, | |
| "grad_norm": 0.27803289890289307, | |
| "learning_rate": 0.00031819635433322674, | |
| "loss": 0.8887, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 0.10032378747462546, | |
| "grad_norm": 0.25851163268089294, | |
| "learning_rate": 0.0003179679290968066, | |
| "loss": 0.8662, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 0.10043563116188814, | |
| "grad_norm": 0.261068731546402, | |
| "learning_rate": 0.0003177395038603865, | |
| "loss": 0.8641, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 0.10054747484915083, | |
| "grad_norm": 0.25510483980178833, | |
| "learning_rate": 0.00031751107862396636, | |
| "loss": 0.8762, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 0.10065931853641351, | |
| "grad_norm": 0.25765854120254517, | |
| "learning_rate": 0.00031728265338754627, | |
| "loss": 0.8837, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.10077116222367619, | |
| "grad_norm": 0.24198535084724426, | |
| "learning_rate": 0.0003170542281511262, | |
| "loss": 0.8791, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 0.10088300591093888, | |
| "grad_norm": 0.2673517167568207, | |
| "learning_rate": 0.00031682580291470604, | |
| "loss": 0.8795, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 0.10099484959820156, | |
| "grad_norm": 0.26392221450805664, | |
| "learning_rate": 0.0003165973776782859, | |
| "loss": 0.8788, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 0.10110669328546423, | |
| "grad_norm": 0.2698739171028137, | |
| "learning_rate": 0.0003163689524418658, | |
| "loss": 0.8959, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 0.10121853697272691, | |
| "grad_norm": 0.2800233066082001, | |
| "learning_rate": 0.00031614052720544566, | |
| "loss": 0.8945, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 0.1013303806599896, | |
| "grad_norm": 0.29603493213653564, | |
| "learning_rate": 0.0003159121019690255, | |
| "loss": 0.892, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 0.10144222434725228, | |
| "grad_norm": 0.26462167501449585, | |
| "learning_rate": 0.0003156836767326054, | |
| "loss": 0.8849, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 0.10155406803451496, | |
| "grad_norm": 0.27941739559173584, | |
| "learning_rate": 0.00031545525149618534, | |
| "loss": 0.8782, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 0.10166591172177765, | |
| "grad_norm": 0.2777186334133148, | |
| "learning_rate": 0.0003152268262597652, | |
| "loss": 0.8787, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 0.10177775540904033, | |
| "grad_norm": 0.25893428921699524, | |
| "learning_rate": 0.00031499840102334505, | |
| "loss": 0.8629, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.101889599096303, | |
| "grad_norm": 0.27407601475715637, | |
| "learning_rate": 0.00031476997578692496, | |
| "loss": 0.8619, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 0.10200144278356568, | |
| "grad_norm": 0.2663459777832031, | |
| "learning_rate": 0.0003145415505505048, | |
| "loss": 0.8474, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 0.10211328647082837, | |
| "grad_norm": 0.2621177136898041, | |
| "learning_rate": 0.00031431312531408467, | |
| "loss": 0.8565, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 0.10222513015809105, | |
| "grad_norm": 0.26687386631965637, | |
| "learning_rate": 0.0003140847000776646, | |
| "loss": 0.8438, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 0.10233697384535373, | |
| "grad_norm": 0.24772432446479797, | |
| "learning_rate": 0.00031385627484124443, | |
| "loss": 0.8511, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 0.10244881753261642, | |
| "grad_norm": 0.278730183839798, | |
| "learning_rate": 0.00031362784960482434, | |
| "loss": 0.8499, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 0.1025606612198791, | |
| "grad_norm": 0.28657999634742737, | |
| "learning_rate": 0.00031339942436840425, | |
| "loss": 0.85, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 0.10267250490714178, | |
| "grad_norm": 0.2848927676677704, | |
| "learning_rate": 0.0003131709991319841, | |
| "loss": 0.8411, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 0.10278434859440445, | |
| "grad_norm": 0.28381872177124023, | |
| "learning_rate": 0.00031294257389556396, | |
| "loss": 0.8508, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 0.10289619228166715, | |
| "grad_norm": 0.26624616980552673, | |
| "learning_rate": 0.0003127141486591439, | |
| "loss": 0.8658, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.10300803596892982, | |
| "grad_norm": 0.2605401277542114, | |
| "learning_rate": 0.00031248572342272373, | |
| "loss": 0.8602, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 0.1031198796561925, | |
| "grad_norm": 0.2819276750087738, | |
| "learning_rate": 0.0003122572981863036, | |
| "loss": 0.8614, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 0.10323172334345519, | |
| "grad_norm": 0.27677878737449646, | |
| "learning_rate": 0.00031202887294988355, | |
| "loss": 0.8556, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 0.10334356703071787, | |
| "grad_norm": 0.25589799880981445, | |
| "learning_rate": 0.0003118004477134634, | |
| "loss": 0.8704, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 0.10345541071798055, | |
| "grad_norm": 0.2731853425502777, | |
| "learning_rate": 0.00031157202247704326, | |
| "loss": 0.8428, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 0.10356725440524323, | |
| "grad_norm": 0.3047199547290802, | |
| "learning_rate": 0.00031134359724062317, | |
| "loss": 0.8508, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 0.10367909809250592, | |
| "grad_norm": 0.28696686029434204, | |
| "learning_rate": 0.00031111517200420303, | |
| "loss": 0.8571, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 0.1037909417797686, | |
| "grad_norm": 0.23354049026966095, | |
| "learning_rate": 0.0003108867467677829, | |
| "loss": 0.8518, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 0.10390278546703127, | |
| "grad_norm": 0.27123787999153137, | |
| "learning_rate": 0.0003106583215313628, | |
| "loss": 0.8621, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 0.10401462915429396, | |
| "grad_norm": 0.2509523332118988, | |
| "learning_rate": 0.00031042989629494265, | |
| "loss": 0.8568, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.10412647284155664, | |
| "grad_norm": 0.2359481155872345, | |
| "learning_rate": 0.00031020147105852256, | |
| "loss": 0.8598, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 0.10423831652881932, | |
| "grad_norm": 0.27097463607788086, | |
| "learning_rate": 0.00030997304582210247, | |
| "loss": 0.8615, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 0.104350160216082, | |
| "grad_norm": 0.2616114020347595, | |
| "learning_rate": 0.0003097446205856823, | |
| "loss": 0.8462, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 0.10446200390334469, | |
| "grad_norm": 0.30027398467063904, | |
| "learning_rate": 0.0003095161953492622, | |
| "loss": 0.8683, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 0.10457384759060737, | |
| "grad_norm": 0.28468623757362366, | |
| "learning_rate": 0.0003092877701128421, | |
| "loss": 0.856, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 0.10468569127787004, | |
| "grad_norm": 0.318521112203598, | |
| "learning_rate": 0.00030905934487642195, | |
| "loss": 0.8532, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 0.10479753496513274, | |
| "grad_norm": 0.3118298351764679, | |
| "learning_rate": 0.0003088309196400018, | |
| "loss": 0.8546, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 0.10490937865239541, | |
| "grad_norm": 0.28549399971961975, | |
| "learning_rate": 0.0003086024944035817, | |
| "loss": 0.8718, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 0.10502122233965809, | |
| "grad_norm": 0.24803526699543, | |
| "learning_rate": 0.0003083740691671616, | |
| "loss": 0.8489, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 0.10513306602692078, | |
| "grad_norm": 0.26765918731689453, | |
| "learning_rate": 0.0003081456439307415, | |
| "loss": 0.8617, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.10524490971418346, | |
| "grad_norm": 0.26363757252693176, | |
| "learning_rate": 0.0003079172186943214, | |
| "loss": 0.8648, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 0.10535675340144614, | |
| "grad_norm": 0.2734963595867157, | |
| "learning_rate": 0.00030768879345790124, | |
| "loss": 0.8556, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 0.10546859708870882, | |
| "grad_norm": 0.2773530185222626, | |
| "learning_rate": 0.0003074603682214811, | |
| "loss": 0.8737, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 0.1055804407759715, | |
| "grad_norm": 0.2684498429298401, | |
| "learning_rate": 0.000307231942985061, | |
| "loss": 0.8657, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 0.10569228446323418, | |
| "grad_norm": 0.26110732555389404, | |
| "learning_rate": 0.00030700351774864086, | |
| "loss": 0.8618, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 0.10580412815049686, | |
| "grad_norm": 0.27595090866088867, | |
| "learning_rate": 0.0003067750925122207, | |
| "loss": 0.8654, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 0.10591597183775955, | |
| "grad_norm": 0.2799736559391022, | |
| "learning_rate": 0.0003065466672758007, | |
| "loss": 0.8583, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 0.10602781552502223, | |
| "grad_norm": 0.2729387879371643, | |
| "learning_rate": 0.00030631824203938054, | |
| "loss": 0.8628, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 0.10613965921228491, | |
| "grad_norm": 0.30332332849502563, | |
| "learning_rate": 0.0003060898168029604, | |
| "loss": 0.8512, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 0.10625150289954759, | |
| "grad_norm": 0.276753306388855, | |
| "learning_rate": 0.0003058613915665403, | |
| "loss": 0.85, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.10636334658681028, | |
| "grad_norm": 0.3190478980541229, | |
| "learning_rate": 0.00030563296633012016, | |
| "loss": 0.8534, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 0.10647519027407296, | |
| "grad_norm": 0.2926968038082123, | |
| "learning_rate": 0.0003054045410937, | |
| "loss": 0.8309, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 0.10658703396133563, | |
| "grad_norm": 0.29631507396698, | |
| "learning_rate": 0.0003051761158572799, | |
| "loss": 0.8406, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 0.10669887764859833, | |
| "grad_norm": 0.2881840765476227, | |
| "learning_rate": 0.0003049476906208598, | |
| "loss": 0.8274, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 0.106810721335861, | |
| "grad_norm": 0.2623940408229828, | |
| "learning_rate": 0.0003047192653844397, | |
| "loss": 0.8346, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 0.10692256502312368, | |
| "grad_norm": 0.29798468947410583, | |
| "learning_rate": 0.00030449084014801955, | |
| "loss": 0.8362, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 0.10703440871038636, | |
| "grad_norm": 0.2976382076740265, | |
| "learning_rate": 0.00030426241491159946, | |
| "loss": 0.8179, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 0.10714625239764905, | |
| "grad_norm": 0.28637486696243286, | |
| "learning_rate": 0.0003040339896751793, | |
| "loss": 0.8363, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 0.10725809608491173, | |
| "grad_norm": 0.3023325204849243, | |
| "learning_rate": 0.00030380556443875917, | |
| "loss": 0.8382, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 0.1073699397721744, | |
| "grad_norm": 0.2889160215854645, | |
| "learning_rate": 0.0003035771392023391, | |
| "loss": 0.8476, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.1074817834594371, | |
| "grad_norm": 0.2868768572807312, | |
| "learning_rate": 0.00030334871396591893, | |
| "loss": 0.8482, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 0.10759362714669977, | |
| "grad_norm": 0.2773813307285309, | |
| "learning_rate": 0.0003031202887294988, | |
| "loss": 0.8577, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 0.10770547083396245, | |
| "grad_norm": 0.28698423504829407, | |
| "learning_rate": 0.00030289186349307875, | |
| "loss": 0.8663, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 0.10781731452122513, | |
| "grad_norm": 0.26839759945869446, | |
| "learning_rate": 0.0003026634382566586, | |
| "loss": 0.8649, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 0.10792915820848782, | |
| "grad_norm": 0.2686857283115387, | |
| "learning_rate": 0.00030243501302023847, | |
| "loss": 0.8563, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 0.1080410018957505, | |
| "grad_norm": 0.2815250754356384, | |
| "learning_rate": 0.0003022065877838184, | |
| "loss": 0.8538, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 0.10815284558301318, | |
| "grad_norm": 0.24625800549983978, | |
| "learning_rate": 0.00030197816254739823, | |
| "loss": 0.87, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 0.10826468927027587, | |
| "grad_norm": 0.27051877975463867, | |
| "learning_rate": 0.0003017497373109781, | |
| "loss": 0.8692, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 0.10837653295753855, | |
| "grad_norm": 0.253892183303833, | |
| "learning_rate": 0.000301521312074558, | |
| "loss": 0.8583, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 0.10848837664480122, | |
| "grad_norm": 0.26951879262924194, | |
| "learning_rate": 0.0003012928868381379, | |
| "loss": 0.8699, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.1086002203320639, | |
| "grad_norm": 0.27741488814353943, | |
| "learning_rate": 0.00030106446160171776, | |
| "loss": 0.8673, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 0.10871206401932659, | |
| "grad_norm": 0.2655075788497925, | |
| "learning_rate": 0.00030083603636529767, | |
| "loss": 0.8628, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 0.10882390770658927, | |
| "grad_norm": 0.298532098531723, | |
| "learning_rate": 0.00030060761112887753, | |
| "loss": 0.8707, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 0.10893575139385195, | |
| "grad_norm": 0.3105684816837311, | |
| "learning_rate": 0.0003003791858924574, | |
| "loss": 0.8661, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 0.10904759508111464, | |
| "grad_norm": 0.27781355381011963, | |
| "learning_rate": 0.0003001507606560373, | |
| "loss": 0.8871, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 0.10915943876837732, | |
| "grad_norm": 0.2966761589050293, | |
| "learning_rate": 0.00029992233541961715, | |
| "loss": 0.875, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 0.10927128245564, | |
| "grad_norm": 0.3010736405849457, | |
| "learning_rate": 0.000299693910183197, | |
| "loss": 0.8746, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 0.10938312614290267, | |
| "grad_norm": 0.31352171301841736, | |
| "learning_rate": 0.00029946548494677697, | |
| "loss": 0.8733, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 0.10949496983016536, | |
| "grad_norm": 0.30627313256263733, | |
| "learning_rate": 0.0002992370597103568, | |
| "loss": 0.8675, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 0.10960681351742804, | |
| "grad_norm": 0.23990577459335327, | |
| "learning_rate": 0.0002990086344739367, | |
| "loss": 0.8614, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.10971865720469072, | |
| "grad_norm": 0.2856599688529968, | |
| "learning_rate": 0.0002987802092375166, | |
| "loss": 0.8454, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 0.10983050089195341, | |
| "grad_norm": 0.26476389169692993, | |
| "learning_rate": 0.00029855178400109645, | |
| "loss": 0.8616, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 0.10994234457921609, | |
| "grad_norm": 0.2871752381324768, | |
| "learning_rate": 0.0002983233587646763, | |
| "loss": 0.8444, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 0.11005418826647877, | |
| "grad_norm": 0.27318039536476135, | |
| "learning_rate": 0.0002980949335282562, | |
| "loss": 0.8487, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 0.11016603195374144, | |
| "grad_norm": 0.25630125403404236, | |
| "learning_rate": 0.00029786650829183607, | |
| "loss": 0.846, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 0.11027787564100414, | |
| "grad_norm": 0.23908184468746185, | |
| "learning_rate": 0.000297638083055416, | |
| "loss": 0.8403, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 0.11038971932826681, | |
| "grad_norm": 0.2978418469429016, | |
| "learning_rate": 0.0002974096578189959, | |
| "loss": 0.8652, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 0.11050156301552949, | |
| "grad_norm": 0.2503781318664551, | |
| "learning_rate": 0.00029718123258257574, | |
| "loss": 0.8657, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 0.11061340670279218, | |
| "grad_norm": 0.28556469082832336, | |
| "learning_rate": 0.0002969528073461556, | |
| "loss": 0.8501, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 0.11072525039005486, | |
| "grad_norm": 0.2643977701663971, | |
| "learning_rate": 0.0002967243821097355, | |
| "loss": 0.8742, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.11083709407731754, | |
| "grad_norm": 0.2757241725921631, | |
| "learning_rate": 0.00029649595687331536, | |
| "loss": 0.8837, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 0.11094893776458022, | |
| "grad_norm": 0.28263452649116516, | |
| "learning_rate": 0.0002962675316368952, | |
| "loss": 0.8793, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 0.11106078145184291, | |
| "grad_norm": 0.27624276280403137, | |
| "learning_rate": 0.00029603910640047513, | |
| "loss": 0.8669, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 0.11117262513910559, | |
| "grad_norm": 0.2814600467681885, | |
| "learning_rate": 0.00029581068116405504, | |
| "loss": 0.8858, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 0.11128446882636826, | |
| "grad_norm": 0.2871972918510437, | |
| "learning_rate": 0.0002955822559276349, | |
| "loss": 0.8714, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 0.11139631251363095, | |
| "grad_norm": 0.2885976731777191, | |
| "learning_rate": 0.0002953538306912148, | |
| "loss": 0.8675, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 0.11150815620089363, | |
| "grad_norm": 0.281021386384964, | |
| "learning_rate": 0.00029512540545479466, | |
| "loss": 0.8762, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 0.11161999988815631, | |
| "grad_norm": 0.2923888862133026, | |
| "learning_rate": 0.0002948969802183745, | |
| "loss": 0.87, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 0.11173184357541899, | |
| "grad_norm": 0.2596036195755005, | |
| "learning_rate": 0.00029466855498195443, | |
| "loss": 0.8696, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 0.11184368726268168, | |
| "grad_norm": 0.2749873697757721, | |
| "learning_rate": 0.0002944401297455343, | |
| "loss": 0.8604, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.11195553094994436, | |
| "grad_norm": 0.2696766257286072, | |
| "learning_rate": 0.00029421170450911414, | |
| "loss": 0.8743, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 0.11206737463720703, | |
| "grad_norm": 0.2824450731277466, | |
| "learning_rate": 0.00029398327927269405, | |
| "loss": 0.8734, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 0.11217921832446973, | |
| "grad_norm": 0.2795054614543915, | |
| "learning_rate": 0.00029375485403627396, | |
| "loss": 0.865, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 0.1122910620117324, | |
| "grad_norm": 0.2974453866481781, | |
| "learning_rate": 0.0002935264287998538, | |
| "loss": 0.8762, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 0.11240290569899508, | |
| "grad_norm": 0.27134743332862854, | |
| "learning_rate": 0.00029329800356343367, | |
| "loss": 0.8616, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 0.11251474938625777, | |
| "grad_norm": 0.2651810348033905, | |
| "learning_rate": 0.0002930695783270136, | |
| "loss": 0.8653, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 0.11262659307352045, | |
| "grad_norm": 0.29161420464515686, | |
| "learning_rate": 0.00029284115309059344, | |
| "loss": 0.8583, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 0.11273843676078313, | |
| "grad_norm": 0.27624139189720154, | |
| "learning_rate": 0.0002926127278541733, | |
| "loss": 0.8447, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 0.1128502804480458, | |
| "grad_norm": 0.290632039308548, | |
| "learning_rate": 0.00029238430261775326, | |
| "loss": 0.8568, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 0.1129621241353085, | |
| "grad_norm": 0.2906644940376282, | |
| "learning_rate": 0.0002921558773813331, | |
| "loss": 0.8566, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.11307396782257118, | |
| "grad_norm": 0.29284584522247314, | |
| "learning_rate": 0.00029192745214491297, | |
| "loss": 0.8679, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 0.11318581150983385, | |
| "grad_norm": 0.29635393619537354, | |
| "learning_rate": 0.0002916990269084929, | |
| "loss": 0.8648, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 0.11329765519709654, | |
| "grad_norm": 0.2560585141181946, | |
| "learning_rate": 0.00029147060167207273, | |
| "loss": 0.8565, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 0.11340949888435922, | |
| "grad_norm": 0.2480679154396057, | |
| "learning_rate": 0.0002912421764356526, | |
| "loss": 0.8574, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 0.1135213425716219, | |
| "grad_norm": 0.28708118200302124, | |
| "learning_rate": 0.0002910137511992325, | |
| "loss": 0.8658, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 0.11363318625888458, | |
| "grad_norm": 0.2553873062133789, | |
| "learning_rate": 0.00029078532596281235, | |
| "loss": 0.8721, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 0.11374502994614727, | |
| "grad_norm": 0.26742488145828247, | |
| "learning_rate": 0.00029055690072639226, | |
| "loss": 0.8608, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 0.11385687363340995, | |
| "grad_norm": 0.2674279510974884, | |
| "learning_rate": 0.0002903284754899722, | |
| "loss": 0.8763, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 0.11396871732067262, | |
| "grad_norm": 0.2484348863363266, | |
| "learning_rate": 0.00029010005025355203, | |
| "loss": 0.8799, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 0.11408056100793532, | |
| "grad_norm": 0.2603932321071625, | |
| "learning_rate": 0.0002898716250171319, | |
| "loss": 0.8922, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.114192404695198, | |
| "grad_norm": 0.2510204613208771, | |
| "learning_rate": 0.0002896431997807118, | |
| "loss": 0.8851, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 0.11430424838246067, | |
| "grad_norm": 0.26795732975006104, | |
| "learning_rate": 0.00028941477454429165, | |
| "loss": 0.8917, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 0.11441609206972335, | |
| "grad_norm": 0.2880701422691345, | |
| "learning_rate": 0.0002891863493078715, | |
| "loss": 0.8903, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 0.11452793575698604, | |
| "grad_norm": 0.23970642685890198, | |
| "learning_rate": 0.0002889579240714514, | |
| "loss": 0.8882, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 0.11463977944424872, | |
| "grad_norm": 0.2786742150783539, | |
| "learning_rate": 0.0002887294988350313, | |
| "loss": 0.8827, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 0.1147516231315114, | |
| "grad_norm": 0.2780776619911194, | |
| "learning_rate": 0.0002885010735986112, | |
| "loss": 0.8879, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 0.11486346681877409, | |
| "grad_norm": 0.26984742283821106, | |
| "learning_rate": 0.0002882726483621911, | |
| "loss": 0.8732, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 0.11497531050603677, | |
| "grad_norm": 0.26902884244918823, | |
| "learning_rate": 0.00028804422312577095, | |
| "loss": 0.878, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 0.11508715419329944, | |
| "grad_norm": 0.24787285923957825, | |
| "learning_rate": 0.0002878157978893508, | |
| "loss": 0.8573, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 0.11519899788056212, | |
| "grad_norm": 0.22702965140342712, | |
| "learning_rate": 0.0002875873726529307, | |
| "loss": 0.8621, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.11531084156782481, | |
| "grad_norm": 0.27474096417427063, | |
| "learning_rate": 0.00028735894741651057, | |
| "loss": 0.8763, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 0.11542268525508749, | |
| "grad_norm": 0.2605912983417511, | |
| "learning_rate": 0.0002871305221800904, | |
| "loss": 0.8706, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 0.11553452894235017, | |
| "grad_norm": 0.25281742215156555, | |
| "learning_rate": 0.0002869020969436704, | |
| "loss": 0.855, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 0.11564637262961286, | |
| "grad_norm": 0.2559000849723816, | |
| "learning_rate": 0.00028667367170725024, | |
| "loss": 0.8549, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 0.11575821631687554, | |
| "grad_norm": 0.2439345121383667, | |
| "learning_rate": 0.0002864452464708301, | |
| "loss": 0.8639, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 0.11587006000413821, | |
| "grad_norm": 0.2690776288509369, | |
| "learning_rate": 0.00028621682123441, | |
| "loss": 0.8487, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 0.11598190369140089, | |
| "grad_norm": 0.25111067295074463, | |
| "learning_rate": 0.00028598839599798987, | |
| "loss": 0.8558, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 0.11609374737866358, | |
| "grad_norm": 0.26838451623916626, | |
| "learning_rate": 0.0002857599707615697, | |
| "loss": 0.8603, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 0.11620559106592626, | |
| "grad_norm": 0.2401856780052185, | |
| "learning_rate": 0.00028553154552514963, | |
| "loss": 0.8286, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 0.11631743475318894, | |
| "grad_norm": 0.26284924149513245, | |
| "learning_rate": 0.0002853031202887295, | |
| "loss": 0.8402, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.11642927844045163, | |
| "grad_norm": 0.28734955191612244, | |
| "learning_rate": 0.0002850746950523094, | |
| "loss": 0.8358, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 0.11654112212771431, | |
| "grad_norm": 0.2564549446105957, | |
| "learning_rate": 0.0002848462698158893, | |
| "loss": 0.8458, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 0.11665296581497699, | |
| "grad_norm": 0.2507050633430481, | |
| "learning_rate": 0.00028461784457946916, | |
| "loss": 0.8371, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 0.11676480950223966, | |
| "grad_norm": 0.25748834013938904, | |
| "learning_rate": 0.000284389419343049, | |
| "loss": 0.8527, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 0.11687665318950236, | |
| "grad_norm": 0.24484454095363617, | |
| "learning_rate": 0.00028416099410662893, | |
| "loss": 0.8372, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 0.11698849687676503, | |
| "grad_norm": 0.24171967804431915, | |
| "learning_rate": 0.0002839325688702088, | |
| "loss": 0.8327, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 0.11710034056402771, | |
| "grad_norm": 0.30423420667648315, | |
| "learning_rate": 0.00028370414363378864, | |
| "loss": 0.8271, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 0.1172121842512904, | |
| "grad_norm": 0.2598424553871155, | |
| "learning_rate": 0.0002834757183973685, | |
| "loss": 0.8169, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 0.11732402793855308, | |
| "grad_norm": 0.2608656585216522, | |
| "learning_rate": 0.00028324729316094846, | |
| "loss": 0.8261, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 0.11743587162581576, | |
| "grad_norm": 0.25370126962661743, | |
| "learning_rate": 0.0002830188679245283, | |
| "loss": 0.8227, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.11754771531307844, | |
| "grad_norm": 0.2760542333126068, | |
| "learning_rate": 0.00028279044268810817, | |
| "loss": 0.8413, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 0.11765955900034113, | |
| "grad_norm": 0.24994856119155884, | |
| "learning_rate": 0.0002825620174516881, | |
| "loss": 0.8288, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 0.1177714026876038, | |
| "grad_norm": 0.25439032912254333, | |
| "learning_rate": 0.00028233359221526794, | |
| "loss": 0.8318, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 0.11788324637486648, | |
| "grad_norm": 0.28182244300842285, | |
| "learning_rate": 0.0002821051669788478, | |
| "loss": 0.8437, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 0.11799509006212917, | |
| "grad_norm": 0.2419012039899826, | |
| "learning_rate": 0.0002818767417424277, | |
| "loss": 0.8446, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 0.11810693374939185, | |
| "grad_norm": 0.2598857581615448, | |
| "learning_rate": 0.0002816483165060076, | |
| "loss": 0.8428, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 0.11821877743665453, | |
| "grad_norm": 0.25206229090690613, | |
| "learning_rate": 0.00028141989126958747, | |
| "loss": 0.8533, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 0.1183306211239172, | |
| "grad_norm": 0.25155991315841675, | |
| "learning_rate": 0.0002811914660331674, | |
| "loss": 0.8538, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 0.1184424648111799, | |
| "grad_norm": 0.2342199832201004, | |
| "learning_rate": 0.00028096304079674723, | |
| "loss": 0.8519, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 0.11855430849844258, | |
| "grad_norm": 0.25823327898979187, | |
| "learning_rate": 0.0002807346155603271, | |
| "loss": 0.8483, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.11866615218570525, | |
| "grad_norm": 0.26428598165512085, | |
| "learning_rate": 0.000280506190323907, | |
| "loss": 0.86, | |
| "step": 10610 | |
| }, | |
| { | |
| "epoch": 0.11877799587296795, | |
| "grad_norm": 0.25176918506622314, | |
| "learning_rate": 0.00028027776508748685, | |
| "loss": 0.8589, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 0.11888983956023062, | |
| "grad_norm": 0.28826919198036194, | |
| "learning_rate": 0.0002800493398510667, | |
| "loss": 0.8627, | |
| "step": 10630 | |
| }, | |
| { | |
| "epoch": 0.1190016832474933, | |
| "grad_norm": 0.24679958820343018, | |
| "learning_rate": 0.0002798209146146467, | |
| "loss": 0.8563, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 0.11911352693475598, | |
| "grad_norm": 0.2550687789916992, | |
| "learning_rate": 0.00027959248937822653, | |
| "loss": 0.8535, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 0.11922537062201867, | |
| "grad_norm": 0.2506476640701294, | |
| "learning_rate": 0.0002793640641418064, | |
| "loss": 0.8553, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 0.11933721430928135, | |
| "grad_norm": 0.24980700016021729, | |
| "learning_rate": 0.0002791356389053863, | |
| "loss": 0.854, | |
| "step": 10670 | |
| }, | |
| { | |
| "epoch": 0.11944905799654403, | |
| "grad_norm": 0.2280970811843872, | |
| "learning_rate": 0.00027890721366896615, | |
| "loss": 0.8569, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 0.11956090168380672, | |
| "grad_norm": 0.25191232562065125, | |
| "learning_rate": 0.000278678788432546, | |
| "loss": 0.8566, | |
| "step": 10690 | |
| }, | |
| { | |
| "epoch": 0.1196727453710694, | |
| "grad_norm": 0.2748493552207947, | |
| "learning_rate": 0.0002784503631961259, | |
| "loss": 0.8573, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.11978458905833207, | |
| "grad_norm": 0.25123515725135803, | |
| "learning_rate": 0.00027822193795970577, | |
| "loss": 0.8473, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 0.11989643274559475, | |
| "grad_norm": 0.25573378801345825, | |
| "learning_rate": 0.0002779935127232857, | |
| "loss": 0.8469, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 0.12000827643285744, | |
| "grad_norm": 0.23367713391780853, | |
| "learning_rate": 0.0002777650874868656, | |
| "loss": 0.8452, | |
| "step": 10730 | |
| }, | |
| { | |
| "epoch": 0.12012012012012012, | |
| "grad_norm": 0.24593010544776917, | |
| "learning_rate": 0.00027753666225044545, | |
| "loss": 0.838, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 0.1202319638073828, | |
| "grad_norm": 0.2422724962234497, | |
| "learning_rate": 0.0002773082370140253, | |
| "loss": 0.8398, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 0.12034380749464549, | |
| "grad_norm": 0.24471783638000488, | |
| "learning_rate": 0.0002770798117776052, | |
| "loss": 0.8409, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 0.12045565118190817, | |
| "grad_norm": 0.25523480772972107, | |
| "learning_rate": 0.00027685138654118507, | |
| "loss": 0.835, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 0.12056749486917084, | |
| "grad_norm": 0.24846532940864563, | |
| "learning_rate": 0.0002766229613047649, | |
| "loss": 0.842, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 0.12067933855643354, | |
| "grad_norm": 0.26955240964889526, | |
| "learning_rate": 0.00027639453606834484, | |
| "loss": 0.8525, | |
| "step": 10790 | |
| }, | |
| { | |
| "epoch": 0.12079118224369621, | |
| "grad_norm": 0.2711884081363678, | |
| "learning_rate": 0.00027616611083192475, | |
| "loss": 0.8352, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.12090302593095889, | |
| "grad_norm": 0.24954953789710999, | |
| "learning_rate": 0.0002759376855955046, | |
| "loss": 0.8257, | |
| "step": 10810 | |
| }, | |
| { | |
| "epoch": 0.12101486961822157, | |
| "grad_norm": 0.27029111981391907, | |
| "learning_rate": 0.0002757092603590845, | |
| "loss": 0.8147, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 0.12112671330548426, | |
| "grad_norm": 0.2440258413553238, | |
| "learning_rate": 0.00027548083512266437, | |
| "loss": 0.8239, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 0.12123855699274694, | |
| "grad_norm": 0.27082934975624084, | |
| "learning_rate": 0.0002752524098862442, | |
| "loss": 0.8391, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 0.12135040068000962, | |
| "grad_norm": 0.27641886472702026, | |
| "learning_rate": 0.00027502398464982413, | |
| "loss": 0.8276, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 0.1214622443672723, | |
| "grad_norm": 0.24772177636623383, | |
| "learning_rate": 0.000274795559413404, | |
| "loss": 0.8226, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 0.12157408805453498, | |
| "grad_norm": 0.2585364580154419, | |
| "learning_rate": 0.00027456713417698384, | |
| "loss": 0.8096, | |
| "step": 10870 | |
| }, | |
| { | |
| "epoch": 0.12168593174179766, | |
| "grad_norm": 0.2730146050453186, | |
| "learning_rate": 0.0002743387089405638, | |
| "loss": 0.8156, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 0.12179777542906034, | |
| "grad_norm": 0.2693599760532379, | |
| "learning_rate": 0.00027411028370414366, | |
| "loss": 0.8125, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 0.12190961911632303, | |
| "grad_norm": 0.26071295142173767, | |
| "learning_rate": 0.0002738818584677235, | |
| "loss": 0.8106, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.12202146280358571, | |
| "grad_norm": 0.2560258209705353, | |
| "learning_rate": 0.0002736534332313034, | |
| "loss": 0.8195, | |
| "step": 10910 | |
| }, | |
| { | |
| "epoch": 0.12213330649084839, | |
| "grad_norm": 0.27529552578926086, | |
| "learning_rate": 0.0002734250079948833, | |
| "loss": 0.8104, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 0.12224515017811108, | |
| "grad_norm": 0.2782133221626282, | |
| "learning_rate": 0.00027319658275846314, | |
| "loss": 0.8105, | |
| "step": 10930 | |
| }, | |
| { | |
| "epoch": 0.12235699386537376, | |
| "grad_norm": 0.27981024980545044, | |
| "learning_rate": 0.000272968157522043, | |
| "loss": 0.8085, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 0.12246883755263643, | |
| "grad_norm": 0.2741667926311493, | |
| "learning_rate": 0.0002727397322856229, | |
| "loss": 0.8042, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 0.12258068123989911, | |
| "grad_norm": 0.2468159943819046, | |
| "learning_rate": 0.0002725113070492028, | |
| "loss": 0.8198, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 0.1226925249271618, | |
| "grad_norm": 0.26167941093444824, | |
| "learning_rate": 0.00027228288181278267, | |
| "loss": 0.8176, | |
| "step": 10970 | |
| }, | |
| { | |
| "epoch": 0.12280436861442448, | |
| "grad_norm": 0.26660802960395813, | |
| "learning_rate": 0.0002720544565763626, | |
| "loss": 0.8036, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 0.12291621230168716, | |
| "grad_norm": 0.301575243473053, | |
| "learning_rate": 0.00027182603133994244, | |
| "loss": 0.8049, | |
| "step": 10990 | |
| }, | |
| { | |
| "epoch": 0.12302805598894985, | |
| "grad_norm": 0.2759682834148407, | |
| "learning_rate": 0.0002715976061035223, | |
| "loss": 0.8024, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.12313989967621253, | |
| "grad_norm": 0.25659626722335815, | |
| "learning_rate": 0.0002713691808671022, | |
| "loss": 0.8229, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 0.1232517433634752, | |
| "grad_norm": 0.2672923505306244, | |
| "learning_rate": 0.00027114075563068206, | |
| "loss": 0.8018, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 0.12336358705073788, | |
| "grad_norm": 0.25423988699913025, | |
| "learning_rate": 0.0002709123303942619, | |
| "loss": 0.836, | |
| "step": 11030 | |
| }, | |
| { | |
| "epoch": 0.12347543073800057, | |
| "grad_norm": 0.28428804874420166, | |
| "learning_rate": 0.0002706839051578419, | |
| "loss": 0.8299, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 0.12358727442526325, | |
| "grad_norm": 0.2924467921257019, | |
| "learning_rate": 0.00027045547992142173, | |
| "loss": 0.8236, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 0.12369911811252593, | |
| "grad_norm": 0.25230658054351807, | |
| "learning_rate": 0.0002702270546850016, | |
| "loss": 0.8274, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 0.12381096179978862, | |
| "grad_norm": 0.27876734733581543, | |
| "learning_rate": 0.0002699986294485815, | |
| "loss": 0.8244, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 0.1239228054870513, | |
| "grad_norm": 0.29841694235801697, | |
| "learning_rate": 0.00026977020421216136, | |
| "loss": 0.8327, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 0.12403464917431398, | |
| "grad_norm": 0.3055926263332367, | |
| "learning_rate": 0.0002695417789757412, | |
| "loss": 0.8247, | |
| "step": 11090 | |
| }, | |
| { | |
| "epoch": 0.12414649286157665, | |
| "grad_norm": 0.275919109582901, | |
| "learning_rate": 0.0002693133537393211, | |
| "loss": 0.8263, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.12425833654883935, | |
| "grad_norm": 0.3069559931755066, | |
| "learning_rate": 0.00026908492850290103, | |
| "loss": 0.8242, | |
| "step": 11110 | |
| }, | |
| { | |
| "epoch": 0.12437018023610202, | |
| "grad_norm": 0.2574029564857483, | |
| "learning_rate": 0.0002688565032664809, | |
| "loss": 0.819, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 0.1244820239233647, | |
| "grad_norm": 0.25053170323371887, | |
| "learning_rate": 0.0002686280780300608, | |
| "loss": 0.8022, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 0.12459386761062739, | |
| "grad_norm": 0.27337634563446045, | |
| "learning_rate": 0.00026839965279364065, | |
| "loss": 0.8127, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 0.12470571129789007, | |
| "grad_norm": 0.2531510889530182, | |
| "learning_rate": 0.0002681712275572205, | |
| "loss": 0.8138, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 0.12481755498515275, | |
| "grad_norm": 0.27455076575279236, | |
| "learning_rate": 0.0002679428023208004, | |
| "loss": 0.7974, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 0.12492939867241543, | |
| "grad_norm": 0.2515604496002197, | |
| "learning_rate": 0.0002677143770843803, | |
| "loss": 0.8077, | |
| "step": 11170 | |
| }, | |
| { | |
| "epoch": 0.12504124235967812, | |
| "grad_norm": 0.27941974997520447, | |
| "learning_rate": 0.00026748595184796013, | |
| "loss": 0.8099, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 0.1251530860469408, | |
| "grad_norm": 0.2508449852466583, | |
| "learning_rate": 0.0002672575266115401, | |
| "loss": 0.8077, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 0.12526492973420347, | |
| "grad_norm": 0.24805410206317902, | |
| "learning_rate": 0.00026702910137511995, | |
| "loss": 0.8029, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.12537677342146616, | |
| "grad_norm": 0.2730201184749603, | |
| "learning_rate": 0.0002668006761386998, | |
| "loss": 0.8383, | |
| "step": 11210 | |
| }, | |
| { | |
| "epoch": 0.12548861710872883, | |
| "grad_norm": 0.24301932752132416, | |
| "learning_rate": 0.0002665722509022797, | |
| "loss": 0.8245, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 0.12560046079599152, | |
| "grad_norm": 0.270059734582901, | |
| "learning_rate": 0.00026634382566585957, | |
| "loss": 0.8287, | |
| "step": 11230 | |
| }, | |
| { | |
| "epoch": 0.1257123044832542, | |
| "grad_norm": 0.24491746723651886, | |
| "learning_rate": 0.0002661154004294394, | |
| "loss": 0.8283, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 0.12582414817051688, | |
| "grad_norm": 0.2461182177066803, | |
| "learning_rate": 0.00026588697519301934, | |
| "loss": 0.8285, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 0.12593599185777957, | |
| "grad_norm": 0.26306700706481934, | |
| "learning_rate": 0.0002656585499565992, | |
| "loss": 0.8366, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 0.12604783554504226, | |
| "grad_norm": 0.2317613661289215, | |
| "learning_rate": 0.0002654301247201791, | |
| "loss": 0.8373, | |
| "step": 11270 | |
| }, | |
| { | |
| "epoch": 0.12615967923230492, | |
| "grad_norm": 0.25218284130096436, | |
| "learning_rate": 0.000265201699483759, | |
| "loss": 0.8163, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 0.1262715229195676, | |
| "grad_norm": 0.2527898848056793, | |
| "learning_rate": 0.00026497327424733887, | |
| "loss": 0.819, | |
| "step": 11290 | |
| }, | |
| { | |
| "epoch": 0.1263833666068303, | |
| "grad_norm": 0.2344309389591217, | |
| "learning_rate": 0.0002647448490109187, | |
| "loss": 0.8335, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.12649521029409297, | |
| "grad_norm": 0.23913320899009705, | |
| "learning_rate": 0.00026451642377449863, | |
| "loss": 0.8289, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 0.12660705398135566, | |
| "grad_norm": 0.24901095032691956, | |
| "learning_rate": 0.0002642879985380785, | |
| "loss": 0.8159, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 0.12671889766861835, | |
| "grad_norm": 0.2503173351287842, | |
| "learning_rate": 0.00026405957330165834, | |
| "loss": 0.8372, | |
| "step": 11330 | |
| }, | |
| { | |
| "epoch": 0.12683074135588102, | |
| "grad_norm": 0.2341470569372177, | |
| "learning_rate": 0.00026383114806523825, | |
| "loss": 0.8264, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 0.1269425850431437, | |
| "grad_norm": 0.23143555223941803, | |
| "learning_rate": 0.00026360272282881816, | |
| "loss": 0.824, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 0.12705442873040637, | |
| "grad_norm": 0.24911652505397797, | |
| "learning_rate": 0.000263374297592398, | |
| "loss": 0.82, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 0.12716627241766906, | |
| "grad_norm": 0.21931353211402893, | |
| "learning_rate": 0.0002631458723559779, | |
| "loss": 0.8194, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 0.12727811610493175, | |
| "grad_norm": 0.2432345151901245, | |
| "learning_rate": 0.0002629174471195578, | |
| "loss": 0.8371, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 0.12738995979219442, | |
| "grad_norm": 0.24188277125358582, | |
| "learning_rate": 0.00026268902188313764, | |
| "loss": 0.8096, | |
| "step": 11390 | |
| }, | |
| { | |
| "epoch": 0.1275018034794571, | |
| "grad_norm": 0.2522214651107788, | |
| "learning_rate": 0.0002624605966467175, | |
| "loss": 0.8187, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.1276136471667198, | |
| "grad_norm": 0.2596495449542999, | |
| "learning_rate": 0.0002622321714102974, | |
| "loss": 0.8138, | |
| "step": 11410 | |
| }, | |
| { | |
| "epoch": 0.12772549085398247, | |
| "grad_norm": 0.2708049714565277, | |
| "learning_rate": 0.00026200374617387726, | |
| "loss": 0.8066, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 0.12783733454124516, | |
| "grad_norm": 0.27820831537246704, | |
| "learning_rate": 0.00026177532093745717, | |
| "loss": 0.8112, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 0.12794917822850785, | |
| "grad_norm": 0.23918400704860687, | |
| "learning_rate": 0.0002615468957010371, | |
| "loss": 0.8148, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 0.1280610219157705, | |
| "grad_norm": 0.22054031491279602, | |
| "learning_rate": 0.00026131847046461694, | |
| "loss": 0.8183, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 0.1281728656030332, | |
| "grad_norm": 0.25998455286026, | |
| "learning_rate": 0.0002610900452281968, | |
| "loss": 0.8242, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 0.1282847092902959, | |
| "grad_norm": 0.26852914690971375, | |
| "learning_rate": 0.0002608616199917767, | |
| "loss": 0.8161, | |
| "step": 11470 | |
| }, | |
| { | |
| "epoch": 0.12839655297755856, | |
| "grad_norm": 0.24028563499450684, | |
| "learning_rate": 0.00026063319475535656, | |
| "loss": 0.8083, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 0.12850839666482125, | |
| "grad_norm": 0.24944745004177094, | |
| "learning_rate": 0.0002604047695189364, | |
| "loss": 0.8168, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 0.12862024035208391, | |
| "grad_norm": 0.26595303416252136, | |
| "learning_rate": 0.0002601763442825164, | |
| "loss": 0.8178, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.1287320840393466, | |
| "grad_norm": 0.24556541442871094, | |
| "learning_rate": 0.00025994791904609623, | |
| "loss": 0.8229, | |
| "step": 11510 | |
| }, | |
| { | |
| "epoch": 0.1288439277266093, | |
| "grad_norm": 0.24716900289058685, | |
| "learning_rate": 0.0002597194938096761, | |
| "loss": 0.809, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 0.12895577141387196, | |
| "grad_norm": 0.24745820462703705, | |
| "learning_rate": 0.000259491068573256, | |
| "loss": 0.8293, | |
| "step": 11530 | |
| }, | |
| { | |
| "epoch": 0.12906761510113465, | |
| "grad_norm": 0.2732492983341217, | |
| "learning_rate": 0.00025926264333683586, | |
| "loss": 0.8, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 0.12917945878839734, | |
| "grad_norm": 0.23239663243293762, | |
| "learning_rate": 0.0002590342181004157, | |
| "loss": 0.8175, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 0.12929130247566, | |
| "grad_norm": 0.24953389167785645, | |
| "learning_rate": 0.0002588057928639956, | |
| "loss": 0.8152, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 0.1294031461629227, | |
| "grad_norm": 0.25258156657218933, | |
| "learning_rate": 0.0002585773676275755, | |
| "loss": 0.8301, | |
| "step": 11570 | |
| }, | |
| { | |
| "epoch": 0.1295149898501854, | |
| "grad_norm": 0.2609168291091919, | |
| "learning_rate": 0.0002583489423911554, | |
| "loss": 0.8197, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 0.12962683353744806, | |
| "grad_norm": 0.2484872192144394, | |
| "learning_rate": 0.0002581205171547353, | |
| "loss": 0.8362, | |
| "step": 11590 | |
| }, | |
| { | |
| "epoch": 0.12973867722471075, | |
| "grad_norm": 0.2833307385444641, | |
| "learning_rate": 0.00025789209191831515, | |
| "loss": 0.8338, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.12985052091197344, | |
| "grad_norm": 0.24657459557056427, | |
| "learning_rate": 0.000257663666681895, | |
| "loss": 0.8205, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 0.1299623645992361, | |
| "grad_norm": 0.2499598115682602, | |
| "learning_rate": 0.0002574352414454749, | |
| "loss": 0.8406, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 0.1300742082864988, | |
| "grad_norm": 0.2757512629032135, | |
| "learning_rate": 0.0002572068162090548, | |
| "loss": 0.8247, | |
| "step": 11630 | |
| }, | |
| { | |
| "epoch": 0.13018605197376146, | |
| "grad_norm": 0.25661805272102356, | |
| "learning_rate": 0.00025697839097263463, | |
| "loss": 0.8384, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 0.13029789566102415, | |
| "grad_norm": 0.27651283144950867, | |
| "learning_rate": 0.00025674996573621454, | |
| "loss": 0.818, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 0.13040973934828684, | |
| "grad_norm": 0.247050940990448, | |
| "learning_rate": 0.00025652154049979445, | |
| "loss": 0.8261, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 0.1305215830355495, | |
| "grad_norm": 0.23124581575393677, | |
| "learning_rate": 0.0002562931152633743, | |
| "loss": 0.8259, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 0.1306334267228122, | |
| "grad_norm": 0.2694045603275299, | |
| "learning_rate": 0.0002560646900269542, | |
| "loss": 0.8304, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 0.1307452704100749, | |
| "grad_norm": 0.26821568608283997, | |
| "learning_rate": 0.00025583626479053407, | |
| "loss": 0.8441, | |
| "step": 11690 | |
| }, | |
| { | |
| "epoch": 0.13085711409733755, | |
| "grad_norm": 0.2747989892959595, | |
| "learning_rate": 0.0002556078395541139, | |
| "loss": 0.841, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.13096895778460024, | |
| "grad_norm": 0.28248855471611023, | |
| "learning_rate": 0.00025537941431769384, | |
| "loss": 0.857, | |
| "step": 11710 | |
| }, | |
| { | |
| "epoch": 0.13108080147186293, | |
| "grad_norm": 0.25378182530403137, | |
| "learning_rate": 0.0002551509890812737, | |
| "loss": 0.8437, | |
| "step": 11720 | |
| }, | |
| { | |
| "epoch": 0.1311926451591256, | |
| "grad_norm": 0.25950944423675537, | |
| "learning_rate": 0.00025492256384485355, | |
| "loss": 0.8497, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 0.1313044888463883, | |
| "grad_norm": 0.26261699199676514, | |
| "learning_rate": 0.0002546941386084335, | |
| "loss": 0.8477, | |
| "step": 11740 | |
| }, | |
| { | |
| "epoch": 0.13141633253365098, | |
| "grad_norm": 0.30151599645614624, | |
| "learning_rate": 0.00025446571337201337, | |
| "loss": 0.8405, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 0.13152817622091365, | |
| "grad_norm": 0.2556060254573822, | |
| "learning_rate": 0.0002542372881355932, | |
| "loss": 0.831, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 0.13164001990817634, | |
| "grad_norm": 0.26560309529304504, | |
| "learning_rate": 0.00025400886289917313, | |
| "loss": 0.8445, | |
| "step": 11770 | |
| }, | |
| { | |
| "epoch": 0.13175186359543903, | |
| "grad_norm": 0.28504636883735657, | |
| "learning_rate": 0.000253780437662753, | |
| "loss": 0.8432, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 0.1318637072827017, | |
| "grad_norm": 0.2985188663005829, | |
| "learning_rate": 0.00025355201242633285, | |
| "loss": 0.8584, | |
| "step": 11790 | |
| }, | |
| { | |
| "epoch": 0.13197555096996438, | |
| "grad_norm": 0.28022414445877075, | |
| "learning_rate": 0.00025332358718991276, | |
| "loss": 0.8393, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.13208739465722705, | |
| "grad_norm": 0.28535568714141846, | |
| "learning_rate": 0.0002530951619534926, | |
| "loss": 0.8369, | |
| "step": 11810 | |
| }, | |
| { | |
| "epoch": 0.13219923834448974, | |
| "grad_norm": 0.27764952182769775, | |
| "learning_rate": 0.0002528667367170725, | |
| "loss": 0.8435, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 0.13231108203175243, | |
| "grad_norm": 0.28943151235580444, | |
| "learning_rate": 0.0002526383114806524, | |
| "loss": 0.8334, | |
| "step": 11830 | |
| }, | |
| { | |
| "epoch": 0.1324229257190151, | |
| "grad_norm": 0.28240668773651123, | |
| "learning_rate": 0.0002524098862442323, | |
| "loss": 0.8338, | |
| "step": 11840 | |
| }, | |
| { | |
| "epoch": 0.13253476940627779, | |
| "grad_norm": 0.27650541067123413, | |
| "learning_rate": 0.00025218146100781214, | |
| "loss": 0.8275, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 0.13264661309354048, | |
| "grad_norm": 0.27569788694381714, | |
| "learning_rate": 0.000251953035771392, | |
| "loss": 0.8323, | |
| "step": 11860 | |
| }, | |
| { | |
| "epoch": 0.13275845678080314, | |
| "grad_norm": 0.29103782773017883, | |
| "learning_rate": 0.0002517246105349719, | |
| "loss": 0.8401, | |
| "step": 11870 | |
| }, | |
| { | |
| "epoch": 0.13287030046806583, | |
| "grad_norm": 0.28769806027412415, | |
| "learning_rate": 0.00025149618529855176, | |
| "loss": 0.8369, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 0.13298214415532852, | |
| "grad_norm": 0.2803378701210022, | |
| "learning_rate": 0.0002512677600621316, | |
| "loss": 0.8308, | |
| "step": 11890 | |
| }, | |
| { | |
| "epoch": 0.1330939878425912, | |
| "grad_norm": 0.29264572262763977, | |
| "learning_rate": 0.0002510393348257116, | |
| "loss": 0.8314, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.13320583152985388, | |
| "grad_norm": 0.27434802055358887, | |
| "learning_rate": 0.00025081090958929144, | |
| "loss": 0.8337, | |
| "step": 11910 | |
| }, | |
| { | |
| "epoch": 0.13331767521711657, | |
| "grad_norm": 0.270589143037796, | |
| "learning_rate": 0.0002505824843528713, | |
| "loss": 0.8503, | |
| "step": 11920 | |
| }, | |
| { | |
| "epoch": 0.13342951890437924, | |
| "grad_norm": 0.27260124683380127, | |
| "learning_rate": 0.0002503540591164512, | |
| "loss": 0.8293, | |
| "step": 11930 | |
| }, | |
| { | |
| "epoch": 0.13354136259164193, | |
| "grad_norm": 0.2684808075428009, | |
| "learning_rate": 0.00025012563388003106, | |
| "loss": 0.8339, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 0.1336532062789046, | |
| "grad_norm": 0.2510156035423279, | |
| "learning_rate": 0.00024989720864361097, | |
| "loss": 0.8464, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 0.13376504996616728, | |
| "grad_norm": 0.24331960082054138, | |
| "learning_rate": 0.0002496687834071908, | |
| "loss": 0.8443, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 0.13387689365342997, | |
| "grad_norm": 0.2688249349594116, | |
| "learning_rate": 0.00024944035817077074, | |
| "loss": 0.8483, | |
| "step": 11970 | |
| }, | |
| { | |
| "epoch": 0.13398873734069264, | |
| "grad_norm": 0.2608729898929596, | |
| "learning_rate": 0.0002492119329343506, | |
| "loss": 0.852, | |
| "step": 11980 | |
| }, | |
| { | |
| "epoch": 0.13410058102795533, | |
| "grad_norm": 0.28415507078170776, | |
| "learning_rate": 0.00024898350769793045, | |
| "loss": 0.8449, | |
| "step": 11990 | |
| }, | |
| { | |
| "epoch": 0.13421242471521802, | |
| "grad_norm": 0.2920886278152466, | |
| "learning_rate": 0.00024875508246151036, | |
| "loss": 0.8281, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.13432426840248068, | |
| "grad_norm": 0.2763430178165436, | |
| "learning_rate": 0.00024852665722509027, | |
| "loss": 0.8492, | |
| "step": 12010 | |
| }, | |
| { | |
| "epoch": 0.13443611208974338, | |
| "grad_norm": 0.26460400223731995, | |
| "learning_rate": 0.0002482982319886701, | |
| "loss": 0.8409, | |
| "step": 12020 | |
| }, | |
| { | |
| "epoch": 0.13454795577700607, | |
| "grad_norm": 0.2698183059692383, | |
| "learning_rate": 0.00024806980675225, | |
| "loss": 0.8295, | |
| "step": 12030 | |
| }, | |
| { | |
| "epoch": 0.13465979946426873, | |
| "grad_norm": 0.2728478014469147, | |
| "learning_rate": 0.0002478413815158299, | |
| "loss": 0.837, | |
| "step": 12040 | |
| }, | |
| { | |
| "epoch": 0.13477164315153142, | |
| "grad_norm": 0.282924085855484, | |
| "learning_rate": 0.00024761295627940974, | |
| "loss": 0.8482, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 0.13488348683879411, | |
| "grad_norm": 0.264614999294281, | |
| "learning_rate": 0.00024738453104298965, | |
| "loss": 0.8432, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 0.13499533052605678, | |
| "grad_norm": 0.2475707232952118, | |
| "learning_rate": 0.0002471561058065695, | |
| "loss": 0.8387, | |
| "step": 12070 | |
| }, | |
| { | |
| "epoch": 0.13510717421331947, | |
| "grad_norm": 0.2620779573917389, | |
| "learning_rate": 0.00024692768057014937, | |
| "loss": 0.8559, | |
| "step": 12080 | |
| }, | |
| { | |
| "epoch": 0.13521901790058213, | |
| "grad_norm": 0.2645311951637268, | |
| "learning_rate": 0.0002466992553337293, | |
| "loss": 0.8363, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 0.13533086158784483, | |
| "grad_norm": 0.27586236596107483, | |
| "learning_rate": 0.0002464708300973092, | |
| "loss": 0.8365, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.13544270527510752, | |
| "grad_norm": 0.2695125341415405, | |
| "learning_rate": 0.00024624240486088904, | |
| "loss": 0.8412, | |
| "step": 12110 | |
| }, | |
| { | |
| "epoch": 0.13555454896237018, | |
| "grad_norm": 0.2473846971988678, | |
| "learning_rate": 0.0002460139796244689, | |
| "loss": 0.8362, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 0.13566639264963287, | |
| "grad_norm": 0.28001588582992554, | |
| "learning_rate": 0.0002457855543880488, | |
| "loss": 0.8462, | |
| "step": 12130 | |
| }, | |
| { | |
| "epoch": 0.13577823633689556, | |
| "grad_norm": 0.29486599564552307, | |
| "learning_rate": 0.00024555712915162866, | |
| "loss": 0.8607, | |
| "step": 12140 | |
| }, | |
| { | |
| "epoch": 0.13589008002415823, | |
| "grad_norm": 0.2761843204498291, | |
| "learning_rate": 0.00024532870391520857, | |
| "loss": 0.8668, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 0.13600192371142092, | |
| "grad_norm": 0.25779953598976135, | |
| "learning_rate": 0.00024510027867878843, | |
| "loss": 0.853, | |
| "step": 12160 | |
| }, | |
| { | |
| "epoch": 0.1361137673986836, | |
| "grad_norm": 0.27593857049942017, | |
| "learning_rate": 0.00024487185344236834, | |
| "loss": 0.8506, | |
| "step": 12170 | |
| }, | |
| { | |
| "epoch": 0.13622561108594627, | |
| "grad_norm": 0.24426791071891785, | |
| "learning_rate": 0.0002446434282059482, | |
| "loss": 0.8623, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 0.13633745477320897, | |
| "grad_norm": 0.25555628538131714, | |
| "learning_rate": 0.00024441500296952805, | |
| "loss": 0.8493, | |
| "step": 12190 | |
| }, | |
| { | |
| "epoch": 0.13644929846047166, | |
| "grad_norm": 0.2234913557767868, | |
| "learning_rate": 0.00024418657773310796, | |
| "loss": 0.8644, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.13656114214773432, | |
| "grad_norm": 0.27130651473999023, | |
| "learning_rate": 0.00024395815249668784, | |
| "loss": 0.8791, | |
| "step": 12210 | |
| }, | |
| { | |
| "epoch": 0.136672985834997, | |
| "grad_norm": 0.24734824895858765, | |
| "learning_rate": 0.0002437297272602677, | |
| "loss": 0.8719, | |
| "step": 12220 | |
| }, | |
| { | |
| "epoch": 0.13678482952225968, | |
| "grad_norm": 0.24316945672035217, | |
| "learning_rate": 0.0002435013020238476, | |
| "loss": 0.8546, | |
| "step": 12230 | |
| }, | |
| { | |
| "epoch": 0.13689667320952237, | |
| "grad_norm": 0.2349976748228073, | |
| "learning_rate": 0.0002432728767874275, | |
| "loss": 0.8458, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 0.13700851689678506, | |
| "grad_norm": 0.26791033148765564, | |
| "learning_rate": 0.00024304445155100735, | |
| "loss": 0.8485, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 0.13712036058404772, | |
| "grad_norm": 0.23598451912403107, | |
| "learning_rate": 0.00024281602631458723, | |
| "loss": 0.8451, | |
| "step": 12260 | |
| }, | |
| { | |
| "epoch": 0.13723220427131042, | |
| "grad_norm": 0.23012129962444305, | |
| "learning_rate": 0.00024258760107816714, | |
| "loss": 0.8332, | |
| "step": 12270 | |
| }, | |
| { | |
| "epoch": 0.1373440479585731, | |
| "grad_norm": 0.22834524512290955, | |
| "learning_rate": 0.000242359175841747, | |
| "loss": 0.8203, | |
| "step": 12280 | |
| }, | |
| { | |
| "epoch": 0.13745589164583577, | |
| "grad_norm": 0.2247861921787262, | |
| "learning_rate": 0.00024213075060532688, | |
| "loss": 0.8303, | |
| "step": 12290 | |
| }, | |
| { | |
| "epoch": 0.13756773533309846, | |
| "grad_norm": 0.2438284307718277, | |
| "learning_rate": 0.00024190232536890676, | |
| "loss": 0.8216, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.13767957902036115, | |
| "grad_norm": 0.24075888097286224, | |
| "learning_rate": 0.00024167390013248664, | |
| "loss": 0.7964, | |
| "step": 12310 | |
| }, | |
| { | |
| "epoch": 0.13779142270762382, | |
| "grad_norm": 0.24668976664543152, | |
| "learning_rate": 0.00024144547489606653, | |
| "loss": 0.8028, | |
| "step": 12320 | |
| }, | |
| { | |
| "epoch": 0.1379032663948865, | |
| "grad_norm": 0.26727405190467834, | |
| "learning_rate": 0.0002412170496596464, | |
| "loss": 0.8081, | |
| "step": 12330 | |
| }, | |
| { | |
| "epoch": 0.1380151100821492, | |
| "grad_norm": 0.2645564377307892, | |
| "learning_rate": 0.00024098862442322626, | |
| "loss": 0.8116, | |
| "step": 12340 | |
| }, | |
| { | |
| "epoch": 0.13812695376941186, | |
| "grad_norm": 0.25368645787239075, | |
| "learning_rate": 0.00024076019918680617, | |
| "loss": 0.8105, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 0.13823879745667456, | |
| "grad_norm": 0.26823967695236206, | |
| "learning_rate": 0.00024053177395038606, | |
| "loss": 0.8249, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 0.13835064114393722, | |
| "grad_norm": 0.2827225625514984, | |
| "learning_rate": 0.0002403033487139659, | |
| "loss": 0.8191, | |
| "step": 12370 | |
| }, | |
| { | |
| "epoch": 0.1384624848311999, | |
| "grad_norm": 0.23261433839797974, | |
| "learning_rate": 0.00024007492347754582, | |
| "loss": 0.8215, | |
| "step": 12380 | |
| }, | |
| { | |
| "epoch": 0.1385743285184626, | |
| "grad_norm": 0.27331966161727905, | |
| "learning_rate": 0.00023984649824112568, | |
| "loss": 0.8232, | |
| "step": 12390 | |
| }, | |
| { | |
| "epoch": 0.13868617220572527, | |
| "grad_norm": 0.2801966369152069, | |
| "learning_rate": 0.00023961807300470556, | |
| "loss": 0.8074, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.13879801589298796, | |
| "grad_norm": 0.2379591315984726, | |
| "learning_rate": 0.00023938964776828544, | |
| "loss": 0.8209, | |
| "step": 12410 | |
| }, | |
| { | |
| "epoch": 0.13890985958025065, | |
| "grad_norm": 0.27151694893836975, | |
| "learning_rate": 0.00023916122253186533, | |
| "loss": 0.8258, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 0.1390217032675133, | |
| "grad_norm": 0.21429865062236786, | |
| "learning_rate": 0.0002389327972954452, | |
| "loss": 0.8178, | |
| "step": 12430 | |
| }, | |
| { | |
| "epoch": 0.139133546954776, | |
| "grad_norm": 0.2777722477912903, | |
| "learning_rate": 0.0002387043720590251, | |
| "loss": 0.826, | |
| "step": 12440 | |
| }, | |
| { | |
| "epoch": 0.1392453906420387, | |
| "grad_norm": 0.2514742910861969, | |
| "learning_rate": 0.00023847594682260495, | |
| "loss": 0.8362, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 0.13935723432930136, | |
| "grad_norm": 0.23247656226158142, | |
| "learning_rate": 0.00023824752158618486, | |
| "loss": 0.8049, | |
| "step": 12460 | |
| }, | |
| { | |
| "epoch": 0.13946907801656405, | |
| "grad_norm": 0.2391313910484314, | |
| "learning_rate": 0.00023801909634976474, | |
| "loss": 0.8082, | |
| "step": 12470 | |
| }, | |
| { | |
| "epoch": 0.13958092170382674, | |
| "grad_norm": 0.2366340011358261, | |
| "learning_rate": 0.0002377906711133446, | |
| "loss": 0.8214, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 0.1396927653910894, | |
| "grad_norm": 0.2570713758468628, | |
| "learning_rate": 0.00023756224587692448, | |
| "loss": 0.827, | |
| "step": 12490 | |
| }, | |
| { | |
| "epoch": 0.1398046090783521, | |
| "grad_norm": 0.22823789715766907, | |
| "learning_rate": 0.0002373338206405044, | |
| "loss": 0.8314, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.1399164527656148, | |
| "grad_norm": 0.24660278856754303, | |
| "learning_rate": 0.00023710539540408424, | |
| "loss": 0.838, | |
| "step": 12510 | |
| }, | |
| { | |
| "epoch": 0.14002829645287745, | |
| "grad_norm": 0.25041723251342773, | |
| "learning_rate": 0.00023687697016766413, | |
| "loss": 0.8371, | |
| "step": 12520 | |
| }, | |
| { | |
| "epoch": 0.14014014014014015, | |
| "grad_norm": 0.23942531645298004, | |
| "learning_rate": 0.000236648544931244, | |
| "loss": 0.8282, | |
| "step": 12530 | |
| }, | |
| { | |
| "epoch": 0.1402519838274028, | |
| "grad_norm": 0.2445865273475647, | |
| "learning_rate": 0.0002364201196948239, | |
| "loss": 0.8307, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 0.1403638275146655, | |
| "grad_norm": 0.25278452038764954, | |
| "learning_rate": 0.00023619169445840378, | |
| "loss": 0.8483, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 0.1404756712019282, | |
| "grad_norm": 0.22890037298202515, | |
| "learning_rate": 0.00023596326922198366, | |
| "loss": 0.8328, | |
| "step": 12560 | |
| }, | |
| { | |
| "epoch": 0.14058751488919086, | |
| "grad_norm": 0.2360977679491043, | |
| "learning_rate": 0.00023573484398556351, | |
| "loss": 0.8373, | |
| "step": 12570 | |
| }, | |
| { | |
| "epoch": 0.14069935857645355, | |
| "grad_norm": 0.22873692214488983, | |
| "learning_rate": 0.00023550641874914342, | |
| "loss": 0.8399, | |
| "step": 12580 | |
| }, | |
| { | |
| "epoch": 0.14081120226371624, | |
| "grad_norm": 0.228402242064476, | |
| "learning_rate": 0.0002352779935127233, | |
| "loss": 0.8272, | |
| "step": 12590 | |
| }, | |
| { | |
| "epoch": 0.1409230459509789, | |
| "grad_norm": 0.2625369131565094, | |
| "learning_rate": 0.00023504956827630316, | |
| "loss": 0.8413, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.1410348896382416, | |
| "grad_norm": 0.2744843363761902, | |
| "learning_rate": 0.00023482114303988305, | |
| "loss": 0.823, | |
| "step": 12610 | |
| }, | |
| { | |
| "epoch": 0.1411467333255043, | |
| "grad_norm": 0.24845914542675018, | |
| "learning_rate": 0.00023459271780346293, | |
| "loss": 0.8089, | |
| "step": 12620 | |
| }, | |
| { | |
| "epoch": 0.14125857701276695, | |
| "grad_norm": 0.2431713193655014, | |
| "learning_rate": 0.0002343642925670428, | |
| "loss": 0.8204, | |
| "step": 12630 | |
| }, | |
| { | |
| "epoch": 0.14137042070002964, | |
| "grad_norm": 0.2636731266975403, | |
| "learning_rate": 0.0002341358673306227, | |
| "loss": 0.8241, | |
| "step": 12640 | |
| }, | |
| { | |
| "epoch": 0.14148226438729233, | |
| "grad_norm": 0.24605631828308105, | |
| "learning_rate": 0.00023390744209420255, | |
| "loss": 0.837, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 0.141594108074555, | |
| "grad_norm": 0.25722581148147583, | |
| "learning_rate": 0.00023367901685778246, | |
| "loss": 0.8338, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 0.1417059517618177, | |
| "grad_norm": 0.2628157138824463, | |
| "learning_rate": 0.00023345059162136234, | |
| "loss": 0.8271, | |
| "step": 12670 | |
| }, | |
| { | |
| "epoch": 0.14181779544908035, | |
| "grad_norm": 0.24534687399864197, | |
| "learning_rate": 0.0002332221663849422, | |
| "loss": 0.8281, | |
| "step": 12680 | |
| }, | |
| { | |
| "epoch": 0.14192963913634304, | |
| "grad_norm": 0.24370639026165009, | |
| "learning_rate": 0.00023299374114852208, | |
| "loss": 0.8243, | |
| "step": 12690 | |
| }, | |
| { | |
| "epoch": 0.14204148282360574, | |
| "grad_norm": 0.2993674576282501, | |
| "learning_rate": 0.000232765315912102, | |
| "loss": 0.8191, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.1421533265108684, | |
| "grad_norm": 0.2372383326292038, | |
| "learning_rate": 0.00023253689067568185, | |
| "loss": 0.8115, | |
| "step": 12710 | |
| }, | |
| { | |
| "epoch": 0.1422651701981311, | |
| "grad_norm": 0.2405237853527069, | |
| "learning_rate": 0.00023230846543926173, | |
| "loss": 0.8012, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 0.14237701388539378, | |
| "grad_norm": 0.23501497507095337, | |
| "learning_rate": 0.0002320800402028416, | |
| "loss": 0.8272, | |
| "step": 12730 | |
| }, | |
| { | |
| "epoch": 0.14248885757265645, | |
| "grad_norm": 0.2573966085910797, | |
| "learning_rate": 0.0002318516149664215, | |
| "loss": 0.8231, | |
| "step": 12740 | |
| }, | |
| { | |
| "epoch": 0.14260070125991914, | |
| "grad_norm": 0.25884565711021423, | |
| "learning_rate": 0.00023162318973000138, | |
| "loss": 0.8293, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 0.14271254494718183, | |
| "grad_norm": 0.24788953363895416, | |
| "learning_rate": 0.00023139476449358126, | |
| "loss": 0.8338, | |
| "step": 12760 | |
| }, | |
| { | |
| "epoch": 0.1428243886344445, | |
| "grad_norm": 0.23874413967132568, | |
| "learning_rate": 0.00023116633925716112, | |
| "loss": 0.8184, | |
| "step": 12770 | |
| }, | |
| { | |
| "epoch": 0.14293623232170719, | |
| "grad_norm": 0.2358027547597885, | |
| "learning_rate": 0.00023093791402074103, | |
| "loss": 0.8143, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 0.14304807600896988, | |
| "grad_norm": 0.22447925806045532, | |
| "learning_rate": 0.0002307094887843209, | |
| "loss": 0.8093, | |
| "step": 12790 | |
| }, | |
| { | |
| "epoch": 0.14315991969623254, | |
| "grad_norm": 0.25550246238708496, | |
| "learning_rate": 0.00023048106354790077, | |
| "loss": 0.8178, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.14327176338349523, | |
| "grad_norm": 0.2370327264070511, | |
| "learning_rate": 0.00023025263831148065, | |
| "loss": 0.8035, | |
| "step": 12810 | |
| }, | |
| { | |
| "epoch": 0.1433836070707579, | |
| "grad_norm": 0.24910229444503784, | |
| "learning_rate": 0.00023002421307506056, | |
| "loss": 0.7965, | |
| "step": 12820 | |
| }, | |
| { | |
| "epoch": 0.1434954507580206, | |
| "grad_norm": 0.23592302203178406, | |
| "learning_rate": 0.0002297957878386404, | |
| "loss": 0.808, | |
| "step": 12830 | |
| }, | |
| { | |
| "epoch": 0.14360729444528328, | |
| "grad_norm": 0.24010522663593292, | |
| "learning_rate": 0.0002295673626022203, | |
| "loss": 0.8047, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 0.14371913813254594, | |
| "grad_norm": 0.26334619522094727, | |
| "learning_rate": 0.00022933893736580015, | |
| "loss": 0.8011, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 0.14383098181980863, | |
| "grad_norm": 0.23162928223609924, | |
| "learning_rate": 0.00022911051212938006, | |
| "loss": 0.811, | |
| "step": 12860 | |
| }, | |
| { | |
| "epoch": 0.14394282550707133, | |
| "grad_norm": 0.24273565411567688, | |
| "learning_rate": 0.00022888208689295994, | |
| "loss": 0.8249, | |
| "step": 12870 | |
| }, | |
| { | |
| "epoch": 0.144054669194334, | |
| "grad_norm": 0.239716574549675, | |
| "learning_rate": 0.0002286536616565398, | |
| "loss": 0.8146, | |
| "step": 12880 | |
| }, | |
| { | |
| "epoch": 0.14416651288159668, | |
| "grad_norm": 0.22947145998477936, | |
| "learning_rate": 0.0002284252364201197, | |
| "loss": 0.8037, | |
| "step": 12890 | |
| }, | |
| { | |
| "epoch": 0.14427835656885937, | |
| "grad_norm": 0.2369975745677948, | |
| "learning_rate": 0.0002281968111836996, | |
| "loss": 0.7938, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.14439020025612204, | |
| "grad_norm": 0.23150302469730377, | |
| "learning_rate": 0.00022796838594727945, | |
| "loss": 0.7971, | |
| "step": 12910 | |
| }, | |
| { | |
| "epoch": 0.14450204394338473, | |
| "grad_norm": 0.25659120082855225, | |
| "learning_rate": 0.00022773996071085933, | |
| "loss": 0.7897, | |
| "step": 12920 | |
| }, | |
| { | |
| "epoch": 0.14461388763064742, | |
| "grad_norm": 0.26838308572769165, | |
| "learning_rate": 0.00022751153547443924, | |
| "loss": 0.8025, | |
| "step": 12930 | |
| }, | |
| { | |
| "epoch": 0.14472573131791008, | |
| "grad_norm": 0.2421617954969406, | |
| "learning_rate": 0.0002272831102380191, | |
| "loss": 0.7937, | |
| "step": 12940 | |
| }, | |
| { | |
| "epoch": 0.14483757500517278, | |
| "grad_norm": 0.22780479490756989, | |
| "learning_rate": 0.00022705468500159898, | |
| "loss": 0.7861, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 0.14494941869243544, | |
| "grad_norm": 0.2561044692993164, | |
| "learning_rate": 0.00022682625976517886, | |
| "loss": 0.7817, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 0.14506126237969813, | |
| "grad_norm": 0.24073092639446259, | |
| "learning_rate": 0.00022659783452875875, | |
| "loss": 0.8024, | |
| "step": 12970 | |
| }, | |
| { | |
| "epoch": 0.14517310606696082, | |
| "grad_norm": 0.24959658086299896, | |
| "learning_rate": 0.00022636940929233863, | |
| "loss": 0.7994, | |
| "step": 12980 | |
| }, | |
| { | |
| "epoch": 0.14528494975422349, | |
| "grad_norm": 0.2711149752140045, | |
| "learning_rate": 0.0002261409840559185, | |
| "loss": 0.8011, | |
| "step": 12990 | |
| }, | |
| { | |
| "epoch": 0.14539679344148618, | |
| "grad_norm": 0.2447725236415863, | |
| "learning_rate": 0.00022591255881949837, | |
| "loss": 0.7957, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.14550863712874887, | |
| "grad_norm": 0.26505330204963684, | |
| "learning_rate": 0.00022568413358307828, | |
| "loss": 0.7932, | |
| "step": 13010 | |
| }, | |
| { | |
| "epoch": 0.14562048081601153, | |
| "grad_norm": 0.256712943315506, | |
| "learning_rate": 0.00022545570834665816, | |
| "loss": 0.7919, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 0.14573232450327422, | |
| "grad_norm": 0.23816627264022827, | |
| "learning_rate": 0.00022522728311023802, | |
| "loss": 0.7942, | |
| "step": 13030 | |
| }, | |
| { | |
| "epoch": 0.14584416819053692, | |
| "grad_norm": 0.25607794523239136, | |
| "learning_rate": 0.0002249988578738179, | |
| "loss": 0.8058, | |
| "step": 13040 | |
| }, | |
| { | |
| "epoch": 0.14595601187779958, | |
| "grad_norm": 0.2644692361354828, | |
| "learning_rate": 0.0002247704326373978, | |
| "loss": 0.8026, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 0.14606785556506227, | |
| "grad_norm": 0.24160505831241608, | |
| "learning_rate": 0.00022454200740097766, | |
| "loss": 0.8013, | |
| "step": 13060 | |
| }, | |
| { | |
| "epoch": 0.14617969925232496, | |
| "grad_norm": 0.25321200489997864, | |
| "learning_rate": 0.00022431358216455755, | |
| "loss": 0.802, | |
| "step": 13070 | |
| }, | |
| { | |
| "epoch": 0.14629154293958763, | |
| "grad_norm": 0.38834208250045776, | |
| "learning_rate": 0.0002240851569281374, | |
| "loss": 0.8053, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 0.14640338662685032, | |
| "grad_norm": 0.2638767957687378, | |
| "learning_rate": 0.0002238567316917173, | |
| "loss": 0.803, | |
| "step": 13090 | |
| }, | |
| { | |
| "epoch": 0.14651523031411298, | |
| "grad_norm": 0.33412685990333557, | |
| "learning_rate": 0.0002236283064552972, | |
| "loss": 0.8091, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.14662707400137567, | |
| "grad_norm": 0.27539852261543274, | |
| "learning_rate": 0.00022339988121887705, | |
| "loss": 0.8019, | |
| "step": 13110 | |
| }, | |
| { | |
| "epoch": 0.14673891768863837, | |
| "grad_norm": 0.25128626823425293, | |
| "learning_rate": 0.00022317145598245693, | |
| "loss": 0.7961, | |
| "step": 13120 | |
| }, | |
| { | |
| "epoch": 0.14685076137590103, | |
| "grad_norm": 0.27428579330444336, | |
| "learning_rate": 0.00022294303074603684, | |
| "loss": 0.792, | |
| "step": 13130 | |
| }, | |
| { | |
| "epoch": 0.14696260506316372, | |
| "grad_norm": 0.25421425700187683, | |
| "learning_rate": 0.0002227146055096167, | |
| "loss": 0.8139, | |
| "step": 13140 | |
| }, | |
| { | |
| "epoch": 0.1470744487504264, | |
| "grad_norm": 0.23709440231323242, | |
| "learning_rate": 0.00022248618027319658, | |
| "loss": 0.8147, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 0.14718629243768908, | |
| "grad_norm": 0.2693617641925812, | |
| "learning_rate": 0.00022225775503677646, | |
| "loss": 0.8174, | |
| "step": 13160 | |
| }, | |
| { | |
| "epoch": 0.14729813612495177, | |
| "grad_norm": 0.26674261689186096, | |
| "learning_rate": 0.00022202932980035635, | |
| "loss": 0.8105, | |
| "step": 13170 | |
| }, | |
| { | |
| "epoch": 0.14740997981221446, | |
| "grad_norm": 0.2656268775463104, | |
| "learning_rate": 0.00022180090456393623, | |
| "loss": 0.8355, | |
| "step": 13180 | |
| }, | |
| { | |
| "epoch": 0.14752182349947712, | |
| "grad_norm": 0.2587822377681732, | |
| "learning_rate": 0.0002215724793275161, | |
| "loss": 0.8311, | |
| "step": 13190 | |
| }, | |
| { | |
| "epoch": 0.14763366718673981, | |
| "grad_norm": 0.29723209142684937, | |
| "learning_rate": 0.00022134405409109597, | |
| "loss": 0.8664, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.1477455108740025, | |
| "grad_norm": 0.2579325735569, | |
| "learning_rate": 0.00022111562885467588, | |
| "loss": 0.8515, | |
| "step": 13210 | |
| }, | |
| { | |
| "epoch": 0.14785735456126517, | |
| "grad_norm": 0.28357258439064026, | |
| "learning_rate": 0.00022088720361825576, | |
| "loss": 0.8562, | |
| "step": 13220 | |
| }, | |
| { | |
| "epoch": 0.14796919824852786, | |
| "grad_norm": 0.26742318272590637, | |
| "learning_rate": 0.00022065877838183562, | |
| "loss": 0.8571, | |
| "step": 13230 | |
| }, | |
| { | |
| "epoch": 0.14808104193579055, | |
| "grad_norm": 0.2750874161720276, | |
| "learning_rate": 0.0002204303531454155, | |
| "loss": 0.8449, | |
| "step": 13240 | |
| }, | |
| { | |
| "epoch": 0.14819288562305322, | |
| "grad_norm": 0.3043031692504883, | |
| "learning_rate": 0.0002202019279089954, | |
| "loss": 0.8472, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 0.1483047293103159, | |
| "grad_norm": 0.27216988801956177, | |
| "learning_rate": 0.00021997350267257527, | |
| "loss": 0.8732, | |
| "step": 13260 | |
| }, | |
| { | |
| "epoch": 0.14841657299757857, | |
| "grad_norm": 0.2818603515625, | |
| "learning_rate": 0.00021974507743615515, | |
| "loss": 0.8333, | |
| "step": 13270 | |
| }, | |
| { | |
| "epoch": 0.14852841668484126, | |
| "grad_norm": 0.2604407072067261, | |
| "learning_rate": 0.000219516652199735, | |
| "loss": 0.8467, | |
| "step": 13280 | |
| }, | |
| { | |
| "epoch": 0.14864026037210396, | |
| "grad_norm": 0.28342294692993164, | |
| "learning_rate": 0.00021928822696331491, | |
| "loss": 0.8292, | |
| "step": 13290 | |
| }, | |
| { | |
| "epoch": 0.14875210405936662, | |
| "grad_norm": 0.2564396262168884, | |
| "learning_rate": 0.0002190598017268948, | |
| "loss": 0.8355, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.1488639477466293, | |
| "grad_norm": 0.2528108060359955, | |
| "learning_rate": 0.00021883137649047465, | |
| "loss": 0.8269, | |
| "step": 13310 | |
| }, | |
| { | |
| "epoch": 0.148975791433892, | |
| "grad_norm": 0.26454785466194153, | |
| "learning_rate": 0.00021860295125405456, | |
| "loss": 0.8425, | |
| "step": 13320 | |
| }, | |
| { | |
| "epoch": 0.14908763512115467, | |
| "grad_norm": 0.25204601883888245, | |
| "learning_rate": 0.00021837452601763445, | |
| "loss": 0.8251, | |
| "step": 13330 | |
| }, | |
| { | |
| "epoch": 0.14919947880841736, | |
| "grad_norm": 0.24680152535438538, | |
| "learning_rate": 0.0002181461007812143, | |
| "loss": 0.8247, | |
| "step": 13340 | |
| }, | |
| { | |
| "epoch": 0.14931132249568005, | |
| "grad_norm": 0.27356913685798645, | |
| "learning_rate": 0.00021791767554479418, | |
| "loss": 0.811, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 0.1494231661829427, | |
| "grad_norm": 0.24703428149223328, | |
| "learning_rate": 0.0002176892503083741, | |
| "loss": 0.8145, | |
| "step": 13360 | |
| }, | |
| { | |
| "epoch": 0.1495350098702054, | |
| "grad_norm": 0.27793166041374207, | |
| "learning_rate": 0.00021746082507195395, | |
| "loss": 0.8162, | |
| "step": 13370 | |
| }, | |
| { | |
| "epoch": 0.1496468535574681, | |
| "grad_norm": 0.28826582431793213, | |
| "learning_rate": 0.00021723239983553383, | |
| "loss": 0.8258, | |
| "step": 13380 | |
| }, | |
| { | |
| "epoch": 0.14975869724473076, | |
| "grad_norm": 0.24826544523239136, | |
| "learning_rate": 0.00021700397459911372, | |
| "loss": 0.8131, | |
| "step": 13390 | |
| }, | |
| { | |
| "epoch": 0.14987054093199345, | |
| "grad_norm": 0.29015326499938965, | |
| "learning_rate": 0.0002167755493626936, | |
| "loss": 0.8241, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.14998238461925611, | |
| "grad_norm": 0.2692265510559082, | |
| "learning_rate": 0.00021654712412627348, | |
| "loss": 0.8046, | |
| "step": 13410 | |
| }, | |
| { | |
| "epoch": 0.1500942283065188, | |
| "grad_norm": 0.28277263045310974, | |
| "learning_rate": 0.00021631869888985336, | |
| "loss": 0.8075, | |
| "step": 13420 | |
| }, | |
| { | |
| "epoch": 0.1502060719937815, | |
| "grad_norm": 0.25920721888542175, | |
| "learning_rate": 0.00021609027365343322, | |
| "loss": 0.8146, | |
| "step": 13430 | |
| }, | |
| { | |
| "epoch": 0.15031791568104416, | |
| "grad_norm": 0.2548248767852783, | |
| "learning_rate": 0.00021586184841701313, | |
| "loss": 0.82, | |
| "step": 13440 | |
| }, | |
| { | |
| "epoch": 0.15042975936830685, | |
| "grad_norm": 0.3121783435344696, | |
| "learning_rate": 0.000215633423180593, | |
| "loss": 0.796, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 0.15054160305556955, | |
| "grad_norm": 0.2799825370311737, | |
| "learning_rate": 0.00021540499794417287, | |
| "loss": 0.8073, | |
| "step": 13460 | |
| }, | |
| { | |
| "epoch": 0.1506534467428322, | |
| "grad_norm": 0.24525675177574158, | |
| "learning_rate": 0.00021517657270775275, | |
| "loss": 0.804, | |
| "step": 13470 | |
| }, | |
| { | |
| "epoch": 0.1507652904300949, | |
| "grad_norm": 0.26799294352531433, | |
| "learning_rate": 0.00021494814747133266, | |
| "loss": 0.8086, | |
| "step": 13480 | |
| }, | |
| { | |
| "epoch": 0.1508771341173576, | |
| "grad_norm": 0.24744056165218353, | |
| "learning_rate": 0.00021471972223491252, | |
| "loss": 0.7972, | |
| "step": 13490 | |
| }, | |
| { | |
| "epoch": 0.15098897780462026, | |
| "grad_norm": 0.27284878492355347, | |
| "learning_rate": 0.0002144912969984924, | |
| "loss": 0.8048, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.15110082149188295, | |
| "grad_norm": 0.2427281141281128, | |
| "learning_rate": 0.00021426287176207225, | |
| "loss": 0.8043, | |
| "step": 13510 | |
| }, | |
| { | |
| "epoch": 0.15121266517914564, | |
| "grad_norm": 0.27432921528816223, | |
| "learning_rate": 0.00021403444652565216, | |
| "loss": 0.8198, | |
| "step": 13520 | |
| }, | |
| { | |
| "epoch": 0.1513245088664083, | |
| "grad_norm": 0.26843661069869995, | |
| "learning_rate": 0.00021380602128923205, | |
| "loss": 0.8156, | |
| "step": 13530 | |
| }, | |
| { | |
| "epoch": 0.151436352553671, | |
| "grad_norm": 0.2460176795721054, | |
| "learning_rate": 0.0002135775960528119, | |
| "loss": 0.806, | |
| "step": 13540 | |
| }, | |
| { | |
| "epoch": 0.15154819624093366, | |
| "grad_norm": 0.24147658050060272, | |
| "learning_rate": 0.00021334917081639179, | |
| "loss": 0.8146, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 0.15166003992819635, | |
| "grad_norm": 0.2715270221233368, | |
| "learning_rate": 0.0002131207455799717, | |
| "loss": 0.8065, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 0.15177188361545904, | |
| "grad_norm": 0.2851991653442383, | |
| "learning_rate": 0.00021289232034355155, | |
| "loss": 0.8042, | |
| "step": 13570 | |
| }, | |
| { | |
| "epoch": 0.1518837273027217, | |
| "grad_norm": 0.2779170870780945, | |
| "learning_rate": 0.00021266389510713143, | |
| "loss": 0.8163, | |
| "step": 13580 | |
| }, | |
| { | |
| "epoch": 0.1519955709899844, | |
| "grad_norm": 0.2853197455406189, | |
| "learning_rate": 0.00021243546987071132, | |
| "loss": 0.8025, | |
| "step": 13590 | |
| }, | |
| { | |
| "epoch": 0.1521074146772471, | |
| "grad_norm": 0.2753603160381317, | |
| "learning_rate": 0.0002122070446342912, | |
| "loss": 0.8187, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.15221925836450975, | |
| "grad_norm": 0.29546552896499634, | |
| "learning_rate": 0.00021197861939787108, | |
| "loss": 0.8189, | |
| "step": 13610 | |
| }, | |
| { | |
| "epoch": 0.15233110205177244, | |
| "grad_norm": 0.2799798250198364, | |
| "learning_rate": 0.00021175019416145097, | |
| "loss": 0.8098, | |
| "step": 13620 | |
| }, | |
| { | |
| "epoch": 0.15244294573903514, | |
| "grad_norm": 0.23527085781097412, | |
| "learning_rate": 0.00021152176892503082, | |
| "loss": 0.8212, | |
| "step": 13630 | |
| }, | |
| { | |
| "epoch": 0.1525547894262978, | |
| "grad_norm": 0.27207401394844055, | |
| "learning_rate": 0.00021129334368861073, | |
| "loss": 0.808, | |
| "step": 13640 | |
| }, | |
| { | |
| "epoch": 0.1526666331135605, | |
| "grad_norm": 0.26520609855651855, | |
| "learning_rate": 0.00021106491845219061, | |
| "loss": 0.8133, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 0.15277847680082318, | |
| "grad_norm": 0.2750151455402374, | |
| "learning_rate": 0.00021083649321577047, | |
| "loss": 0.8248, | |
| "step": 13660 | |
| }, | |
| { | |
| "epoch": 0.15289032048808585, | |
| "grad_norm": 0.28339120745658875, | |
| "learning_rate": 0.00021060806797935035, | |
| "loss": 0.8175, | |
| "step": 13670 | |
| }, | |
| { | |
| "epoch": 0.15300216417534854, | |
| "grad_norm": 0.27611440420150757, | |
| "learning_rate": 0.00021037964274293026, | |
| "loss": 0.8232, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 0.1531140078626112, | |
| "grad_norm": 0.264113187789917, | |
| "learning_rate": 0.00021015121750651012, | |
| "loss": 0.8217, | |
| "step": 13690 | |
| }, | |
| { | |
| "epoch": 0.1532258515498739, | |
| "grad_norm": 0.27031853795051575, | |
| "learning_rate": 0.00020992279227009, | |
| "loss": 0.8242, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.15333769523713658, | |
| "grad_norm": 0.2753359079360962, | |
| "learning_rate": 0.00020969436703366988, | |
| "loss": 0.8311, | |
| "step": 13710 | |
| }, | |
| { | |
| "epoch": 0.15344953892439925, | |
| "grad_norm": 0.24859648942947388, | |
| "learning_rate": 0.00020946594179724977, | |
| "loss": 0.8285, | |
| "step": 13720 | |
| }, | |
| { | |
| "epoch": 0.15356138261166194, | |
| "grad_norm": 0.2773294448852539, | |
| "learning_rate": 0.00020923751656082965, | |
| "loss": 0.8201, | |
| "step": 13730 | |
| }, | |
| { | |
| "epoch": 0.15367322629892463, | |
| "grad_norm": 0.23855488002300262, | |
| "learning_rate": 0.0002090090913244095, | |
| "loss": 0.8145, | |
| "step": 13740 | |
| }, | |
| { | |
| "epoch": 0.1537850699861873, | |
| "grad_norm": 0.27641457319259644, | |
| "learning_rate": 0.0002087806660879894, | |
| "loss": 0.8233, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 0.15389691367345, | |
| "grad_norm": 0.26556023955345154, | |
| "learning_rate": 0.0002085522408515693, | |
| "loss": 0.8309, | |
| "step": 13760 | |
| }, | |
| { | |
| "epoch": 0.15400875736071268, | |
| "grad_norm": 0.2980164885520935, | |
| "learning_rate": 0.00020832381561514915, | |
| "loss": 0.8585, | |
| "step": 13770 | |
| }, | |
| { | |
| "epoch": 0.15412060104797534, | |
| "grad_norm": 0.21802592277526855, | |
| "learning_rate": 0.00020809539037872904, | |
| "loss": 0.8385, | |
| "step": 13780 | |
| }, | |
| { | |
| "epoch": 0.15423244473523803, | |
| "grad_norm": 0.3153620958328247, | |
| "learning_rate": 0.00020786696514230895, | |
| "loss": 0.8423, | |
| "step": 13790 | |
| }, | |
| { | |
| "epoch": 0.15434428842250072, | |
| "grad_norm": 0.2928372621536255, | |
| "learning_rate": 0.0002076385399058888, | |
| "loss": 0.8399, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.1544561321097634, | |
| "grad_norm": 0.3015557527542114, | |
| "learning_rate": 0.00020741011466946868, | |
| "loss": 0.843, | |
| "step": 13810 | |
| }, | |
| { | |
| "epoch": 0.15456797579702608, | |
| "grad_norm": 0.2243575006723404, | |
| "learning_rate": 0.00020718168943304857, | |
| "loss": 0.8302, | |
| "step": 13820 | |
| }, | |
| { | |
| "epoch": 0.15467981948428874, | |
| "grad_norm": 0.23281534016132355, | |
| "learning_rate": 0.00020695326419662845, | |
| "loss": 0.8268, | |
| "step": 13830 | |
| }, | |
| { | |
| "epoch": 0.15479166317155144, | |
| "grad_norm": 0.2412877380847931, | |
| "learning_rate": 0.00020672483896020833, | |
| "loss": 0.849, | |
| "step": 13840 | |
| }, | |
| { | |
| "epoch": 0.15490350685881413, | |
| "grad_norm": 0.2762492001056671, | |
| "learning_rate": 0.00020649641372378822, | |
| "loss": 0.8324, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 0.1550153505460768, | |
| "grad_norm": 0.27976560592651367, | |
| "learning_rate": 0.00020626798848736807, | |
| "loss": 0.843, | |
| "step": 13860 | |
| }, | |
| { | |
| "epoch": 0.15512719423333948, | |
| "grad_norm": 0.29076194763183594, | |
| "learning_rate": 0.00020603956325094798, | |
| "loss": 0.8575, | |
| "step": 13870 | |
| }, | |
| { | |
| "epoch": 0.15523903792060217, | |
| "grad_norm": 0.2367868423461914, | |
| "learning_rate": 0.00020581113801452786, | |
| "loss": 0.8465, | |
| "step": 13880 | |
| }, | |
| { | |
| "epoch": 0.15535088160786484, | |
| "grad_norm": 0.26191186904907227, | |
| "learning_rate": 0.00020558271277810772, | |
| "loss": 0.8291, | |
| "step": 13890 | |
| }, | |
| { | |
| "epoch": 0.15546272529512753, | |
| "grad_norm": 0.27254414558410645, | |
| "learning_rate": 0.0002053542875416876, | |
| "loss": 0.8347, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.15557456898239022, | |
| "grad_norm": 0.2718988060951233, | |
| "learning_rate": 0.0002051258623052675, | |
| "loss": 0.8319, | |
| "step": 13910 | |
| }, | |
| { | |
| "epoch": 0.15568641266965288, | |
| "grad_norm": 0.24478264153003693, | |
| "learning_rate": 0.00020489743706884737, | |
| "loss": 0.8369, | |
| "step": 13920 | |
| }, | |
| { | |
| "epoch": 0.15579825635691558, | |
| "grad_norm": 0.27791038155555725, | |
| "learning_rate": 0.00020466901183242725, | |
| "loss": 0.8486, | |
| "step": 13930 | |
| }, | |
| { | |
| "epoch": 0.15591010004417827, | |
| "grad_norm": 0.27220630645751953, | |
| "learning_rate": 0.00020444058659600713, | |
| "loss": 0.8335, | |
| "step": 13940 | |
| }, | |
| { | |
| "epoch": 0.15602194373144093, | |
| "grad_norm": 0.2945479154586792, | |
| "learning_rate": 0.00020421216135958702, | |
| "loss": 0.8234, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 0.15613378741870362, | |
| "grad_norm": 0.2911258041858673, | |
| "learning_rate": 0.0002039837361231669, | |
| "loss": 0.8279, | |
| "step": 13960 | |
| }, | |
| { | |
| "epoch": 0.15624563110596631, | |
| "grad_norm": 0.3039700984954834, | |
| "learning_rate": 0.00020375531088674676, | |
| "loss": 0.8409, | |
| "step": 13970 | |
| }, | |
| { | |
| "epoch": 0.15635747479322898, | |
| "grad_norm": 0.27290788292884827, | |
| "learning_rate": 0.00020352688565032664, | |
| "loss": 0.8394, | |
| "step": 13980 | |
| }, | |
| { | |
| "epoch": 0.15646931848049167, | |
| "grad_norm": 0.28534916043281555, | |
| "learning_rate": 0.00020329846041390655, | |
| "loss": 0.8431, | |
| "step": 13990 | |
| }, | |
| { | |
| "epoch": 0.15658116216775433, | |
| "grad_norm": 0.304221510887146, | |
| "learning_rate": 0.0002030700351774864, | |
| "loss": 0.8476, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.15669300585501703, | |
| "grad_norm": 0.3151461184024811, | |
| "learning_rate": 0.0002028416099410663, | |
| "loss": 0.852, | |
| "step": 14010 | |
| }, | |
| { | |
| "epoch": 0.15680484954227972, | |
| "grad_norm": 0.2947019040584564, | |
| "learning_rate": 0.00020261318470464617, | |
| "loss": 0.8396, | |
| "step": 14020 | |
| }, | |
| { | |
| "epoch": 0.15691669322954238, | |
| "grad_norm": 0.2737627625465393, | |
| "learning_rate": 0.00020238475946822605, | |
| "loss": 0.8337, | |
| "step": 14030 | |
| }, | |
| { | |
| "epoch": 0.15702853691680507, | |
| "grad_norm": 0.28257089853286743, | |
| "learning_rate": 0.00020215633423180594, | |
| "loss": 0.8475, | |
| "step": 14040 | |
| }, | |
| { | |
| "epoch": 0.15714038060406776, | |
| "grad_norm": 0.3102625608444214, | |
| "learning_rate": 0.00020192790899538582, | |
| "loss": 0.8451, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 0.15725222429133043, | |
| "grad_norm": 0.2839931845664978, | |
| "learning_rate": 0.00020169948375896567, | |
| "loss": 0.8365, | |
| "step": 14060 | |
| }, | |
| { | |
| "epoch": 0.15736406797859312, | |
| "grad_norm": 0.25566980242729187, | |
| "learning_rate": 0.00020147105852254558, | |
| "loss": 0.8287, | |
| "step": 14070 | |
| }, | |
| { | |
| "epoch": 0.1574759116658558, | |
| "grad_norm": 0.267791211605072, | |
| "learning_rate": 0.00020124263328612547, | |
| "loss": 0.8289, | |
| "step": 14080 | |
| }, | |
| { | |
| "epoch": 0.15758775535311847, | |
| "grad_norm": 0.267635703086853, | |
| "learning_rate": 0.00020101420804970532, | |
| "loss": 0.8357, | |
| "step": 14090 | |
| }, | |
| { | |
| "epoch": 0.15769959904038117, | |
| "grad_norm": 0.28065699338912964, | |
| "learning_rate": 0.0002007857828132852, | |
| "loss": 0.8363, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.15781144272764386, | |
| "grad_norm": 0.26585736870765686, | |
| "learning_rate": 0.00020055735757686512, | |
| "loss": 0.8409, | |
| "step": 14110 | |
| }, | |
| { | |
| "epoch": 0.15792328641490652, | |
| "grad_norm": 0.2562732398509979, | |
| "learning_rate": 0.00020032893234044497, | |
| "loss": 0.8374, | |
| "step": 14120 | |
| }, | |
| { | |
| "epoch": 0.1580351301021692, | |
| "grad_norm": 0.2572222650051117, | |
| "learning_rate": 0.00020010050710402485, | |
| "loss": 0.8405, | |
| "step": 14130 | |
| }, | |
| { | |
| "epoch": 0.15814697378943188, | |
| "grad_norm": 0.3075050413608551, | |
| "learning_rate": 0.00019987208186760474, | |
| "loss": 0.825, | |
| "step": 14140 | |
| }, | |
| { | |
| "epoch": 0.15825881747669457, | |
| "grad_norm": 0.2630293071269989, | |
| "learning_rate": 0.00019964365663118462, | |
| "loss": 0.8326, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 0.15837066116395726, | |
| "grad_norm": 0.255015105009079, | |
| "learning_rate": 0.0001994152313947645, | |
| "loss": 0.8181, | |
| "step": 14160 | |
| }, | |
| { | |
| "epoch": 0.15848250485121992, | |
| "grad_norm": 0.25929179787635803, | |
| "learning_rate": 0.00019918680615834438, | |
| "loss": 0.8067, | |
| "step": 14170 | |
| }, | |
| { | |
| "epoch": 0.15859434853848262, | |
| "grad_norm": 0.27078965306282043, | |
| "learning_rate": 0.00019895838092192424, | |
| "loss": 0.8043, | |
| "step": 14180 | |
| }, | |
| { | |
| "epoch": 0.1587061922257453, | |
| "grad_norm": 0.2618376612663269, | |
| "learning_rate": 0.00019872995568550415, | |
| "loss": 0.8191, | |
| "step": 14190 | |
| }, | |
| { | |
| "epoch": 0.15881803591300797, | |
| "grad_norm": 0.246153324842453, | |
| "learning_rate": 0.000198501530449084, | |
| "loss": 0.8251, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.15892987960027066, | |
| "grad_norm": 0.25498026609420776, | |
| "learning_rate": 0.0001982731052126639, | |
| "loss": 0.8319, | |
| "step": 14210 | |
| }, | |
| { | |
| "epoch": 0.15904172328753335, | |
| "grad_norm": 0.2517942190170288, | |
| "learning_rate": 0.0001980446799762438, | |
| "loss": 0.8106, | |
| "step": 14220 | |
| }, | |
| { | |
| "epoch": 0.15915356697479602, | |
| "grad_norm": 0.2659161388874054, | |
| "learning_rate": 0.00019781625473982365, | |
| "loss": 0.8163, | |
| "step": 14230 | |
| }, | |
| { | |
| "epoch": 0.1592654106620587, | |
| "grad_norm": 0.24527288973331451, | |
| "learning_rate": 0.00019758782950340354, | |
| "loss": 0.8359, | |
| "step": 14240 | |
| }, | |
| { | |
| "epoch": 0.1593772543493214, | |
| "grad_norm": 0.23943792283535004, | |
| "learning_rate": 0.00019735940426698342, | |
| "loss": 0.8253, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 0.15948909803658406, | |
| "grad_norm": 0.30401650071144104, | |
| "learning_rate": 0.0001971309790305633, | |
| "loss": 0.8369, | |
| "step": 14260 | |
| }, | |
| { | |
| "epoch": 0.15960094172384676, | |
| "grad_norm": 0.25001001358032227, | |
| "learning_rate": 0.00019690255379414319, | |
| "loss": 0.8354, | |
| "step": 14270 | |
| }, | |
| { | |
| "epoch": 0.15971278541110942, | |
| "grad_norm": 0.2378586083650589, | |
| "learning_rate": 0.00019667412855772307, | |
| "loss": 0.8324, | |
| "step": 14280 | |
| }, | |
| { | |
| "epoch": 0.1598246290983721, | |
| "grad_norm": 0.26216059923171997, | |
| "learning_rate": 0.00019644570332130292, | |
| "loss": 0.8227, | |
| "step": 14290 | |
| }, | |
| { | |
| "epoch": 0.1599364727856348, | |
| "grad_norm": 0.24156969785690308, | |
| "learning_rate": 0.00019621727808488283, | |
| "loss": 0.8362, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.16004831647289747, | |
| "grad_norm": 0.24192091822624207, | |
| "learning_rate": 0.00019598885284846272, | |
| "loss": 0.835, | |
| "step": 14310 | |
| }, | |
| { | |
| "epoch": 0.16016016016016016, | |
| "grad_norm": 0.24861887097358704, | |
| "learning_rate": 0.00019576042761204257, | |
| "loss": 0.8232, | |
| "step": 14320 | |
| }, | |
| { | |
| "epoch": 0.16027200384742285, | |
| "grad_norm": 0.27175864577293396, | |
| "learning_rate": 0.00019553200237562246, | |
| "loss": 0.8303, | |
| "step": 14330 | |
| }, | |
| { | |
| "epoch": 0.16038384753468551, | |
| "grad_norm": 0.272334486246109, | |
| "learning_rate": 0.00019530357713920237, | |
| "loss": 0.8217, | |
| "step": 14340 | |
| }, | |
| { | |
| "epoch": 0.1604956912219482, | |
| "grad_norm": 0.28357213735580444, | |
| "learning_rate": 0.00019507515190278222, | |
| "loss": 0.8343, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 0.1606075349092109, | |
| "grad_norm": 0.272276371717453, | |
| "learning_rate": 0.0001948467266663621, | |
| "loss": 0.8235, | |
| "step": 14360 | |
| }, | |
| { | |
| "epoch": 0.16071937859647356, | |
| "grad_norm": 0.26771044731140137, | |
| "learning_rate": 0.000194618301429942, | |
| "loss": 0.8292, | |
| "step": 14370 | |
| }, | |
| { | |
| "epoch": 0.16083122228373625, | |
| "grad_norm": 0.27449774742126465, | |
| "learning_rate": 0.00019438987619352187, | |
| "loss": 0.8485, | |
| "step": 14380 | |
| }, | |
| { | |
| "epoch": 0.16094306597099894, | |
| "grad_norm": 0.26026156544685364, | |
| "learning_rate": 0.00019416145095710175, | |
| "loss": 0.8458, | |
| "step": 14390 | |
| }, | |
| { | |
| "epoch": 0.1610549096582616, | |
| "grad_norm": 0.2667345404624939, | |
| "learning_rate": 0.00019393302572068164, | |
| "loss": 0.8519, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.1611667533455243, | |
| "grad_norm": 0.26302048563957214, | |
| "learning_rate": 0.0001937046004842615, | |
| "loss": 0.8353, | |
| "step": 14410 | |
| }, | |
| { | |
| "epoch": 0.16127859703278696, | |
| "grad_norm": 0.24420003592967987, | |
| "learning_rate": 0.0001934761752478414, | |
| "loss": 0.8464, | |
| "step": 14420 | |
| }, | |
| { | |
| "epoch": 0.16139044072004965, | |
| "grad_norm": 0.2739315629005432, | |
| "learning_rate": 0.00019324775001142126, | |
| "loss": 0.8257, | |
| "step": 14430 | |
| }, | |
| { | |
| "epoch": 0.16150228440731235, | |
| "grad_norm": 0.2370629757642746, | |
| "learning_rate": 0.00019301932477500114, | |
| "loss": 0.8324, | |
| "step": 14440 | |
| }, | |
| { | |
| "epoch": 0.161614128094575, | |
| "grad_norm": 0.2616153955459595, | |
| "learning_rate": 0.00019279089953858102, | |
| "loss": 0.8513, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 0.1617259717818377, | |
| "grad_norm": 0.2527558207511902, | |
| "learning_rate": 0.0001925624743021609, | |
| "loss": 0.8435, | |
| "step": 14460 | |
| }, | |
| { | |
| "epoch": 0.1618378154691004, | |
| "grad_norm": 0.28255122900009155, | |
| "learning_rate": 0.0001923340490657408, | |
| "loss": 0.8497, | |
| "step": 14470 | |
| }, | |
| { | |
| "epoch": 0.16194965915636306, | |
| "grad_norm": 0.23198026418685913, | |
| "learning_rate": 0.00019210562382932067, | |
| "loss": 0.8357, | |
| "step": 14480 | |
| }, | |
| { | |
| "epoch": 0.16206150284362575, | |
| "grad_norm": 0.2534460127353668, | |
| "learning_rate": 0.00019187719859290053, | |
| "loss": 0.8396, | |
| "step": 14490 | |
| }, | |
| { | |
| "epoch": 0.16217334653088844, | |
| "grad_norm": 0.2693686783313751, | |
| "learning_rate": 0.00019164877335648044, | |
| "loss": 0.8438, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.1622851902181511, | |
| "grad_norm": 0.26181599497795105, | |
| "learning_rate": 0.00019142034812006032, | |
| "loss": 0.8452, | |
| "step": 14510 | |
| }, | |
| { | |
| "epoch": 0.1623970339054138, | |
| "grad_norm": 0.2268761545419693, | |
| "learning_rate": 0.00019119192288364017, | |
| "loss": 0.8496, | |
| "step": 14520 | |
| }, | |
| { | |
| "epoch": 0.1625088775926765, | |
| "grad_norm": 0.27698907256126404, | |
| "learning_rate": 0.00019096349764722006, | |
| "loss": 0.8265, | |
| "step": 14530 | |
| }, | |
| { | |
| "epoch": 0.16262072127993915, | |
| "grad_norm": 0.30570700764656067, | |
| "learning_rate": 0.00019073507241079997, | |
| "loss": 0.8399, | |
| "step": 14540 | |
| }, | |
| { | |
| "epoch": 0.16273256496720184, | |
| "grad_norm": 0.2894477844238281, | |
| "learning_rate": 0.00019050664717437982, | |
| "loss": 0.8488, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 0.16284440865446453, | |
| "grad_norm": 0.3094457685947418, | |
| "learning_rate": 0.0001902782219379597, | |
| "loss": 0.8243, | |
| "step": 14560 | |
| }, | |
| { | |
| "epoch": 0.1629562523417272, | |
| "grad_norm": 0.2908037602901459, | |
| "learning_rate": 0.0001900497967015396, | |
| "loss": 0.835, | |
| "step": 14570 | |
| }, | |
| { | |
| "epoch": 0.1630680960289899, | |
| "grad_norm": 0.27222102880477905, | |
| "learning_rate": 0.00018982137146511947, | |
| "loss": 0.8306, | |
| "step": 14580 | |
| }, | |
| { | |
| "epoch": 0.16317993971625255, | |
| "grad_norm": 0.2542339563369751, | |
| "learning_rate": 0.00018959294622869935, | |
| "loss": 0.8259, | |
| "step": 14590 | |
| }, | |
| { | |
| "epoch": 0.16329178340351524, | |
| "grad_norm": 0.28288012742996216, | |
| "learning_rate": 0.00018936452099227924, | |
| "loss": 0.8243, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.16340362709077794, | |
| "grad_norm": 0.2584143877029419, | |
| "learning_rate": 0.0001891360957558591, | |
| "loss": 0.8224, | |
| "step": 14610 | |
| }, | |
| { | |
| "epoch": 0.1635154707780406, | |
| "grad_norm": 0.26679450273513794, | |
| "learning_rate": 0.000188907670519439, | |
| "loss": 0.8142, | |
| "step": 14620 | |
| }, | |
| { | |
| "epoch": 0.1636273144653033, | |
| "grad_norm": 0.24589306116104126, | |
| "learning_rate": 0.00018867924528301889, | |
| "loss": 0.81, | |
| "step": 14630 | |
| }, | |
| { | |
| "epoch": 0.16373915815256598, | |
| "grad_norm": 0.28474611043930054, | |
| "learning_rate": 0.00018845082004659874, | |
| "loss": 0.7989, | |
| "step": 14640 | |
| }, | |
| { | |
| "epoch": 0.16385100183982865, | |
| "grad_norm": 0.27567991614341736, | |
| "learning_rate": 0.00018822239481017862, | |
| "loss": 0.8049, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 0.16396284552709134, | |
| "grad_norm": 0.2509905695915222, | |
| "learning_rate": 0.0001879939695737585, | |
| "loss": 0.8168, | |
| "step": 14660 | |
| }, | |
| { | |
| "epoch": 0.16407468921435403, | |
| "grad_norm": 0.30284953117370605, | |
| "learning_rate": 0.0001877655443373384, | |
| "loss": 0.8055, | |
| "step": 14670 | |
| }, | |
| { | |
| "epoch": 0.1641865329016167, | |
| "grad_norm": 0.27638325095176697, | |
| "learning_rate": 0.00018753711910091827, | |
| "loss": 0.8368, | |
| "step": 14680 | |
| }, | |
| { | |
| "epoch": 0.16429837658887939, | |
| "grad_norm": 0.29546642303466797, | |
| "learning_rate": 0.00018730869386449816, | |
| "loss": 0.8161, | |
| "step": 14690 | |
| }, | |
| { | |
| "epoch": 0.16441022027614208, | |
| "grad_norm": 0.2483370304107666, | |
| "learning_rate": 0.00018708026862807804, | |
| "loss": 0.8136, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.16452206396340474, | |
| "grad_norm": 0.2862898111343384, | |
| "learning_rate": 0.00018685184339165792, | |
| "loss": 0.836, | |
| "step": 14710 | |
| }, | |
| { | |
| "epoch": 0.16463390765066743, | |
| "grad_norm": 0.2730434238910675, | |
| "learning_rate": 0.00018662341815523778, | |
| "loss": 0.8279, | |
| "step": 14720 | |
| }, | |
| { | |
| "epoch": 0.1647457513379301, | |
| "grad_norm": 0.2846275269985199, | |
| "learning_rate": 0.0001863949929188177, | |
| "loss": 0.7991, | |
| "step": 14730 | |
| }, | |
| { | |
| "epoch": 0.1648575950251928, | |
| "grad_norm": 0.2455524355173111, | |
| "learning_rate": 0.00018616656768239757, | |
| "loss": 0.7931, | |
| "step": 14740 | |
| }, | |
| { | |
| "epoch": 0.16496943871245548, | |
| "grad_norm": 0.25060829520225525, | |
| "learning_rate": 0.00018593814244597743, | |
| "loss": 0.8009, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 0.16508128239971814, | |
| "grad_norm": 0.2687000334262848, | |
| "learning_rate": 0.0001857097172095573, | |
| "loss": 0.7968, | |
| "step": 14760 | |
| }, | |
| { | |
| "epoch": 0.16519312608698083, | |
| "grad_norm": 0.28619691729545593, | |
| "learning_rate": 0.00018548129197313722, | |
| "loss": 0.7818, | |
| "step": 14770 | |
| }, | |
| { | |
| "epoch": 0.16530496977424353, | |
| "grad_norm": 0.2549494206905365, | |
| "learning_rate": 0.00018525286673671707, | |
| "loss": 0.7877, | |
| "step": 14780 | |
| }, | |
| { | |
| "epoch": 0.1654168134615062, | |
| "grad_norm": 0.2419700175523758, | |
| "learning_rate": 0.00018502444150029696, | |
| "loss": 0.7899, | |
| "step": 14790 | |
| }, | |
| { | |
| "epoch": 0.16552865714876888, | |
| "grad_norm": 0.2636066675186157, | |
| "learning_rate": 0.00018479601626387684, | |
| "loss": 0.7893, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.16564050083603157, | |
| "grad_norm": 0.264072984457016, | |
| "learning_rate": 0.00018456759102745672, | |
| "loss": 0.7984, | |
| "step": 14810 | |
| }, | |
| { | |
| "epoch": 0.16575234452329424, | |
| "grad_norm": 0.2661677598953247, | |
| "learning_rate": 0.0001843391657910366, | |
| "loss": 0.8085, | |
| "step": 14820 | |
| }, | |
| { | |
| "epoch": 0.16586418821055693, | |
| "grad_norm": 0.28324052691459656, | |
| "learning_rate": 0.0001841107405546165, | |
| "loss": 0.8066, | |
| "step": 14830 | |
| }, | |
| { | |
| "epoch": 0.16597603189781962, | |
| "grad_norm": 0.277761310338974, | |
| "learning_rate": 0.00018388231531819634, | |
| "loss": 0.8008, | |
| "step": 14840 | |
| }, | |
| { | |
| "epoch": 0.16608787558508228, | |
| "grad_norm": 0.2669602036476135, | |
| "learning_rate": 0.00018365389008177625, | |
| "loss": 0.8285, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 0.16619971927234498, | |
| "grad_norm": 0.28757140040397644, | |
| "learning_rate": 0.00018342546484535614, | |
| "loss": 0.8121, | |
| "step": 14860 | |
| }, | |
| { | |
| "epoch": 0.16631156295960764, | |
| "grad_norm": 0.2616439163684845, | |
| "learning_rate": 0.000183197039608936, | |
| "loss": 0.8185, | |
| "step": 14870 | |
| }, | |
| { | |
| "epoch": 0.16642340664687033, | |
| "grad_norm": 0.28334370255470276, | |
| "learning_rate": 0.00018296861437251587, | |
| "loss": 0.8229, | |
| "step": 14880 | |
| }, | |
| { | |
| "epoch": 0.16653525033413302, | |
| "grad_norm": 0.2659022808074951, | |
| "learning_rate": 0.00018274018913609576, | |
| "loss": 0.82, | |
| "step": 14890 | |
| }, | |
| { | |
| "epoch": 0.1666470940213957, | |
| "grad_norm": 0.2544262111186981, | |
| "learning_rate": 0.00018251176389967564, | |
| "loss": 0.84, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.16675893770865838, | |
| "grad_norm": 0.27492937445640564, | |
| "learning_rate": 0.00018228333866325552, | |
| "loss": 0.8411, | |
| "step": 14910 | |
| }, | |
| { | |
| "epoch": 0.16687078139592107, | |
| "grad_norm": 0.2961216866970062, | |
| "learning_rate": 0.00018205491342683538, | |
| "loss": 0.8178, | |
| "step": 14920 | |
| }, | |
| { | |
| "epoch": 0.16698262508318373, | |
| "grad_norm": 0.2704416811466217, | |
| "learning_rate": 0.0001818264881904153, | |
| "loss": 0.8264, | |
| "step": 14930 | |
| }, | |
| { | |
| "epoch": 0.16709446877044642, | |
| "grad_norm": 0.261704683303833, | |
| "learning_rate": 0.00018159806295399517, | |
| "loss": 0.8307, | |
| "step": 14940 | |
| }, | |
| { | |
| "epoch": 0.16720631245770912, | |
| "grad_norm": 0.26157405972480774, | |
| "learning_rate": 0.00018136963771757503, | |
| "loss": 0.8064, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 0.16731815614497178, | |
| "grad_norm": 0.2589896023273468, | |
| "learning_rate": 0.0001811412124811549, | |
| "loss": 0.8195, | |
| "step": 14960 | |
| }, | |
| { | |
| "epoch": 0.16742999983223447, | |
| "grad_norm": 0.24691319465637207, | |
| "learning_rate": 0.00018091278724473482, | |
| "loss": 0.8283, | |
| "step": 14970 | |
| }, | |
| { | |
| "epoch": 0.16754184351949716, | |
| "grad_norm": 0.2527819871902466, | |
| "learning_rate": 0.00018068436200831468, | |
| "loss": 0.8229, | |
| "step": 14980 | |
| }, | |
| { | |
| "epoch": 0.16765368720675983, | |
| "grad_norm": 0.2639094293117523, | |
| "learning_rate": 0.00018045593677189456, | |
| "loss": 0.8393, | |
| "step": 14990 | |
| }, | |
| { | |
| "epoch": 0.16776553089402252, | |
| "grad_norm": 0.24417634308338165, | |
| "learning_rate": 0.00018022751153547444, | |
| "loss": 0.8204, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.16787737458128518, | |
| "grad_norm": 0.25673115253448486, | |
| "learning_rate": 0.00017999908629905432, | |
| "loss": 0.8184, | |
| "step": 15010 | |
| }, | |
| { | |
| "epoch": 0.16798921826854787, | |
| "grad_norm": 0.254077285528183, | |
| "learning_rate": 0.0001797706610626342, | |
| "loss": 0.8195, | |
| "step": 15020 | |
| }, | |
| { | |
| "epoch": 0.16810106195581057, | |
| "grad_norm": 0.2455417662858963, | |
| "learning_rate": 0.0001795422358262141, | |
| "loss": 0.8255, | |
| "step": 15030 | |
| }, | |
| { | |
| "epoch": 0.16821290564307323, | |
| "grad_norm": 0.27918189764022827, | |
| "learning_rate": 0.00017931381058979395, | |
| "loss": 0.8345, | |
| "step": 15040 | |
| }, | |
| { | |
| "epoch": 0.16832474933033592, | |
| "grad_norm": 0.2272186279296875, | |
| "learning_rate": 0.00017908538535337386, | |
| "loss": 0.8178, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 0.1684365930175986, | |
| "grad_norm": 0.269189715385437, | |
| "learning_rate": 0.00017885696011695374, | |
| "loss": 0.8343, | |
| "step": 15060 | |
| }, | |
| { | |
| "epoch": 0.16854843670486128, | |
| "grad_norm": 0.2805529832839966, | |
| "learning_rate": 0.0001786285348805336, | |
| "loss": 0.8126, | |
| "step": 15070 | |
| }, | |
| { | |
| "epoch": 0.16866028039212397, | |
| "grad_norm": 0.28788769245147705, | |
| "learning_rate": 0.00017840010964411348, | |
| "loss": 0.8278, | |
| "step": 15080 | |
| }, | |
| { | |
| "epoch": 0.16877212407938666, | |
| "grad_norm": 0.2439277619123459, | |
| "learning_rate": 0.00017817168440769336, | |
| "loss": 0.8272, | |
| "step": 15090 | |
| }, | |
| { | |
| "epoch": 0.16888396776664932, | |
| "grad_norm": 0.3151440918445587, | |
| "learning_rate": 0.00017794325917127324, | |
| "loss": 0.8201, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.16899581145391201, | |
| "grad_norm": 0.2562885880470276, | |
| "learning_rate": 0.00017771483393485313, | |
| "loss": 0.8275, | |
| "step": 15110 | |
| }, | |
| { | |
| "epoch": 0.1691076551411747, | |
| "grad_norm": 0.2718476355075836, | |
| "learning_rate": 0.00017748640869843298, | |
| "loss": 0.821, | |
| "step": 15120 | |
| }, | |
| { | |
| "epoch": 0.16921949882843737, | |
| "grad_norm": 0.2699459493160248, | |
| "learning_rate": 0.0001772579834620129, | |
| "loss": 0.8352, | |
| "step": 15130 | |
| }, | |
| { | |
| "epoch": 0.16933134251570006, | |
| "grad_norm": 0.29737600684165955, | |
| "learning_rate": 0.00017702955822559277, | |
| "loss": 0.8279, | |
| "step": 15140 | |
| }, | |
| { | |
| "epoch": 0.16944318620296273, | |
| "grad_norm": 0.3075369894504547, | |
| "learning_rate": 0.00017680113298917263, | |
| "loss": 0.8037, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 0.16955502989022542, | |
| "grad_norm": 0.27061593532562256, | |
| "learning_rate": 0.00017657270775275254, | |
| "loss": 0.8149, | |
| "step": 15160 | |
| }, | |
| { | |
| "epoch": 0.1696668735774881, | |
| "grad_norm": 0.26719844341278076, | |
| "learning_rate": 0.00017634428251633242, | |
| "loss": 0.7896, | |
| "step": 15170 | |
| }, | |
| { | |
| "epoch": 0.16977871726475077, | |
| "grad_norm": 0.2871409058570862, | |
| "learning_rate": 0.00017611585727991228, | |
| "loss": 0.7863, | |
| "step": 15180 | |
| }, | |
| { | |
| "epoch": 0.16989056095201346, | |
| "grad_norm": 0.2502906620502472, | |
| "learning_rate": 0.00017588743204349216, | |
| "loss": 0.7817, | |
| "step": 15190 | |
| }, | |
| { | |
| "epoch": 0.17000240463927616, | |
| "grad_norm": 0.2579248547554016, | |
| "learning_rate": 0.00017565900680707207, | |
| "loss": 0.796, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.17011424832653882, | |
| "grad_norm": 0.2537415325641632, | |
| "learning_rate": 0.00017543058157065193, | |
| "loss": 0.78, | |
| "step": 15210 | |
| }, | |
| { | |
| "epoch": 0.1702260920138015, | |
| "grad_norm": 0.2420157790184021, | |
| "learning_rate": 0.0001752021563342318, | |
| "loss": 0.7946, | |
| "step": 15220 | |
| }, | |
| { | |
| "epoch": 0.1703379357010642, | |
| "grad_norm": 0.2423790544271469, | |
| "learning_rate": 0.0001749737310978117, | |
| "loss": 0.797, | |
| "step": 15230 | |
| }, | |
| { | |
| "epoch": 0.17044977938832687, | |
| "grad_norm": 0.2521071434020996, | |
| "learning_rate": 0.00017474530586139157, | |
| "loss": 0.8073, | |
| "step": 15240 | |
| }, | |
| { | |
| "epoch": 0.17056162307558956, | |
| "grad_norm": 0.22921273112297058, | |
| "learning_rate": 0.00017451688062497146, | |
| "loss": 0.7916, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 0.17067346676285225, | |
| "grad_norm": 0.35150206089019775, | |
| "learning_rate": 0.00017428845538855134, | |
| "loss": 0.8001, | |
| "step": 15260 | |
| }, | |
| { | |
| "epoch": 0.1707853104501149, | |
| "grad_norm": 0.27637869119644165, | |
| "learning_rate": 0.0001740600301521312, | |
| "loss": 0.7948, | |
| "step": 15270 | |
| }, | |
| { | |
| "epoch": 0.1708971541373776, | |
| "grad_norm": 0.22480230033397675, | |
| "learning_rate": 0.0001738316049157111, | |
| "loss": 0.7932, | |
| "step": 15280 | |
| }, | |
| { | |
| "epoch": 0.1710089978246403, | |
| "grad_norm": 0.27264508605003357, | |
| "learning_rate": 0.000173603179679291, | |
| "loss": 0.8083, | |
| "step": 15290 | |
| }, | |
| { | |
| "epoch": 0.17112084151190296, | |
| "grad_norm": 0.2647417485713959, | |
| "learning_rate": 0.00017337475444287084, | |
| "loss": 0.8177, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.17123268519916565, | |
| "grad_norm": 0.23619987070560455, | |
| "learning_rate": 0.00017314632920645073, | |
| "loss": 0.8068, | |
| "step": 15310 | |
| }, | |
| { | |
| "epoch": 0.17134452888642832, | |
| "grad_norm": 0.22450131177902222, | |
| "learning_rate": 0.0001729179039700306, | |
| "loss": 0.8004, | |
| "step": 15320 | |
| }, | |
| { | |
| "epoch": 0.171456372573691, | |
| "grad_norm": 0.2784859240055084, | |
| "learning_rate": 0.0001726894787336105, | |
| "loss": 0.7938, | |
| "step": 15330 | |
| }, | |
| { | |
| "epoch": 0.1715682162609537, | |
| "grad_norm": 0.25513574481010437, | |
| "learning_rate": 0.00017246105349719038, | |
| "loss": 0.7844, | |
| "step": 15340 | |
| }, | |
| { | |
| "epoch": 0.17168005994821636, | |
| "grad_norm": 0.27425146102905273, | |
| "learning_rate": 0.00017223262826077023, | |
| "loss": 0.7906, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 0.17179190363547905, | |
| "grad_norm": 0.2500791847705841, | |
| "learning_rate": 0.00017200420302435014, | |
| "loss": 0.7834, | |
| "step": 15360 | |
| }, | |
| { | |
| "epoch": 0.17190374732274175, | |
| "grad_norm": 0.2550630271434784, | |
| "learning_rate": 0.00017177577778793002, | |
| "loss": 0.7736, | |
| "step": 15370 | |
| }, | |
| { | |
| "epoch": 0.1720155910100044, | |
| "grad_norm": 0.25209444761276245, | |
| "learning_rate": 0.00017154735255150988, | |
| "loss": 0.773, | |
| "step": 15380 | |
| }, | |
| { | |
| "epoch": 0.1721274346972671, | |
| "grad_norm": 0.2347812056541443, | |
| "learning_rate": 0.00017131892731508976, | |
| "loss": 0.7745, | |
| "step": 15390 | |
| }, | |
| { | |
| "epoch": 0.1722392783845298, | |
| "grad_norm": 0.2858305871486664, | |
| "learning_rate": 0.00017109050207866967, | |
| "loss": 0.7776, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.17235112207179246, | |
| "grad_norm": 0.30414941906929016, | |
| "learning_rate": 0.00017086207684224953, | |
| "loss": 0.7701, | |
| "step": 15410 | |
| }, | |
| { | |
| "epoch": 0.17246296575905515, | |
| "grad_norm": 0.2645011842250824, | |
| "learning_rate": 0.0001706336516058294, | |
| "loss": 0.7746, | |
| "step": 15420 | |
| }, | |
| { | |
| "epoch": 0.17257480944631784, | |
| "grad_norm": 0.2984048128128052, | |
| "learning_rate": 0.0001704052263694093, | |
| "loss": 0.771, | |
| "step": 15430 | |
| }, | |
| { | |
| "epoch": 0.1726866531335805, | |
| "grad_norm": 0.2734147906303406, | |
| "learning_rate": 0.00017017680113298918, | |
| "loss": 0.7769, | |
| "step": 15440 | |
| }, | |
| { | |
| "epoch": 0.1727984968208432, | |
| "grad_norm": 0.2632124125957489, | |
| "learning_rate": 0.00016994837589656906, | |
| "loss": 0.7754, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 0.17291034050810586, | |
| "grad_norm": 0.29384443163871765, | |
| "learning_rate": 0.00016971995066014894, | |
| "loss": 0.7833, | |
| "step": 15460 | |
| }, | |
| { | |
| "epoch": 0.17302218419536855, | |
| "grad_norm": 0.3194182813167572, | |
| "learning_rate": 0.0001694915254237288, | |
| "loss": 0.7813, | |
| "step": 15470 | |
| }, | |
| { | |
| "epoch": 0.17313402788263124, | |
| "grad_norm": 0.25995251536369324, | |
| "learning_rate": 0.0001692631001873087, | |
| "loss": 0.7796, | |
| "step": 15480 | |
| }, | |
| { | |
| "epoch": 0.1732458715698939, | |
| "grad_norm": 0.272419810295105, | |
| "learning_rate": 0.0001690346749508886, | |
| "loss": 0.7839, | |
| "step": 15490 | |
| }, | |
| { | |
| "epoch": 0.1733577152571566, | |
| "grad_norm": 0.26239413022994995, | |
| "learning_rate": 0.00016880624971446845, | |
| "loss": 0.7807, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.1734695589444193, | |
| "grad_norm": 0.29991698265075684, | |
| "learning_rate": 0.00016857782447804833, | |
| "loss": 0.7941, | |
| "step": 15510 | |
| }, | |
| { | |
| "epoch": 0.17358140263168195, | |
| "grad_norm": 0.2812528908252716, | |
| "learning_rate": 0.00016834939924162824, | |
| "loss": 0.7863, | |
| "step": 15520 | |
| }, | |
| { | |
| "epoch": 0.17369324631894464, | |
| "grad_norm": 0.2557685077190399, | |
| "learning_rate": 0.0001681209740052081, | |
| "loss": 0.7953, | |
| "step": 15530 | |
| }, | |
| { | |
| "epoch": 0.17380509000620734, | |
| "grad_norm": 0.28565913438796997, | |
| "learning_rate": 0.00016789254876878798, | |
| "loss": 0.7934, | |
| "step": 15540 | |
| }, | |
| { | |
| "epoch": 0.17391693369347, | |
| "grad_norm": 0.25316086411476135, | |
| "learning_rate": 0.00016766412353236783, | |
| "loss": 0.7969, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 0.1740287773807327, | |
| "grad_norm": 0.2636478543281555, | |
| "learning_rate": 0.00016743569829594774, | |
| "loss": 0.8021, | |
| "step": 15560 | |
| }, | |
| { | |
| "epoch": 0.17414062106799538, | |
| "grad_norm": 0.28839442133903503, | |
| "learning_rate": 0.00016720727305952763, | |
| "loss": 0.8108, | |
| "step": 15570 | |
| }, | |
| { | |
| "epoch": 0.17425246475525805, | |
| "grad_norm": 0.2453639954328537, | |
| "learning_rate": 0.00016697884782310748, | |
| "loss": 0.8034, | |
| "step": 15580 | |
| }, | |
| { | |
| "epoch": 0.17436430844252074, | |
| "grad_norm": 0.2550848424434662, | |
| "learning_rate": 0.0001667504225866874, | |
| "loss": 0.8169, | |
| "step": 15590 | |
| }, | |
| { | |
| "epoch": 0.1744761521297834, | |
| "grad_norm": 0.24949923157691956, | |
| "learning_rate": 0.00016652199735026727, | |
| "loss": 0.8167, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.1745879958170461, | |
| "grad_norm": 0.24357125163078308, | |
| "learning_rate": 0.00016629357211384713, | |
| "loss": 0.821, | |
| "step": 15610 | |
| }, | |
| { | |
| "epoch": 0.17469983950430878, | |
| "grad_norm": 0.2246461659669876, | |
| "learning_rate": 0.000166065146877427, | |
| "loss": 0.82, | |
| "step": 15620 | |
| }, | |
| { | |
| "epoch": 0.17481168319157145, | |
| "grad_norm": 0.26160740852355957, | |
| "learning_rate": 0.00016583672164100692, | |
| "loss": 0.8167, | |
| "step": 15630 | |
| }, | |
| { | |
| "epoch": 0.17492352687883414, | |
| "grad_norm": 0.25773337483406067, | |
| "learning_rate": 0.00016560829640458678, | |
| "loss": 0.8305, | |
| "step": 15640 | |
| }, | |
| { | |
| "epoch": 0.17503537056609683, | |
| "grad_norm": 0.24051527678966522, | |
| "learning_rate": 0.00016537987116816666, | |
| "loss": 0.8201, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 0.1751472142533595, | |
| "grad_norm": 0.2507860064506531, | |
| "learning_rate": 0.00016515144593174654, | |
| "loss": 0.8444, | |
| "step": 15660 | |
| }, | |
| { | |
| "epoch": 0.1752590579406222, | |
| "grad_norm": 0.24071821570396423, | |
| "learning_rate": 0.00016492302069532643, | |
| "loss": 0.8071, | |
| "step": 15670 | |
| }, | |
| { | |
| "epoch": 0.17537090162788488, | |
| "grad_norm": 0.2533905506134033, | |
| "learning_rate": 0.0001646945954589063, | |
| "loss": 0.8164, | |
| "step": 15680 | |
| }, | |
| { | |
| "epoch": 0.17548274531514754, | |
| "grad_norm": 0.2546316683292389, | |
| "learning_rate": 0.0001644661702224862, | |
| "loss": 0.8237, | |
| "step": 15690 | |
| }, | |
| { | |
| "epoch": 0.17559458900241023, | |
| "grad_norm": 0.25692155957221985, | |
| "learning_rate": 0.00016423774498606605, | |
| "loss": 0.8198, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.17570643268967293, | |
| "grad_norm": 0.254535436630249, | |
| "learning_rate": 0.00016400931974964596, | |
| "loss": 0.8061, | |
| "step": 15710 | |
| }, | |
| { | |
| "epoch": 0.1758182763769356, | |
| "grad_norm": 0.2557326555252075, | |
| "learning_rate": 0.00016378089451322584, | |
| "loss": 0.8194, | |
| "step": 15720 | |
| }, | |
| { | |
| "epoch": 0.17593012006419828, | |
| "grad_norm": 0.24234241247177124, | |
| "learning_rate": 0.0001635524692768057, | |
| "loss": 0.8183, | |
| "step": 15730 | |
| }, | |
| { | |
| "epoch": 0.17604196375146094, | |
| "grad_norm": 0.2597709596157074, | |
| "learning_rate": 0.00016332404404038558, | |
| "loss": 0.7957, | |
| "step": 15740 | |
| }, | |
| { | |
| "epoch": 0.17615380743872364, | |
| "grad_norm": 0.2896418273448944, | |
| "learning_rate": 0.0001630956188039655, | |
| "loss": 0.8146, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 0.17626565112598633, | |
| "grad_norm": 0.2686966061592102, | |
| "learning_rate": 0.00016286719356754535, | |
| "loss": 0.7988, | |
| "step": 15760 | |
| }, | |
| { | |
| "epoch": 0.176377494813249, | |
| "grad_norm": 0.26220840215682983, | |
| "learning_rate": 0.00016263876833112523, | |
| "loss": 0.7936, | |
| "step": 15770 | |
| }, | |
| { | |
| "epoch": 0.17648933850051168, | |
| "grad_norm": 0.260547012090683, | |
| "learning_rate": 0.00016241034309470508, | |
| "loss": 0.8002, | |
| "step": 15780 | |
| }, | |
| { | |
| "epoch": 0.17660118218777437, | |
| "grad_norm": 0.22341471910476685, | |
| "learning_rate": 0.000162181917858285, | |
| "loss": 0.7935, | |
| "step": 15790 | |
| }, | |
| { | |
| "epoch": 0.17671302587503704, | |
| "grad_norm": 0.24994009733200073, | |
| "learning_rate": 0.00016195349262186488, | |
| "loss": 0.7971, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.17682486956229973, | |
| "grad_norm": 0.24070651829242706, | |
| "learning_rate": 0.00016172506738544473, | |
| "loss": 0.7844, | |
| "step": 15810 | |
| }, | |
| { | |
| "epoch": 0.17693671324956242, | |
| "grad_norm": 0.23858696222305298, | |
| "learning_rate": 0.00016149664214902461, | |
| "loss": 0.7687, | |
| "step": 15820 | |
| }, | |
| { | |
| "epoch": 0.17704855693682509, | |
| "grad_norm": 0.24684946238994598, | |
| "learning_rate": 0.00016126821691260452, | |
| "loss": 0.7848, | |
| "step": 15830 | |
| }, | |
| { | |
| "epoch": 0.17716040062408778, | |
| "grad_norm": 0.2525545656681061, | |
| "learning_rate": 0.00016103979167618438, | |
| "loss": 0.773, | |
| "step": 15840 | |
| }, | |
| { | |
| "epoch": 0.17727224431135047, | |
| "grad_norm": 0.2485392689704895, | |
| "learning_rate": 0.00016081136643976426, | |
| "loss": 0.7787, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 0.17738408799861313, | |
| "grad_norm": 0.2384241223335266, | |
| "learning_rate": 0.00016058294120334415, | |
| "loss": 0.7732, | |
| "step": 15860 | |
| }, | |
| { | |
| "epoch": 0.17749593168587582, | |
| "grad_norm": 0.25029659271240234, | |
| "learning_rate": 0.00016035451596692403, | |
| "loss": 0.7819, | |
| "step": 15870 | |
| }, | |
| { | |
| "epoch": 0.1776077753731385, | |
| "grad_norm": 0.2988499701023102, | |
| "learning_rate": 0.0001601260907305039, | |
| "loss": 0.7815, | |
| "step": 15880 | |
| }, | |
| { | |
| "epoch": 0.17771961906040118, | |
| "grad_norm": 0.25840380787849426, | |
| "learning_rate": 0.0001598976654940838, | |
| "loss": 0.7899, | |
| "step": 15890 | |
| }, | |
| { | |
| "epoch": 0.17783146274766387, | |
| "grad_norm": 0.2870889902114868, | |
| "learning_rate": 0.00015966924025766365, | |
| "loss": 0.7964, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.17794330643492653, | |
| "grad_norm": 0.270702987909317, | |
| "learning_rate": 0.00015944081502124356, | |
| "loss": 0.7907, | |
| "step": 15910 | |
| }, | |
| { | |
| "epoch": 0.17805515012218923, | |
| "grad_norm": 0.24939289689064026, | |
| "learning_rate": 0.00015921238978482344, | |
| "loss": 0.7909, | |
| "step": 15920 | |
| }, | |
| { | |
| "epoch": 0.17816699380945192, | |
| "grad_norm": 0.25692620873451233, | |
| "learning_rate": 0.0001589839645484033, | |
| "loss": 0.7864, | |
| "step": 15930 | |
| }, | |
| { | |
| "epoch": 0.17827883749671458, | |
| "grad_norm": 0.25667235255241394, | |
| "learning_rate": 0.00015875553931198318, | |
| "loss": 0.7792, | |
| "step": 15940 | |
| }, | |
| { | |
| "epoch": 0.17839068118397727, | |
| "grad_norm": 0.27988189458847046, | |
| "learning_rate": 0.0001585271140755631, | |
| "loss": 0.78, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 0.17850252487123996, | |
| "grad_norm": 0.26706936955451965, | |
| "learning_rate": 0.00015829868883914295, | |
| "loss": 0.7764, | |
| "step": 15960 | |
| }, | |
| { | |
| "epoch": 0.17861436855850263, | |
| "grad_norm": 0.25825801491737366, | |
| "learning_rate": 0.00015807026360272283, | |
| "loss": 0.7798, | |
| "step": 15970 | |
| }, | |
| { | |
| "epoch": 0.17872621224576532, | |
| "grad_norm": 0.26630404591560364, | |
| "learning_rate": 0.0001578418383663027, | |
| "loss": 0.7877, | |
| "step": 15980 | |
| }, | |
| { | |
| "epoch": 0.178838055933028, | |
| "grad_norm": 0.24562442302703857, | |
| "learning_rate": 0.0001576134131298826, | |
| "loss": 0.7761, | |
| "step": 15990 | |
| }, | |
| { | |
| "epoch": 0.17894989962029068, | |
| "grad_norm": 0.2607520818710327, | |
| "learning_rate": 0.00015738498789346248, | |
| "loss": 0.7844, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.17906174330755337, | |
| "grad_norm": 0.25256794691085815, | |
| "learning_rate": 0.00015715656265704233, | |
| "loss": 0.7712, | |
| "step": 16010 | |
| }, | |
| { | |
| "epoch": 0.17917358699481606, | |
| "grad_norm": 0.24657808244228363, | |
| "learning_rate": 0.00015692813742062222, | |
| "loss": 0.7766, | |
| "step": 16020 | |
| }, | |
| { | |
| "epoch": 0.17928543068207872, | |
| "grad_norm": 0.2546744644641876, | |
| "learning_rate": 0.00015669971218420213, | |
| "loss": 0.781, | |
| "step": 16030 | |
| }, | |
| { | |
| "epoch": 0.17939727436934141, | |
| "grad_norm": 0.24849241971969604, | |
| "learning_rate": 0.00015647128694778198, | |
| "loss": 0.786, | |
| "step": 16040 | |
| }, | |
| { | |
| "epoch": 0.17950911805660408, | |
| "grad_norm": 0.2447352409362793, | |
| "learning_rate": 0.00015624286171136187, | |
| "loss": 0.7805, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 0.17962096174386677, | |
| "grad_norm": 0.3004114031791687, | |
| "learning_rate": 0.00015601443647494178, | |
| "loss": 0.7748, | |
| "step": 16060 | |
| }, | |
| { | |
| "epoch": 0.17973280543112946, | |
| "grad_norm": 0.24974007904529572, | |
| "learning_rate": 0.00015578601123852163, | |
| "loss": 0.7823, | |
| "step": 16070 | |
| }, | |
| { | |
| "epoch": 0.17984464911839212, | |
| "grad_norm": 0.2995624542236328, | |
| "learning_rate": 0.00015555758600210151, | |
| "loss": 0.7894, | |
| "step": 16080 | |
| }, | |
| { | |
| "epoch": 0.17995649280565482, | |
| "grad_norm": 0.2560220956802368, | |
| "learning_rate": 0.0001553291607656814, | |
| "loss": 0.7849, | |
| "step": 16090 | |
| }, | |
| { | |
| "epoch": 0.1800683364929175, | |
| "grad_norm": 0.24940122663974762, | |
| "learning_rate": 0.00015510073552926128, | |
| "loss": 0.7903, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.18018018018018017, | |
| "grad_norm": 0.22082312405109406, | |
| "learning_rate": 0.00015487231029284116, | |
| "loss": 0.783, | |
| "step": 16110 | |
| }, | |
| { | |
| "epoch": 0.18029202386744286, | |
| "grad_norm": 0.2670224606990814, | |
| "learning_rate": 0.00015464388505642104, | |
| "loss": 0.7919, | |
| "step": 16120 | |
| }, | |
| { | |
| "epoch": 0.18040386755470555, | |
| "grad_norm": 0.2533135414123535, | |
| "learning_rate": 0.0001544154598200009, | |
| "loss": 0.8007, | |
| "step": 16130 | |
| }, | |
| { | |
| "epoch": 0.18051571124196822, | |
| "grad_norm": 0.2660861909389496, | |
| "learning_rate": 0.0001541870345835808, | |
| "loss": 0.7913, | |
| "step": 16140 | |
| }, | |
| { | |
| "epoch": 0.1806275549292309, | |
| "grad_norm": 0.2556677460670471, | |
| "learning_rate": 0.0001539586093471607, | |
| "loss": 0.7826, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 0.1807393986164936, | |
| "grad_norm": 0.275900661945343, | |
| "learning_rate": 0.00015373018411074055, | |
| "loss": 0.8048, | |
| "step": 16160 | |
| }, | |
| { | |
| "epoch": 0.18085124230375627, | |
| "grad_norm": 0.29176998138427734, | |
| "learning_rate": 0.00015350175887432043, | |
| "loss": 0.8241, | |
| "step": 16170 | |
| }, | |
| { | |
| "epoch": 0.18096308599101896, | |
| "grad_norm": 0.2635776996612549, | |
| "learning_rate": 0.00015327333363790034, | |
| "loss": 0.8211, | |
| "step": 16180 | |
| }, | |
| { | |
| "epoch": 0.18107492967828162, | |
| "grad_norm": 0.27744734287261963, | |
| "learning_rate": 0.0001530449084014802, | |
| "loss": 0.8254, | |
| "step": 16190 | |
| }, | |
| { | |
| "epoch": 0.1811867733655443, | |
| "grad_norm": 0.28162074089050293, | |
| "learning_rate": 0.00015281648316506008, | |
| "loss": 0.8182, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.181298617052807, | |
| "grad_norm": 0.29347339272499084, | |
| "learning_rate": 0.00015258805792863996, | |
| "loss": 0.812, | |
| "step": 16210 | |
| }, | |
| { | |
| "epoch": 0.18141046074006967, | |
| "grad_norm": 0.26170992851257324, | |
| "learning_rate": 0.00015235963269221985, | |
| "loss": 0.8221, | |
| "step": 16220 | |
| }, | |
| { | |
| "epoch": 0.18152230442733236, | |
| "grad_norm": 0.27848196029663086, | |
| "learning_rate": 0.00015213120745579973, | |
| "loss": 0.825, | |
| "step": 16230 | |
| }, | |
| { | |
| "epoch": 0.18163414811459505, | |
| "grad_norm": 0.2994973659515381, | |
| "learning_rate": 0.00015190278221937958, | |
| "loss": 0.8158, | |
| "step": 16240 | |
| }, | |
| { | |
| "epoch": 0.18174599180185771, | |
| "grad_norm": 0.27873843908309937, | |
| "learning_rate": 0.00015167435698295947, | |
| "loss": 0.816, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 0.1818578354891204, | |
| "grad_norm": 0.3014775812625885, | |
| "learning_rate": 0.00015144593174653938, | |
| "loss": 0.8174, | |
| "step": 16260 | |
| }, | |
| { | |
| "epoch": 0.1819696791763831, | |
| "grad_norm": 0.29963594675064087, | |
| "learning_rate": 0.00015121750651011923, | |
| "loss": 0.8104, | |
| "step": 16270 | |
| }, | |
| { | |
| "epoch": 0.18208152286364576, | |
| "grad_norm": 0.3388141393661499, | |
| "learning_rate": 0.00015098908127369912, | |
| "loss": 0.826, | |
| "step": 16280 | |
| }, | |
| { | |
| "epoch": 0.18219336655090845, | |
| "grad_norm": 0.29143062233924866, | |
| "learning_rate": 0.000150760656037279, | |
| "loss": 0.8222, | |
| "step": 16290 | |
| }, | |
| { | |
| "epoch": 0.18230521023817114, | |
| "grad_norm": 0.327824205160141, | |
| "learning_rate": 0.00015053223080085888, | |
| "loss": 0.8186, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.1824170539254338, | |
| "grad_norm": 0.3053797483444214, | |
| "learning_rate": 0.00015030380556443876, | |
| "loss": 0.8214, | |
| "step": 16310 | |
| }, | |
| { | |
| "epoch": 0.1825288976126965, | |
| "grad_norm": 0.3030015230178833, | |
| "learning_rate": 0.00015007538032801865, | |
| "loss": 0.8198, | |
| "step": 16320 | |
| }, | |
| { | |
| "epoch": 0.18264074129995916, | |
| "grad_norm": 0.3147192597389221, | |
| "learning_rate": 0.0001498469550915985, | |
| "loss": 0.8224, | |
| "step": 16330 | |
| }, | |
| { | |
| "epoch": 0.18275258498722186, | |
| "grad_norm": 0.2838999927043915, | |
| "learning_rate": 0.0001496185298551784, | |
| "loss": 0.8142, | |
| "step": 16340 | |
| }, | |
| { | |
| "epoch": 0.18286442867448455, | |
| "grad_norm": 0.27273476123809814, | |
| "learning_rate": 0.0001493901046187583, | |
| "loss": 0.8054, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 0.1829762723617472, | |
| "grad_norm": 0.2754770517349243, | |
| "learning_rate": 0.00014916167938233815, | |
| "loss": 0.8131, | |
| "step": 16360 | |
| }, | |
| { | |
| "epoch": 0.1830881160490099, | |
| "grad_norm": 0.29061514139175415, | |
| "learning_rate": 0.00014893325414591803, | |
| "loss": 0.7988, | |
| "step": 16370 | |
| }, | |
| { | |
| "epoch": 0.1831999597362726, | |
| "grad_norm": 0.2525017559528351, | |
| "learning_rate": 0.00014870482890949794, | |
| "loss": 0.8023, | |
| "step": 16380 | |
| }, | |
| { | |
| "epoch": 0.18331180342353526, | |
| "grad_norm": 0.3019058108329773, | |
| "learning_rate": 0.0001484764036730778, | |
| "loss": 0.8077, | |
| "step": 16390 | |
| }, | |
| { | |
| "epoch": 0.18342364711079795, | |
| "grad_norm": 0.302090048789978, | |
| "learning_rate": 0.00014824797843665768, | |
| "loss": 0.812, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.18353549079806064, | |
| "grad_norm": 0.29742154479026794, | |
| "learning_rate": 0.00014801955320023757, | |
| "loss": 0.7911, | |
| "step": 16410 | |
| }, | |
| { | |
| "epoch": 0.1836473344853233, | |
| "grad_norm": 0.31950804591178894, | |
| "learning_rate": 0.00014779112796381745, | |
| "loss": 0.7875, | |
| "step": 16420 | |
| }, | |
| { | |
| "epoch": 0.183759178172586, | |
| "grad_norm": 0.32971978187561035, | |
| "learning_rate": 0.00014756270272739733, | |
| "loss": 0.7788, | |
| "step": 16430 | |
| }, | |
| { | |
| "epoch": 0.1838710218598487, | |
| "grad_norm": 0.2941220700740814, | |
| "learning_rate": 0.00014733427749097721, | |
| "loss": 0.7772, | |
| "step": 16440 | |
| }, | |
| { | |
| "epoch": 0.18398286554711135, | |
| "grad_norm": 0.2639923393726349, | |
| "learning_rate": 0.00014710585225455707, | |
| "loss": 0.7708, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 0.18409470923437404, | |
| "grad_norm": 0.2483467161655426, | |
| "learning_rate": 0.00014687742701813698, | |
| "loss": 0.7846, | |
| "step": 16460 | |
| }, | |
| { | |
| "epoch": 0.1842065529216367, | |
| "grad_norm": 0.31150713562965393, | |
| "learning_rate": 0.00014664900178171683, | |
| "loss": 0.7853, | |
| "step": 16470 | |
| }, | |
| { | |
| "epoch": 0.1843183966088994, | |
| "grad_norm": 0.30439406633377075, | |
| "learning_rate": 0.00014642057654529672, | |
| "loss": 0.7779, | |
| "step": 16480 | |
| }, | |
| { | |
| "epoch": 0.1844302402961621, | |
| "grad_norm": 0.29318898916244507, | |
| "learning_rate": 0.00014619215130887663, | |
| "loss": 0.7911, | |
| "step": 16490 | |
| }, | |
| { | |
| "epoch": 0.18454208398342475, | |
| "grad_norm": 0.2726874053478241, | |
| "learning_rate": 0.00014596372607245648, | |
| "loss": 0.7869, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.18465392767068745, | |
| "grad_norm": 0.2978016436100006, | |
| "learning_rate": 0.00014573530083603637, | |
| "loss": 0.783, | |
| "step": 16510 | |
| }, | |
| { | |
| "epoch": 0.18476577135795014, | |
| "grad_norm": 0.3107501268386841, | |
| "learning_rate": 0.00014550687559961625, | |
| "loss": 0.801, | |
| "step": 16520 | |
| }, | |
| { | |
| "epoch": 0.1848776150452128, | |
| "grad_norm": 0.2848517894744873, | |
| "learning_rate": 0.00014527845036319613, | |
| "loss": 0.8063, | |
| "step": 16530 | |
| }, | |
| { | |
| "epoch": 0.1849894587324755, | |
| "grad_norm": 0.2625429332256317, | |
| "learning_rate": 0.00014505002512677601, | |
| "loss": 0.8074, | |
| "step": 16540 | |
| }, | |
| { | |
| "epoch": 0.18510130241973818, | |
| "grad_norm": 0.2805044949054718, | |
| "learning_rate": 0.0001448215998903559, | |
| "loss": 0.8013, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 0.18521314610700085, | |
| "grad_norm": 0.27657589316368103, | |
| "learning_rate": 0.00014459317465393575, | |
| "loss": 0.8012, | |
| "step": 16560 | |
| }, | |
| { | |
| "epoch": 0.18532498979426354, | |
| "grad_norm": 0.2780141532421112, | |
| "learning_rate": 0.00014436474941751566, | |
| "loss": 0.8161, | |
| "step": 16570 | |
| }, | |
| { | |
| "epoch": 0.18543683348152623, | |
| "grad_norm": 0.2871207892894745, | |
| "learning_rate": 0.00014413632418109555, | |
| "loss": 0.7899, | |
| "step": 16580 | |
| }, | |
| { | |
| "epoch": 0.1855486771687889, | |
| "grad_norm": 0.2656658887863159, | |
| "learning_rate": 0.0001439078989446754, | |
| "loss": 0.7985, | |
| "step": 16590 | |
| }, | |
| { | |
| "epoch": 0.1856605208560516, | |
| "grad_norm": 0.2766350209712982, | |
| "learning_rate": 0.00014367947370825528, | |
| "loss": 0.7999, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.18577236454331428, | |
| "grad_norm": 0.2616749107837677, | |
| "learning_rate": 0.0001434510484718352, | |
| "loss": 0.8002, | |
| "step": 16610 | |
| }, | |
| { | |
| "epoch": 0.18588420823057694, | |
| "grad_norm": 0.25887414813041687, | |
| "learning_rate": 0.00014322262323541505, | |
| "loss": 0.8112, | |
| "step": 16620 | |
| }, | |
| { | |
| "epoch": 0.18599605191783963, | |
| "grad_norm": 0.2594297528266907, | |
| "learning_rate": 0.00014299419799899493, | |
| "loss": 0.802, | |
| "step": 16630 | |
| }, | |
| { | |
| "epoch": 0.1861078956051023, | |
| "grad_norm": 0.2535499036312103, | |
| "learning_rate": 0.00014276577276257482, | |
| "loss": 0.7867, | |
| "step": 16640 | |
| }, | |
| { | |
| "epoch": 0.186219739292365, | |
| "grad_norm": 0.25161436200141907, | |
| "learning_rate": 0.0001425373475261547, | |
| "loss": 0.8059, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 0.18633158297962768, | |
| "grad_norm": 0.22897444665431976, | |
| "learning_rate": 0.00014230892228973458, | |
| "loss": 0.7864, | |
| "step": 16660 | |
| }, | |
| { | |
| "epoch": 0.18644342666689034, | |
| "grad_norm": 0.27164047956466675, | |
| "learning_rate": 0.00014208049705331446, | |
| "loss": 0.796, | |
| "step": 16670 | |
| }, | |
| { | |
| "epoch": 0.18655527035415304, | |
| "grad_norm": 0.2717941701412201, | |
| "learning_rate": 0.00014185207181689432, | |
| "loss": 0.7801, | |
| "step": 16680 | |
| }, | |
| { | |
| "epoch": 0.18666711404141573, | |
| "grad_norm": 0.27144837379455566, | |
| "learning_rate": 0.00014162364658047423, | |
| "loss": 0.7758, | |
| "step": 16690 | |
| }, | |
| { | |
| "epoch": 0.1867789577286784, | |
| "grad_norm": 0.2357831746339798, | |
| "learning_rate": 0.00014139522134405409, | |
| "loss": 0.7674, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.18689080141594108, | |
| "grad_norm": 0.23233544826507568, | |
| "learning_rate": 0.00014116679610763397, | |
| "loss": 0.7827, | |
| "step": 16710 | |
| }, | |
| { | |
| "epoch": 0.18700264510320377, | |
| "grad_norm": 0.2399321347475052, | |
| "learning_rate": 0.00014093837087121385, | |
| "loss": 0.7811, | |
| "step": 16720 | |
| }, | |
| { | |
| "epoch": 0.18711448879046644, | |
| "grad_norm": 0.2493642419576645, | |
| "learning_rate": 0.00014070994563479373, | |
| "loss": 0.7762, | |
| "step": 16730 | |
| }, | |
| { | |
| "epoch": 0.18722633247772913, | |
| "grad_norm": 0.23383350670337677, | |
| "learning_rate": 0.00014048152039837362, | |
| "loss": 0.7754, | |
| "step": 16740 | |
| }, | |
| { | |
| "epoch": 0.18733817616499182, | |
| "grad_norm": 0.2624364197254181, | |
| "learning_rate": 0.0001402530951619535, | |
| "loss": 0.7766, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 0.18745001985225448, | |
| "grad_norm": 0.24138151109218597, | |
| "learning_rate": 0.00014002466992553336, | |
| "loss": 0.7869, | |
| "step": 16760 | |
| }, | |
| { | |
| "epoch": 0.18756186353951718, | |
| "grad_norm": 0.2397204041481018, | |
| "learning_rate": 0.00013979624468911326, | |
| "loss": 0.7974, | |
| "step": 16770 | |
| }, | |
| { | |
| "epoch": 0.18767370722677984, | |
| "grad_norm": 0.27491655945777893, | |
| "learning_rate": 0.00013956781945269315, | |
| "loss": 0.8011, | |
| "step": 16780 | |
| }, | |
| { | |
| "epoch": 0.18778555091404253, | |
| "grad_norm": 0.2321402132511139, | |
| "learning_rate": 0.000139339394216273, | |
| "loss": 0.803, | |
| "step": 16790 | |
| }, | |
| { | |
| "epoch": 0.18789739460130522, | |
| "grad_norm": 0.24487042427062988, | |
| "learning_rate": 0.00013911096897985289, | |
| "loss": 0.7975, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.1880092382885679, | |
| "grad_norm": 0.23328396677970886, | |
| "learning_rate": 0.0001388825437434328, | |
| "loss": 0.795, | |
| "step": 16810 | |
| }, | |
| { | |
| "epoch": 0.18812108197583058, | |
| "grad_norm": 0.22705566883087158, | |
| "learning_rate": 0.00013865411850701265, | |
| "loss": 0.7895, | |
| "step": 16820 | |
| }, | |
| { | |
| "epoch": 0.18823292566309327, | |
| "grad_norm": 0.24339929223060608, | |
| "learning_rate": 0.00013842569327059253, | |
| "loss": 0.7931, | |
| "step": 16830 | |
| }, | |
| { | |
| "epoch": 0.18834476935035593, | |
| "grad_norm": 0.2613057494163513, | |
| "learning_rate": 0.00013819726803417242, | |
| "loss": 0.7785, | |
| "step": 16840 | |
| }, | |
| { | |
| "epoch": 0.18845661303761863, | |
| "grad_norm": 0.27011603116989136, | |
| "learning_rate": 0.0001379688427977523, | |
| "loss": 0.7853, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 0.18856845672488132, | |
| "grad_norm": 0.26589342951774597, | |
| "learning_rate": 0.00013774041756133218, | |
| "loss": 0.7893, | |
| "step": 16860 | |
| }, | |
| { | |
| "epoch": 0.18868030041214398, | |
| "grad_norm": 0.26286208629608154, | |
| "learning_rate": 0.00013751199232491207, | |
| "loss": 0.7707, | |
| "step": 16870 | |
| }, | |
| { | |
| "epoch": 0.18879214409940667, | |
| "grad_norm": 0.3021993637084961, | |
| "learning_rate": 0.00013728356708849192, | |
| "loss": 0.7896, | |
| "step": 16880 | |
| }, | |
| { | |
| "epoch": 0.18890398778666936, | |
| "grad_norm": 0.30742523074150085, | |
| "learning_rate": 0.00013705514185207183, | |
| "loss": 0.7895, | |
| "step": 16890 | |
| }, | |
| { | |
| "epoch": 0.18901583147393203, | |
| "grad_norm": 0.3027999699115753, | |
| "learning_rate": 0.0001368267166156517, | |
| "loss": 0.7839, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.18912767516119472, | |
| "grad_norm": 0.29199281334877014, | |
| "learning_rate": 0.00013659829137923157, | |
| "loss": 0.7771, | |
| "step": 16910 | |
| }, | |
| { | |
| "epoch": 0.18923951884845738, | |
| "grad_norm": 0.2460477203130722, | |
| "learning_rate": 0.00013636986614281145, | |
| "loss": 0.7823, | |
| "step": 16920 | |
| }, | |
| { | |
| "epoch": 0.18935136253572007, | |
| "grad_norm": 0.2608555853366852, | |
| "learning_rate": 0.00013614144090639134, | |
| "loss": 0.7664, | |
| "step": 16930 | |
| }, | |
| { | |
| "epoch": 0.18946320622298277, | |
| "grad_norm": 0.2723162770271301, | |
| "learning_rate": 0.00013591301566997122, | |
| "loss": 0.7768, | |
| "step": 16940 | |
| }, | |
| { | |
| "epoch": 0.18957504991024543, | |
| "grad_norm": 0.2690962255001068, | |
| "learning_rate": 0.0001356845904335511, | |
| "loss": 0.7697, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 0.18968689359750812, | |
| "grad_norm": 0.2892717719078064, | |
| "learning_rate": 0.00013545616519713096, | |
| "loss": 0.769, | |
| "step": 16960 | |
| }, | |
| { | |
| "epoch": 0.1897987372847708, | |
| "grad_norm": 0.2581406533718109, | |
| "learning_rate": 0.00013522773996071087, | |
| "loss": 0.7766, | |
| "step": 16970 | |
| }, | |
| { | |
| "epoch": 0.18991058097203348, | |
| "grad_norm": 0.2944723963737488, | |
| "learning_rate": 0.00013499931472429075, | |
| "loss": 0.7638, | |
| "step": 16980 | |
| }, | |
| { | |
| "epoch": 0.19002242465929617, | |
| "grad_norm": 0.2776504158973694, | |
| "learning_rate": 0.0001347708894878706, | |
| "loss": 0.7731, | |
| "step": 16990 | |
| }, | |
| { | |
| "epoch": 0.19013426834655886, | |
| "grad_norm": 0.267098993062973, | |
| "learning_rate": 0.00013454246425145052, | |
| "loss": 0.7772, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.19024611203382152, | |
| "grad_norm": 0.2806127071380615, | |
| "learning_rate": 0.0001343140390150304, | |
| "loss": 0.772, | |
| "step": 17010 | |
| }, | |
| { | |
| "epoch": 0.19035795572108422, | |
| "grad_norm": 0.2872319519519806, | |
| "learning_rate": 0.00013408561377861025, | |
| "loss": 0.7695, | |
| "step": 17020 | |
| }, | |
| { | |
| "epoch": 0.1904697994083469, | |
| "grad_norm": 0.24477818608283997, | |
| "learning_rate": 0.00013385718854219014, | |
| "loss": 0.7764, | |
| "step": 17030 | |
| }, | |
| { | |
| "epoch": 0.19058164309560957, | |
| "grad_norm": 0.2637476623058319, | |
| "learning_rate": 0.00013362876330577005, | |
| "loss": 0.7712, | |
| "step": 17040 | |
| }, | |
| { | |
| "epoch": 0.19069348678287226, | |
| "grad_norm": 0.2676442861557007, | |
| "learning_rate": 0.0001334003380693499, | |
| "loss": 0.7707, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 0.19080533047013493, | |
| "grad_norm": 0.2592306435108185, | |
| "learning_rate": 0.00013317191283292979, | |
| "loss": 0.7808, | |
| "step": 17060 | |
| }, | |
| { | |
| "epoch": 0.19091717415739762, | |
| "grad_norm": 0.3543199896812439, | |
| "learning_rate": 0.00013294348759650967, | |
| "loss": 0.7928, | |
| "step": 17070 | |
| }, | |
| { | |
| "epoch": 0.1910290178446603, | |
| "grad_norm": 0.26262548565864563, | |
| "learning_rate": 0.00013271506236008955, | |
| "loss": 0.7677, | |
| "step": 17080 | |
| }, | |
| { | |
| "epoch": 0.19114086153192297, | |
| "grad_norm": 0.2845424711704254, | |
| "learning_rate": 0.00013248663712366943, | |
| "loss": 0.7758, | |
| "step": 17090 | |
| }, | |
| { | |
| "epoch": 0.19125270521918566, | |
| "grad_norm": 0.2694297730922699, | |
| "learning_rate": 0.00013225821188724932, | |
| "loss": 0.7857, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.19136454890644836, | |
| "grad_norm": 0.2682325839996338, | |
| "learning_rate": 0.00013202978665082917, | |
| "loss": 0.782, | |
| "step": 17110 | |
| }, | |
| { | |
| "epoch": 0.19147639259371102, | |
| "grad_norm": 0.26535049080848694, | |
| "learning_rate": 0.00013180136141440908, | |
| "loss": 0.7796, | |
| "step": 17120 | |
| }, | |
| { | |
| "epoch": 0.1915882362809737, | |
| "grad_norm": 0.2759861946105957, | |
| "learning_rate": 0.00013157293617798894, | |
| "loss": 0.7732, | |
| "step": 17130 | |
| }, | |
| { | |
| "epoch": 0.1917000799682364, | |
| "grad_norm": 0.24873244762420654, | |
| "learning_rate": 0.00013134451094156882, | |
| "loss": 0.7763, | |
| "step": 17140 | |
| }, | |
| { | |
| "epoch": 0.19181192365549907, | |
| "grad_norm": 0.2826152443885803, | |
| "learning_rate": 0.0001311160857051487, | |
| "loss": 0.7748, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 0.19192376734276176, | |
| "grad_norm": 0.2823798358440399, | |
| "learning_rate": 0.00013088766046872859, | |
| "loss": 0.768, | |
| "step": 17160 | |
| }, | |
| { | |
| "epoch": 0.19203561103002445, | |
| "grad_norm": 0.2591745853424072, | |
| "learning_rate": 0.00013065923523230847, | |
| "loss": 0.7831, | |
| "step": 17170 | |
| }, | |
| { | |
| "epoch": 0.19214745471728711, | |
| "grad_norm": 0.24773742258548737, | |
| "learning_rate": 0.00013043080999588835, | |
| "loss": 0.7799, | |
| "step": 17180 | |
| }, | |
| { | |
| "epoch": 0.1922592984045498, | |
| "grad_norm": 0.28184765577316284, | |
| "learning_rate": 0.0001302023847594682, | |
| "loss": 0.787, | |
| "step": 17190 | |
| }, | |
| { | |
| "epoch": 0.19237114209181247, | |
| "grad_norm": 0.24396668374538422, | |
| "learning_rate": 0.00012997395952304812, | |
| "loss": 0.7777, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.19248298577907516, | |
| "grad_norm": 0.25493332743644714, | |
| "learning_rate": 0.000129745534286628, | |
| "loss": 0.7842, | |
| "step": 17210 | |
| }, | |
| { | |
| "epoch": 0.19259482946633785, | |
| "grad_norm": 0.2615022361278534, | |
| "learning_rate": 0.00012951710905020786, | |
| "loss": 0.788, | |
| "step": 17220 | |
| }, | |
| { | |
| "epoch": 0.19270667315360052, | |
| "grad_norm": 0.28270524740219116, | |
| "learning_rate": 0.00012928868381378774, | |
| "loss": 0.7788, | |
| "step": 17230 | |
| }, | |
| { | |
| "epoch": 0.1928185168408632, | |
| "grad_norm": 0.24917210638523102, | |
| "learning_rate": 0.00012906025857736765, | |
| "loss": 0.7731, | |
| "step": 17240 | |
| }, | |
| { | |
| "epoch": 0.1929303605281259, | |
| "grad_norm": 0.2589946985244751, | |
| "learning_rate": 0.0001288318333409475, | |
| "loss": 0.7781, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 0.19304220421538856, | |
| "grad_norm": 0.23770585656166077, | |
| "learning_rate": 0.0001286034081045274, | |
| "loss": 0.7902, | |
| "step": 17260 | |
| }, | |
| { | |
| "epoch": 0.19315404790265125, | |
| "grad_norm": 0.22782771289348602, | |
| "learning_rate": 0.00012837498286810727, | |
| "loss": 0.7875, | |
| "step": 17270 | |
| }, | |
| { | |
| "epoch": 0.19326589158991395, | |
| "grad_norm": 0.2611001431941986, | |
| "learning_rate": 0.00012814655763168715, | |
| "loss": 0.794, | |
| "step": 17280 | |
| }, | |
| { | |
| "epoch": 0.1933777352771766, | |
| "grad_norm": 0.2642746865749359, | |
| "learning_rate": 0.00012791813239526704, | |
| "loss": 0.8005, | |
| "step": 17290 | |
| }, | |
| { | |
| "epoch": 0.1934895789644393, | |
| "grad_norm": 0.2470688372850418, | |
| "learning_rate": 0.00012768970715884692, | |
| "loss": 0.7854, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.193601422651702, | |
| "grad_norm": 0.24735964834690094, | |
| "learning_rate": 0.00012746128192242677, | |
| "loss": 0.7918, | |
| "step": 17310 | |
| }, | |
| { | |
| "epoch": 0.19371326633896466, | |
| "grad_norm": 0.2734208405017853, | |
| "learning_rate": 0.00012723285668600668, | |
| "loss": 0.7719, | |
| "step": 17320 | |
| }, | |
| { | |
| "epoch": 0.19382511002622735, | |
| "grad_norm": 0.28373652696609497, | |
| "learning_rate": 0.00012700443144958657, | |
| "loss": 0.7743, | |
| "step": 17330 | |
| }, | |
| { | |
| "epoch": 0.19393695371349004, | |
| "grad_norm": 0.25755295157432556, | |
| "learning_rate": 0.00012677600621316642, | |
| "loss": 0.7761, | |
| "step": 17340 | |
| }, | |
| { | |
| "epoch": 0.1940487974007527, | |
| "grad_norm": 0.2918241322040558, | |
| "learning_rate": 0.0001265475809767463, | |
| "loss": 0.7885, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 0.1941606410880154, | |
| "grad_norm": 0.2589518427848816, | |
| "learning_rate": 0.0001263191557403262, | |
| "loss": 0.7781, | |
| "step": 17360 | |
| }, | |
| { | |
| "epoch": 0.19427248477527806, | |
| "grad_norm": 0.2941739857196808, | |
| "learning_rate": 0.00012609073050390607, | |
| "loss": 0.7896, | |
| "step": 17370 | |
| }, | |
| { | |
| "epoch": 0.19438432846254075, | |
| "grad_norm": 0.2625831663608551, | |
| "learning_rate": 0.00012586230526748595, | |
| "loss": 0.7797, | |
| "step": 17380 | |
| }, | |
| { | |
| "epoch": 0.19449617214980344, | |
| "grad_norm": 0.2731517255306244, | |
| "learning_rate": 0.0001256338800310658, | |
| "loss": 0.7861, | |
| "step": 17390 | |
| }, | |
| { | |
| "epoch": 0.1946080158370661, | |
| "grad_norm": 0.2802453637123108, | |
| "learning_rate": 0.00012540545479464572, | |
| "loss": 0.8066, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.1947198595243288, | |
| "grad_norm": 0.24151596426963806, | |
| "learning_rate": 0.0001251770295582256, | |
| "loss": 0.7746, | |
| "step": 17410 | |
| }, | |
| { | |
| "epoch": 0.1948317032115915, | |
| "grad_norm": 0.27006617188453674, | |
| "learning_rate": 0.00012494860432180549, | |
| "loss": 0.7796, | |
| "step": 17420 | |
| }, | |
| { | |
| "epoch": 0.19494354689885415, | |
| "grad_norm": 0.2574283480644226, | |
| "learning_rate": 0.00012472017908538537, | |
| "loss": 0.7809, | |
| "step": 17430 | |
| }, | |
| { | |
| "epoch": 0.19505539058611684, | |
| "grad_norm": 0.25741514563560486, | |
| "learning_rate": 0.00012449175384896522, | |
| "loss": 0.7792, | |
| "step": 17440 | |
| }, | |
| { | |
| "epoch": 0.19516723427337954, | |
| "grad_norm": 0.2619360685348511, | |
| "learning_rate": 0.00012426332861254513, | |
| "loss": 0.7768, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 0.1952790779606422, | |
| "grad_norm": 0.28053224086761475, | |
| "learning_rate": 0.000124034903376125, | |
| "loss": 0.7841, | |
| "step": 17460 | |
| }, | |
| { | |
| "epoch": 0.1953909216479049, | |
| "grad_norm": 0.24019859731197357, | |
| "learning_rate": 0.00012380647813970487, | |
| "loss": 0.783, | |
| "step": 17470 | |
| }, | |
| { | |
| "epoch": 0.19550276533516758, | |
| "grad_norm": 0.2747540771961212, | |
| "learning_rate": 0.00012357805290328475, | |
| "loss": 0.7911, | |
| "step": 17480 | |
| }, | |
| { | |
| "epoch": 0.19561460902243025, | |
| "grad_norm": 0.28044483065605164, | |
| "learning_rate": 0.00012334962766686464, | |
| "loss": 0.7986, | |
| "step": 17490 | |
| }, | |
| { | |
| "epoch": 0.19572645270969294, | |
| "grad_norm": 0.24908137321472168, | |
| "learning_rate": 0.00012312120243044452, | |
| "loss": 0.8087, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.1958382963969556, | |
| "grad_norm": 0.29041793942451477, | |
| "learning_rate": 0.0001228927771940244, | |
| "loss": 0.8063, | |
| "step": 17510 | |
| }, | |
| { | |
| "epoch": 0.1959501400842183, | |
| "grad_norm": 0.3020537495613098, | |
| "learning_rate": 0.00012266435195760429, | |
| "loss": 0.8004, | |
| "step": 17520 | |
| }, | |
| { | |
| "epoch": 0.19606198377148099, | |
| "grad_norm": 0.29414400458335876, | |
| "learning_rate": 0.00012243592672118417, | |
| "loss": 0.7846, | |
| "step": 17530 | |
| }, | |
| { | |
| "epoch": 0.19617382745874365, | |
| "grad_norm": 0.2648397386074066, | |
| "learning_rate": 0.00012220750148476402, | |
| "loss": 0.7708, | |
| "step": 17540 | |
| }, | |
| { | |
| "epoch": 0.19628567114600634, | |
| "grad_norm": 0.2834302484989166, | |
| "learning_rate": 0.00012197907624834392, | |
| "loss": 0.7818, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 0.19639751483326903, | |
| "grad_norm": 0.2748505175113678, | |
| "learning_rate": 0.0001217506510119238, | |
| "loss": 0.7642, | |
| "step": 17560 | |
| }, | |
| { | |
| "epoch": 0.1965093585205317, | |
| "grad_norm": 0.32425326108932495, | |
| "learning_rate": 0.00012152222577550367, | |
| "loss": 0.7765, | |
| "step": 17570 | |
| }, | |
| { | |
| "epoch": 0.1966212022077944, | |
| "grad_norm": 0.27183324098587036, | |
| "learning_rate": 0.00012129380053908357, | |
| "loss": 0.7572, | |
| "step": 17580 | |
| }, | |
| { | |
| "epoch": 0.19673304589505708, | |
| "grad_norm": 0.28190943598747253, | |
| "learning_rate": 0.00012106537530266344, | |
| "loss": 0.7571, | |
| "step": 17590 | |
| }, | |
| { | |
| "epoch": 0.19684488958231974, | |
| "grad_norm": 0.5151196718215942, | |
| "learning_rate": 0.00012083695006624332, | |
| "loss": 0.7565, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.19695673326958243, | |
| "grad_norm": 0.2523132264614105, | |
| "learning_rate": 0.0001206085248298232, | |
| "loss": 0.7597, | |
| "step": 17610 | |
| }, | |
| { | |
| "epoch": 0.19706857695684513, | |
| "grad_norm": 0.27336063981056213, | |
| "learning_rate": 0.00012038009959340309, | |
| "loss": 0.7546, | |
| "step": 17620 | |
| }, | |
| { | |
| "epoch": 0.1971804206441078, | |
| "grad_norm": 0.25119057297706604, | |
| "learning_rate": 0.00012015167435698296, | |
| "loss": 0.7519, | |
| "step": 17630 | |
| }, | |
| { | |
| "epoch": 0.19729226433137048, | |
| "grad_norm": 0.281147301197052, | |
| "learning_rate": 0.00011992324912056284, | |
| "loss": 0.7623, | |
| "step": 17640 | |
| }, | |
| { | |
| "epoch": 0.19740410801863315, | |
| "grad_norm": 0.2463361769914627, | |
| "learning_rate": 0.00011969482388414272, | |
| "loss": 0.754, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 0.19751595170589584, | |
| "grad_norm": 0.2902059853076935, | |
| "learning_rate": 0.0001194663986477226, | |
| "loss": 0.7578, | |
| "step": 17660 | |
| }, | |
| { | |
| "epoch": 0.19762779539315853, | |
| "grad_norm": 0.2590588629245758, | |
| "learning_rate": 0.00011923797341130247, | |
| "loss": 0.7427, | |
| "step": 17670 | |
| }, | |
| { | |
| "epoch": 0.1977396390804212, | |
| "grad_norm": 0.24349506199359894, | |
| "learning_rate": 0.00011900954817488237, | |
| "loss": 0.7599, | |
| "step": 17680 | |
| }, | |
| { | |
| "epoch": 0.19785148276768388, | |
| "grad_norm": 0.2568139135837555, | |
| "learning_rate": 0.00011878112293846224, | |
| "loss": 0.7673, | |
| "step": 17690 | |
| }, | |
| { | |
| "epoch": 0.19796332645494658, | |
| "grad_norm": 0.2617419958114624, | |
| "learning_rate": 0.00011855269770204212, | |
| "loss": 0.7637, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.19807517014220924, | |
| "grad_norm": 0.24309082329273224, | |
| "learning_rate": 0.000118324272465622, | |
| "loss": 0.7583, | |
| "step": 17710 | |
| }, | |
| { | |
| "epoch": 0.19818701382947193, | |
| "grad_norm": 0.22027656435966492, | |
| "learning_rate": 0.00011809584722920189, | |
| "loss": 0.7479, | |
| "step": 17720 | |
| }, | |
| { | |
| "epoch": 0.19829885751673462, | |
| "grad_norm": 0.27296265959739685, | |
| "learning_rate": 0.00011786742199278176, | |
| "loss": 0.765, | |
| "step": 17730 | |
| }, | |
| { | |
| "epoch": 0.1984107012039973, | |
| "grad_norm": 0.2589128613471985, | |
| "learning_rate": 0.00011763899675636165, | |
| "loss": 0.777, | |
| "step": 17740 | |
| }, | |
| { | |
| "epoch": 0.19852254489125998, | |
| "grad_norm": 0.27665242552757263, | |
| "learning_rate": 0.00011741057151994152, | |
| "loss": 0.7656, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 0.19863438857852267, | |
| "grad_norm": 0.27103251218795776, | |
| "learning_rate": 0.0001171821462835214, | |
| "loss": 0.7716, | |
| "step": 17760 | |
| }, | |
| { | |
| "epoch": 0.19874623226578533, | |
| "grad_norm": 0.2768172025680542, | |
| "learning_rate": 0.00011695372104710127, | |
| "loss": 0.7738, | |
| "step": 17770 | |
| }, | |
| { | |
| "epoch": 0.19885807595304802, | |
| "grad_norm": 0.2424757182598114, | |
| "learning_rate": 0.00011672529581068117, | |
| "loss": 0.7793, | |
| "step": 17780 | |
| }, | |
| { | |
| "epoch": 0.1989699196403107, | |
| "grad_norm": 0.2821860909461975, | |
| "learning_rate": 0.00011649687057426104, | |
| "loss": 0.7771, | |
| "step": 17790 | |
| }, | |
| { | |
| "epoch": 0.19908176332757338, | |
| "grad_norm": 0.28263264894485474, | |
| "learning_rate": 0.00011626844533784092, | |
| "loss": 0.7812, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.19919360701483607, | |
| "grad_norm": 0.24835869669914246, | |
| "learning_rate": 0.0001160400201014208, | |
| "loss": 0.7753, | |
| "step": 17810 | |
| }, | |
| { | |
| "epoch": 0.19930545070209874, | |
| "grad_norm": 0.23325562477111816, | |
| "learning_rate": 0.00011581159486500069, | |
| "loss": 0.7763, | |
| "step": 17820 | |
| }, | |
| { | |
| "epoch": 0.19941729438936143, | |
| "grad_norm": 0.2520182132720947, | |
| "learning_rate": 0.00011558316962858056, | |
| "loss": 0.791, | |
| "step": 17830 | |
| }, | |
| { | |
| "epoch": 0.19952913807662412, | |
| "grad_norm": 0.2478768676519394, | |
| "learning_rate": 0.00011535474439216045, | |
| "loss": 0.7819, | |
| "step": 17840 | |
| }, | |
| { | |
| "epoch": 0.19964098176388678, | |
| "grad_norm": 0.2749478220939636, | |
| "learning_rate": 0.00011512631915574032, | |
| "loss": 0.7805, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 0.19975282545114947, | |
| "grad_norm": 0.2417723685503006, | |
| "learning_rate": 0.0001148978939193202, | |
| "loss": 0.766, | |
| "step": 17860 | |
| }, | |
| { | |
| "epoch": 0.19986466913841217, | |
| "grad_norm": 0.25219354033470154, | |
| "learning_rate": 0.00011466946868290008, | |
| "loss": 0.758, | |
| "step": 17870 | |
| }, | |
| { | |
| "epoch": 0.19997651282567483, | |
| "grad_norm": 0.24644000828266144, | |
| "learning_rate": 0.00011444104344647997, | |
| "loss": 0.7569, | |
| "step": 17880 | |
| }, | |
| { | |
| "epoch": 0.20008835651293752, | |
| "grad_norm": 0.2683338224887848, | |
| "learning_rate": 0.00011421261821005986, | |
| "loss": 0.7509, | |
| "step": 17890 | |
| }, | |
| { | |
| "epoch": 0.2002002002002002, | |
| "grad_norm": 0.29149681329727173, | |
| "learning_rate": 0.00011398419297363972, | |
| "loss": 0.7611, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.20031204388746288, | |
| "grad_norm": 0.2651118338108063, | |
| "learning_rate": 0.00011375576773721962, | |
| "loss": 0.756, | |
| "step": 17910 | |
| }, | |
| { | |
| "epoch": 0.20042388757472557, | |
| "grad_norm": 0.26990607380867004, | |
| "learning_rate": 0.00011352734250079949, | |
| "loss": 0.7726, | |
| "step": 17920 | |
| }, | |
| { | |
| "epoch": 0.20053573126198823, | |
| "grad_norm": 0.23897935450077057, | |
| "learning_rate": 0.00011329891726437937, | |
| "loss": 0.7875, | |
| "step": 17930 | |
| }, | |
| { | |
| "epoch": 0.20064757494925092, | |
| "grad_norm": 0.2300727218389511, | |
| "learning_rate": 0.00011307049202795926, | |
| "loss": 0.7697, | |
| "step": 17940 | |
| }, | |
| { | |
| "epoch": 0.20075941863651361, | |
| "grad_norm": 0.2873596251010895, | |
| "learning_rate": 0.00011284206679153914, | |
| "loss": 0.7776, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 0.20087126232377628, | |
| "grad_norm": 0.29036712646484375, | |
| "learning_rate": 0.00011261364155511901, | |
| "loss": 0.7794, | |
| "step": 17960 | |
| }, | |
| { | |
| "epoch": 0.20098310601103897, | |
| "grad_norm": 0.2837420701980591, | |
| "learning_rate": 0.0001123852163186989, | |
| "loss": 0.7818, | |
| "step": 17970 | |
| }, | |
| { | |
| "epoch": 0.20109494969830166, | |
| "grad_norm": 0.2920686602592468, | |
| "learning_rate": 0.00011215679108227877, | |
| "loss": 0.7851, | |
| "step": 17980 | |
| }, | |
| { | |
| "epoch": 0.20120679338556433, | |
| "grad_norm": 0.27664583921432495, | |
| "learning_rate": 0.00011192836584585866, | |
| "loss": 0.7601, | |
| "step": 17990 | |
| }, | |
| { | |
| "epoch": 0.20131863707282702, | |
| "grad_norm": 0.26870399713516235, | |
| "learning_rate": 0.00011169994060943853, | |
| "loss": 0.7961, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.2014304807600897, | |
| "grad_norm": 0.2502228021621704, | |
| "learning_rate": 0.00011147151537301842, | |
| "loss": 0.7827, | |
| "step": 18010 | |
| }, | |
| { | |
| "epoch": 0.20154232444735237, | |
| "grad_norm": 0.2473440319299698, | |
| "learning_rate": 0.00011124309013659829, | |
| "loss": 0.7815, | |
| "step": 18020 | |
| }, | |
| { | |
| "epoch": 0.20165416813461506, | |
| "grad_norm": 0.2513076663017273, | |
| "learning_rate": 0.00011101466490017817, | |
| "loss": 0.7675, | |
| "step": 18030 | |
| }, | |
| { | |
| "epoch": 0.20176601182187776, | |
| "grad_norm": 0.2829226851463318, | |
| "learning_rate": 0.00011078623966375806, | |
| "loss": 0.7669, | |
| "step": 18040 | |
| }, | |
| { | |
| "epoch": 0.20187785550914042, | |
| "grad_norm": 0.25758418440818787, | |
| "learning_rate": 0.00011055781442733794, | |
| "loss": 0.7707, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 0.2019896991964031, | |
| "grad_norm": 0.27185285091400146, | |
| "learning_rate": 0.00011032938919091781, | |
| "loss": 0.7742, | |
| "step": 18060 | |
| }, | |
| { | |
| "epoch": 0.2021015428836658, | |
| "grad_norm": 0.2802230417728424, | |
| "learning_rate": 0.0001101009639544977, | |
| "loss": 0.7821, | |
| "step": 18070 | |
| }, | |
| { | |
| "epoch": 0.20221338657092847, | |
| "grad_norm": 0.2882921099662781, | |
| "learning_rate": 0.00010987253871807757, | |
| "loss": 0.779, | |
| "step": 18080 | |
| }, | |
| { | |
| "epoch": 0.20232523025819116, | |
| "grad_norm": 0.2569839358329773, | |
| "learning_rate": 0.00010964411348165746, | |
| "loss": 0.7694, | |
| "step": 18090 | |
| }, | |
| { | |
| "epoch": 0.20243707394545382, | |
| "grad_norm": 0.2600938379764557, | |
| "learning_rate": 0.00010941568824523733, | |
| "loss": 0.7781, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.2025489176327165, | |
| "grad_norm": 0.28083154559135437, | |
| "learning_rate": 0.00010918726300881722, | |
| "loss": 0.7799, | |
| "step": 18110 | |
| }, | |
| { | |
| "epoch": 0.2026607613199792, | |
| "grad_norm": 0.22990182042121887, | |
| "learning_rate": 0.00010895883777239709, | |
| "loss": 0.7883, | |
| "step": 18120 | |
| }, | |
| { | |
| "epoch": 0.20277260500724187, | |
| "grad_norm": 0.27432581782341003, | |
| "learning_rate": 0.00010873041253597697, | |
| "loss": 0.7942, | |
| "step": 18130 | |
| }, | |
| { | |
| "epoch": 0.20288444869450456, | |
| "grad_norm": 0.2607738971710205, | |
| "learning_rate": 0.00010850198729955686, | |
| "loss": 0.7877, | |
| "step": 18140 | |
| }, | |
| { | |
| "epoch": 0.20299629238176725, | |
| "grad_norm": 0.2818219065666199, | |
| "learning_rate": 0.00010827356206313674, | |
| "loss": 0.7948, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 0.20310813606902992, | |
| "grad_norm": 0.2751563489437103, | |
| "learning_rate": 0.00010804513682671661, | |
| "loss": 0.7836, | |
| "step": 18160 | |
| }, | |
| { | |
| "epoch": 0.2032199797562926, | |
| "grad_norm": 0.2746957242488861, | |
| "learning_rate": 0.0001078167115902965, | |
| "loss": 0.7693, | |
| "step": 18170 | |
| }, | |
| { | |
| "epoch": 0.2033318234435553, | |
| "grad_norm": 0.24990054965019226, | |
| "learning_rate": 0.00010758828635387638, | |
| "loss": 0.7869, | |
| "step": 18180 | |
| }, | |
| { | |
| "epoch": 0.20344366713081796, | |
| "grad_norm": 0.24581623077392578, | |
| "learning_rate": 0.00010735986111745626, | |
| "loss": 0.768, | |
| "step": 18190 | |
| }, | |
| { | |
| "epoch": 0.20355551081808065, | |
| "grad_norm": 0.26637768745422363, | |
| "learning_rate": 0.00010713143588103613, | |
| "loss": 0.7711, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.20366735450534335, | |
| "grad_norm": 0.2510250508785248, | |
| "learning_rate": 0.00010690301064461602, | |
| "loss": 0.7748, | |
| "step": 18210 | |
| }, | |
| { | |
| "epoch": 0.203779198192606, | |
| "grad_norm": 0.2378496378660202, | |
| "learning_rate": 0.00010667458540819589, | |
| "loss": 0.7622, | |
| "step": 18220 | |
| }, | |
| { | |
| "epoch": 0.2038910418798687, | |
| "grad_norm": 0.2507869601249695, | |
| "learning_rate": 0.00010644616017177578, | |
| "loss": 0.7739, | |
| "step": 18230 | |
| }, | |
| { | |
| "epoch": 0.20400288556713136, | |
| "grad_norm": 0.24733096361160278, | |
| "learning_rate": 0.00010621773493535566, | |
| "loss": 0.7508, | |
| "step": 18240 | |
| }, | |
| { | |
| "epoch": 0.20411472925439406, | |
| "grad_norm": 0.23383109271526337, | |
| "learning_rate": 0.00010598930969893554, | |
| "loss": 0.7507, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 0.20422657294165675, | |
| "grad_norm": 0.2543237805366516, | |
| "learning_rate": 0.00010576088446251541, | |
| "loss": 0.7578, | |
| "step": 18260 | |
| }, | |
| { | |
| "epoch": 0.2043384166289194, | |
| "grad_norm": 0.25807520747184753, | |
| "learning_rate": 0.00010553245922609531, | |
| "loss": 0.7513, | |
| "step": 18270 | |
| }, | |
| { | |
| "epoch": 0.2044502603161821, | |
| "grad_norm": 0.23354406654834747, | |
| "learning_rate": 0.00010530403398967518, | |
| "loss": 0.7566, | |
| "step": 18280 | |
| }, | |
| { | |
| "epoch": 0.2045621040034448, | |
| "grad_norm": 0.2685154676437378, | |
| "learning_rate": 0.00010507560875325506, | |
| "loss": 0.758, | |
| "step": 18290 | |
| }, | |
| { | |
| "epoch": 0.20467394769070746, | |
| "grad_norm": 0.24349918961524963, | |
| "learning_rate": 0.00010484718351683494, | |
| "loss": 0.7686, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.20478579137797015, | |
| "grad_norm": 0.24823498725891113, | |
| "learning_rate": 0.00010461875828041482, | |
| "loss": 0.7659, | |
| "step": 18310 | |
| }, | |
| { | |
| "epoch": 0.20489763506523284, | |
| "grad_norm": 0.2511804401874542, | |
| "learning_rate": 0.0001043903330439947, | |
| "loss": 0.77, | |
| "step": 18320 | |
| }, | |
| { | |
| "epoch": 0.2050094787524955, | |
| "grad_norm": 0.24065516889095306, | |
| "learning_rate": 0.00010416190780757458, | |
| "loss": 0.7677, | |
| "step": 18330 | |
| }, | |
| { | |
| "epoch": 0.2051213224397582, | |
| "grad_norm": 0.2819323241710663, | |
| "learning_rate": 0.00010393348257115447, | |
| "loss": 0.753, | |
| "step": 18340 | |
| }, | |
| { | |
| "epoch": 0.2052331661270209, | |
| "grad_norm": 0.26467952132225037, | |
| "learning_rate": 0.00010370505733473434, | |
| "loss": 0.7826, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 0.20534500981428355, | |
| "grad_norm": 0.22962163388729095, | |
| "learning_rate": 0.00010347663209831423, | |
| "loss": 0.7683, | |
| "step": 18360 | |
| }, | |
| { | |
| "epoch": 0.20545685350154624, | |
| "grad_norm": 0.2582736611366272, | |
| "learning_rate": 0.00010324820686189411, | |
| "loss": 0.7951, | |
| "step": 18370 | |
| }, | |
| { | |
| "epoch": 0.2055686971888089, | |
| "grad_norm": 0.2352149486541748, | |
| "learning_rate": 0.00010301978162547399, | |
| "loss": 0.7577, | |
| "step": 18380 | |
| }, | |
| { | |
| "epoch": 0.2056805408760716, | |
| "grad_norm": 0.25687554478645325, | |
| "learning_rate": 0.00010279135638905386, | |
| "loss": 0.7696, | |
| "step": 18390 | |
| }, | |
| { | |
| "epoch": 0.2057923845633343, | |
| "grad_norm": 0.2579772472381592, | |
| "learning_rate": 0.00010256293115263376, | |
| "loss": 0.7837, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.20590422825059695, | |
| "grad_norm": 0.24537009000778198, | |
| "learning_rate": 0.00010233450591621363, | |
| "loss": 0.7799, | |
| "step": 18410 | |
| }, | |
| { | |
| "epoch": 0.20601607193785965, | |
| "grad_norm": 0.2636966109275818, | |
| "learning_rate": 0.00010210608067979351, | |
| "loss": 0.7588, | |
| "step": 18420 | |
| }, | |
| { | |
| "epoch": 0.20612791562512234, | |
| "grad_norm": 0.30670562386512756, | |
| "learning_rate": 0.00010187765544337338, | |
| "loss": 0.771, | |
| "step": 18430 | |
| }, | |
| { | |
| "epoch": 0.206239759312385, | |
| "grad_norm": 0.28400668501853943, | |
| "learning_rate": 0.00010164923020695327, | |
| "loss": 0.7686, | |
| "step": 18440 | |
| }, | |
| { | |
| "epoch": 0.2063516029996477, | |
| "grad_norm": 0.27395951747894287, | |
| "learning_rate": 0.00010142080497053314, | |
| "loss": 0.776, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 0.20646344668691038, | |
| "grad_norm": 0.284868061542511, | |
| "learning_rate": 0.00010119237973411303, | |
| "loss": 0.7864, | |
| "step": 18460 | |
| }, | |
| { | |
| "epoch": 0.20657529037417305, | |
| "grad_norm": 0.2859087586402893, | |
| "learning_rate": 0.00010096395449769291, | |
| "loss": 0.7749, | |
| "step": 18470 | |
| }, | |
| { | |
| "epoch": 0.20668713406143574, | |
| "grad_norm": 0.28758034110069275, | |
| "learning_rate": 0.00010073552926127279, | |
| "loss": 0.7919, | |
| "step": 18480 | |
| }, | |
| { | |
| "epoch": 0.20679897774869843, | |
| "grad_norm": 0.2752404510974884, | |
| "learning_rate": 0.00010050710402485266, | |
| "loss": 0.7808, | |
| "step": 18490 | |
| }, | |
| { | |
| "epoch": 0.2069108214359611, | |
| "grad_norm": 0.30756843090057373, | |
| "learning_rate": 0.00010027867878843256, | |
| "loss": 0.7734, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.2070226651232238, | |
| "grad_norm": 0.2694368064403534, | |
| "learning_rate": 0.00010005025355201243, | |
| "loss": 0.7751, | |
| "step": 18510 | |
| }, | |
| { | |
| "epoch": 0.20713450881048645, | |
| "grad_norm": 0.25838834047317505, | |
| "learning_rate": 9.982182831559231e-05, | |
| "loss": 0.7686, | |
| "step": 18520 | |
| }, | |
| { | |
| "epoch": 0.20724635249774914, | |
| "grad_norm": 0.257729709148407, | |
| "learning_rate": 9.959340307917219e-05, | |
| "loss": 0.7827, | |
| "step": 18530 | |
| }, | |
| { | |
| "epoch": 0.20735819618501183, | |
| "grad_norm": 0.2938844859600067, | |
| "learning_rate": 9.936497784275208e-05, | |
| "loss": 0.7685, | |
| "step": 18540 | |
| }, | |
| { | |
| "epoch": 0.2074700398722745, | |
| "grad_norm": 0.25894027948379517, | |
| "learning_rate": 9.913655260633194e-05, | |
| "loss": 0.7738, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 0.2075818835595372, | |
| "grad_norm": 0.2751148045063019, | |
| "learning_rate": 9.890812736991183e-05, | |
| "loss": 0.7594, | |
| "step": 18560 | |
| }, | |
| { | |
| "epoch": 0.20769372724679988, | |
| "grad_norm": 0.28643253445625305, | |
| "learning_rate": 9.867970213349171e-05, | |
| "loss": 0.7737, | |
| "step": 18570 | |
| }, | |
| { | |
| "epoch": 0.20780557093406254, | |
| "grad_norm": 0.2575749158859253, | |
| "learning_rate": 9.845127689707159e-05, | |
| "loss": 0.7778, | |
| "step": 18580 | |
| }, | |
| { | |
| "epoch": 0.20791741462132524, | |
| "grad_norm": 0.27625295519828796, | |
| "learning_rate": 9.822285166065146e-05, | |
| "loss": 0.7716, | |
| "step": 18590 | |
| }, | |
| { | |
| "epoch": 0.20802925830858793, | |
| "grad_norm": 0.2803322672843933, | |
| "learning_rate": 9.799442642423136e-05, | |
| "loss": 0.7805, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.2081411019958506, | |
| "grad_norm": 0.2567484676837921, | |
| "learning_rate": 9.776600118781123e-05, | |
| "loss": 0.7633, | |
| "step": 18610 | |
| }, | |
| { | |
| "epoch": 0.20825294568311328, | |
| "grad_norm": 0.28193768858909607, | |
| "learning_rate": 9.753757595139111e-05, | |
| "loss": 0.7895, | |
| "step": 18620 | |
| }, | |
| { | |
| "epoch": 0.20836478937037597, | |
| "grad_norm": 0.28459542989730835, | |
| "learning_rate": 9.7309150714971e-05, | |
| "loss": 0.7741, | |
| "step": 18630 | |
| }, | |
| { | |
| "epoch": 0.20847663305763864, | |
| "grad_norm": 0.28346261382102966, | |
| "learning_rate": 9.708072547855088e-05, | |
| "loss": 0.7813, | |
| "step": 18640 | |
| }, | |
| { | |
| "epoch": 0.20858847674490133, | |
| "grad_norm": 0.2818828523159027, | |
| "learning_rate": 9.685230024213075e-05, | |
| "loss": 0.7755, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 0.208700320432164, | |
| "grad_norm": 0.28914326429367065, | |
| "learning_rate": 9.662387500571063e-05, | |
| "loss": 0.7798, | |
| "step": 18660 | |
| }, | |
| { | |
| "epoch": 0.20881216411942669, | |
| "grad_norm": 0.2600755989551544, | |
| "learning_rate": 9.639544976929051e-05, | |
| "loss": 0.7758, | |
| "step": 18670 | |
| }, | |
| { | |
| "epoch": 0.20892400780668938, | |
| "grad_norm": 0.2726733088493347, | |
| "learning_rate": 9.61670245328704e-05, | |
| "loss": 0.7769, | |
| "step": 18680 | |
| }, | |
| { | |
| "epoch": 0.20903585149395204, | |
| "grad_norm": 0.23421594500541687, | |
| "learning_rate": 9.593859929645026e-05, | |
| "loss": 0.758, | |
| "step": 18690 | |
| }, | |
| { | |
| "epoch": 0.20914769518121473, | |
| "grad_norm": 0.29468339681625366, | |
| "learning_rate": 9.571017406003016e-05, | |
| "loss": 0.7746, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.20925953886847742, | |
| "grad_norm": 0.29477235674858093, | |
| "learning_rate": 9.548174882361003e-05, | |
| "loss": 0.7633, | |
| "step": 18710 | |
| }, | |
| { | |
| "epoch": 0.2093713825557401, | |
| "grad_norm": 0.2564197778701782, | |
| "learning_rate": 9.525332358718991e-05, | |
| "loss": 0.7541, | |
| "step": 18720 | |
| }, | |
| { | |
| "epoch": 0.20948322624300278, | |
| "grad_norm": 0.2745250165462494, | |
| "learning_rate": 9.50248983507698e-05, | |
| "loss": 0.7887, | |
| "step": 18730 | |
| }, | |
| { | |
| "epoch": 0.20959506993026547, | |
| "grad_norm": 0.2572060525417328, | |
| "learning_rate": 9.479647311434968e-05, | |
| "loss": 0.774, | |
| "step": 18740 | |
| }, | |
| { | |
| "epoch": 0.20970691361752813, | |
| "grad_norm": 0.28513193130493164, | |
| "learning_rate": 9.456804787792955e-05, | |
| "loss": 0.7871, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 0.20981875730479083, | |
| "grad_norm": 0.2643887400627136, | |
| "learning_rate": 9.433962264150944e-05, | |
| "loss": 0.77, | |
| "step": 18760 | |
| }, | |
| { | |
| "epoch": 0.20993060099205352, | |
| "grad_norm": 0.27534207701683044, | |
| "learning_rate": 9.411119740508931e-05, | |
| "loss": 0.7775, | |
| "step": 18770 | |
| }, | |
| { | |
| "epoch": 0.21004244467931618, | |
| "grad_norm": 0.2620585858821869, | |
| "learning_rate": 9.38827721686692e-05, | |
| "loss": 0.7808, | |
| "step": 18780 | |
| }, | |
| { | |
| "epoch": 0.21015428836657887, | |
| "grad_norm": 0.2759549915790558, | |
| "learning_rate": 9.365434693224908e-05, | |
| "loss": 0.7642, | |
| "step": 18790 | |
| }, | |
| { | |
| "epoch": 0.21026613205384156, | |
| "grad_norm": 0.2919774353504181, | |
| "learning_rate": 9.342592169582896e-05, | |
| "loss": 0.7828, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.21037797574110423, | |
| "grad_norm": 0.2717173099517822, | |
| "learning_rate": 9.319749645940884e-05, | |
| "loss": 0.7513, | |
| "step": 18810 | |
| }, | |
| { | |
| "epoch": 0.21048981942836692, | |
| "grad_norm": 0.2662122845649719, | |
| "learning_rate": 9.296907122298871e-05, | |
| "loss": 0.7668, | |
| "step": 18820 | |
| }, | |
| { | |
| "epoch": 0.21060166311562958, | |
| "grad_norm": 0.26051005721092224, | |
| "learning_rate": 9.274064598656861e-05, | |
| "loss": 0.7676, | |
| "step": 18830 | |
| }, | |
| { | |
| "epoch": 0.21071350680289228, | |
| "grad_norm": 0.27510005235671997, | |
| "learning_rate": 9.251222075014848e-05, | |
| "loss": 0.7507, | |
| "step": 18840 | |
| }, | |
| { | |
| "epoch": 0.21082535049015497, | |
| "grad_norm": 0.23877868056297302, | |
| "learning_rate": 9.228379551372836e-05, | |
| "loss": 0.7535, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 0.21093719417741763, | |
| "grad_norm": 0.256104439496994, | |
| "learning_rate": 9.205537027730824e-05, | |
| "loss": 0.7546, | |
| "step": 18860 | |
| }, | |
| { | |
| "epoch": 0.21104903786468032, | |
| "grad_norm": 0.2829015552997589, | |
| "learning_rate": 9.182694504088813e-05, | |
| "loss": 0.7588, | |
| "step": 18870 | |
| }, | |
| { | |
| "epoch": 0.211160881551943, | |
| "grad_norm": 0.22898368537425995, | |
| "learning_rate": 9.1598519804468e-05, | |
| "loss": 0.7551, | |
| "step": 18880 | |
| }, | |
| { | |
| "epoch": 0.21127272523920568, | |
| "grad_norm": 0.23679418861865997, | |
| "learning_rate": 9.137009456804788e-05, | |
| "loss": 0.7718, | |
| "step": 18890 | |
| }, | |
| { | |
| "epoch": 0.21138456892646837, | |
| "grad_norm": 0.2878457009792328, | |
| "learning_rate": 9.114166933162776e-05, | |
| "loss": 0.7593, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.21149641261373106, | |
| "grad_norm": 0.2936013638973236, | |
| "learning_rate": 9.091324409520764e-05, | |
| "loss": 0.7713, | |
| "step": 18910 | |
| }, | |
| { | |
| "epoch": 0.21160825630099372, | |
| "grad_norm": 0.26062774658203125, | |
| "learning_rate": 9.068481885878751e-05, | |
| "loss": 0.7763, | |
| "step": 18920 | |
| }, | |
| { | |
| "epoch": 0.21172009998825642, | |
| "grad_norm": 0.3092271685600281, | |
| "learning_rate": 9.045639362236741e-05, | |
| "loss": 0.7807, | |
| "step": 18930 | |
| }, | |
| { | |
| "epoch": 0.2118319436755191, | |
| "grad_norm": 0.23566113412380219, | |
| "learning_rate": 9.022796838594728e-05, | |
| "loss": 0.7779, | |
| "step": 18940 | |
| }, | |
| { | |
| "epoch": 0.21194378736278177, | |
| "grad_norm": 0.27366477251052856, | |
| "learning_rate": 8.999954314952716e-05, | |
| "loss": 0.77, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 0.21205563105004446, | |
| "grad_norm": 0.23270778357982635, | |
| "learning_rate": 8.977111791310704e-05, | |
| "loss": 0.7549, | |
| "step": 18960 | |
| }, | |
| { | |
| "epoch": 0.21216747473730713, | |
| "grad_norm": 0.28785306215286255, | |
| "learning_rate": 8.954269267668693e-05, | |
| "loss": 0.7677, | |
| "step": 18970 | |
| }, | |
| { | |
| "epoch": 0.21227931842456982, | |
| "grad_norm": 0.2588510811328888, | |
| "learning_rate": 8.93142674402668e-05, | |
| "loss": 0.7715, | |
| "step": 18980 | |
| }, | |
| { | |
| "epoch": 0.2123911621118325, | |
| "grad_norm": 0.248029887676239, | |
| "learning_rate": 8.908584220384668e-05, | |
| "loss": 0.7749, | |
| "step": 18990 | |
| }, | |
| { | |
| "epoch": 0.21250300579909517, | |
| "grad_norm": 0.2579936981201172, | |
| "learning_rate": 8.885741696742656e-05, | |
| "loss": 0.7552, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.21261484948635787, | |
| "grad_norm": 0.26293206214904785, | |
| "learning_rate": 8.862899173100645e-05, | |
| "loss": 0.7657, | |
| "step": 19010 | |
| }, | |
| { | |
| "epoch": 0.21272669317362056, | |
| "grad_norm": 0.24589793384075165, | |
| "learning_rate": 8.840056649458631e-05, | |
| "loss": 0.7598, | |
| "step": 19020 | |
| }, | |
| { | |
| "epoch": 0.21283853686088322, | |
| "grad_norm": 0.2315252274274826, | |
| "learning_rate": 8.817214125816621e-05, | |
| "loss": 0.7637, | |
| "step": 19030 | |
| }, | |
| { | |
| "epoch": 0.2129503805481459, | |
| "grad_norm": 0.2538358271121979, | |
| "learning_rate": 8.794371602174608e-05, | |
| "loss": 0.7587, | |
| "step": 19040 | |
| }, | |
| { | |
| "epoch": 0.2130622242354086, | |
| "grad_norm": 0.2626616060733795, | |
| "learning_rate": 8.771529078532596e-05, | |
| "loss": 0.7597, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 0.21317406792267127, | |
| "grad_norm": 0.2557279169559479, | |
| "learning_rate": 8.748686554890585e-05, | |
| "loss": 0.7499, | |
| "step": 19060 | |
| }, | |
| { | |
| "epoch": 0.21328591160993396, | |
| "grad_norm": 0.25008153915405273, | |
| "learning_rate": 8.725844031248573e-05, | |
| "loss": 0.7466, | |
| "step": 19070 | |
| }, | |
| { | |
| "epoch": 0.21339775529719665, | |
| "grad_norm": 0.2647120952606201, | |
| "learning_rate": 8.70300150760656e-05, | |
| "loss": 0.7574, | |
| "step": 19080 | |
| }, | |
| { | |
| "epoch": 0.21350959898445931, | |
| "grad_norm": 0.2535738945007324, | |
| "learning_rate": 8.68015898396455e-05, | |
| "loss": 0.7672, | |
| "step": 19090 | |
| }, | |
| { | |
| "epoch": 0.213621442671722, | |
| "grad_norm": 0.28925755620002747, | |
| "learning_rate": 8.657316460322536e-05, | |
| "loss": 0.7692, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.21373328635898467, | |
| "grad_norm": 0.26770591735839844, | |
| "learning_rate": 8.634473936680525e-05, | |
| "loss": 0.7511, | |
| "step": 19110 | |
| }, | |
| { | |
| "epoch": 0.21384513004624736, | |
| "grad_norm": 0.25162947177886963, | |
| "learning_rate": 8.611631413038512e-05, | |
| "loss": 0.7573, | |
| "step": 19120 | |
| }, | |
| { | |
| "epoch": 0.21395697373351005, | |
| "grad_norm": 0.253324031829834, | |
| "learning_rate": 8.588788889396501e-05, | |
| "loss": 0.7516, | |
| "step": 19130 | |
| }, | |
| { | |
| "epoch": 0.21406881742077272, | |
| "grad_norm": 0.2784843146800995, | |
| "learning_rate": 8.565946365754488e-05, | |
| "loss": 0.7522, | |
| "step": 19140 | |
| }, | |
| { | |
| "epoch": 0.2141806611080354, | |
| "grad_norm": 0.2869722247123718, | |
| "learning_rate": 8.543103842112476e-05, | |
| "loss": 0.7525, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 0.2142925047952981, | |
| "grad_norm": 0.2467101663351059, | |
| "learning_rate": 8.520261318470465e-05, | |
| "loss": 0.7336, | |
| "step": 19160 | |
| }, | |
| { | |
| "epoch": 0.21440434848256076, | |
| "grad_norm": 0.26108691096305847, | |
| "learning_rate": 8.497418794828453e-05, | |
| "loss": 0.751, | |
| "step": 19170 | |
| }, | |
| { | |
| "epoch": 0.21451619216982346, | |
| "grad_norm": 0.2992580533027649, | |
| "learning_rate": 8.47457627118644e-05, | |
| "loss": 0.7599, | |
| "step": 19180 | |
| }, | |
| { | |
| "epoch": 0.21462803585708615, | |
| "grad_norm": 0.2573351562023163, | |
| "learning_rate": 8.45173374754443e-05, | |
| "loss": 0.752, | |
| "step": 19190 | |
| }, | |
| { | |
| "epoch": 0.2147398795443488, | |
| "grad_norm": 0.30148234963417053, | |
| "learning_rate": 8.428891223902416e-05, | |
| "loss": 0.7536, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.2148517232316115, | |
| "grad_norm": 0.2811321020126343, | |
| "learning_rate": 8.406048700260405e-05, | |
| "loss": 0.761, | |
| "step": 19210 | |
| }, | |
| { | |
| "epoch": 0.2149635669188742, | |
| "grad_norm": 0.2792038321495056, | |
| "learning_rate": 8.383206176618392e-05, | |
| "loss": 0.7558, | |
| "step": 19220 | |
| }, | |
| { | |
| "epoch": 0.21507541060613686, | |
| "grad_norm": 0.30432426929473877, | |
| "learning_rate": 8.360363652976381e-05, | |
| "loss": 0.7541, | |
| "step": 19230 | |
| }, | |
| { | |
| "epoch": 0.21518725429339955, | |
| "grad_norm": 0.28335481882095337, | |
| "learning_rate": 8.33752112933437e-05, | |
| "loss": 0.7628, | |
| "step": 19240 | |
| }, | |
| { | |
| "epoch": 0.2152990979806622, | |
| "grad_norm": 0.28402864933013916, | |
| "learning_rate": 8.314678605692357e-05, | |
| "loss": 0.7835, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 0.2154109416679249, | |
| "grad_norm": 0.2914164662361145, | |
| "learning_rate": 8.291836082050346e-05, | |
| "loss": 0.7705, | |
| "step": 19260 | |
| }, | |
| { | |
| "epoch": 0.2155227853551876, | |
| "grad_norm": 0.27296769618988037, | |
| "learning_rate": 8.268993558408333e-05, | |
| "loss": 0.7791, | |
| "step": 19270 | |
| }, | |
| { | |
| "epoch": 0.21563462904245026, | |
| "grad_norm": 0.2987435460090637, | |
| "learning_rate": 8.246151034766321e-05, | |
| "loss": 0.7918, | |
| "step": 19280 | |
| }, | |
| { | |
| "epoch": 0.21574647272971295, | |
| "grad_norm": 0.2743736207485199, | |
| "learning_rate": 8.22330851112431e-05, | |
| "loss": 0.7777, | |
| "step": 19290 | |
| }, | |
| { | |
| "epoch": 0.21585831641697564, | |
| "grad_norm": 0.2775188982486725, | |
| "learning_rate": 8.200465987482298e-05, | |
| "loss": 0.7811, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.2159701601042383, | |
| "grad_norm": 0.2942585349082947, | |
| "learning_rate": 8.177623463840285e-05, | |
| "loss": 0.7748, | |
| "step": 19310 | |
| }, | |
| { | |
| "epoch": 0.216082003791501, | |
| "grad_norm": 0.2545025050640106, | |
| "learning_rate": 8.154780940198274e-05, | |
| "loss": 0.77, | |
| "step": 19320 | |
| }, | |
| { | |
| "epoch": 0.2161938474787637, | |
| "grad_norm": 0.2571526765823364, | |
| "learning_rate": 8.131938416556261e-05, | |
| "loss": 0.7735, | |
| "step": 19330 | |
| }, | |
| { | |
| "epoch": 0.21630569116602635, | |
| "grad_norm": 0.2687735855579376, | |
| "learning_rate": 8.10909589291425e-05, | |
| "loss": 0.7703, | |
| "step": 19340 | |
| }, | |
| { | |
| "epoch": 0.21641753485328905, | |
| "grad_norm": 0.27332374453544617, | |
| "learning_rate": 8.086253369272237e-05, | |
| "loss": 0.7645, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 0.21652937854055174, | |
| "grad_norm": 0.25585636496543884, | |
| "learning_rate": 8.063410845630226e-05, | |
| "loss": 0.7651, | |
| "step": 19360 | |
| }, | |
| { | |
| "epoch": 0.2166412222278144, | |
| "grad_norm": 0.25861334800720215, | |
| "learning_rate": 8.040568321988213e-05, | |
| "loss": 0.7788, | |
| "step": 19370 | |
| }, | |
| { | |
| "epoch": 0.2167530659150771, | |
| "grad_norm": 0.26126453280448914, | |
| "learning_rate": 8.017725798346201e-05, | |
| "loss": 0.7631, | |
| "step": 19380 | |
| }, | |
| { | |
| "epoch": 0.21686490960233978, | |
| "grad_norm": 0.27623289823532104, | |
| "learning_rate": 7.99488327470419e-05, | |
| "loss": 0.7555, | |
| "step": 19390 | |
| }, | |
| { | |
| "epoch": 0.21697675328960245, | |
| "grad_norm": 0.256489634513855, | |
| "learning_rate": 7.972040751062178e-05, | |
| "loss": 0.7565, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.21708859697686514, | |
| "grad_norm": 0.26825475692749023, | |
| "learning_rate": 7.949198227420165e-05, | |
| "loss": 0.7619, | |
| "step": 19410 | |
| }, | |
| { | |
| "epoch": 0.2172004406641278, | |
| "grad_norm": 0.2633214294910431, | |
| "learning_rate": 7.926355703778155e-05, | |
| "loss": 0.7576, | |
| "step": 19420 | |
| }, | |
| { | |
| "epoch": 0.2173122843513905, | |
| "grad_norm": 0.24602185189723969, | |
| "learning_rate": 7.903513180136141e-05, | |
| "loss": 0.748, | |
| "step": 19430 | |
| }, | |
| { | |
| "epoch": 0.21742412803865319, | |
| "grad_norm": 0.24769659340381622, | |
| "learning_rate": 7.88067065649413e-05, | |
| "loss": 0.749, | |
| "step": 19440 | |
| }, | |
| { | |
| "epoch": 0.21753597172591585, | |
| "grad_norm": 0.22824670374393463, | |
| "learning_rate": 7.857828132852117e-05, | |
| "loss": 0.7439, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 0.21764781541317854, | |
| "grad_norm": 0.24848710000514984, | |
| "learning_rate": 7.834985609210106e-05, | |
| "loss": 0.7422, | |
| "step": 19460 | |
| }, | |
| { | |
| "epoch": 0.21775965910044123, | |
| "grad_norm": 0.25875037908554077, | |
| "learning_rate": 7.812143085568093e-05, | |
| "loss": 0.7411, | |
| "step": 19470 | |
| }, | |
| { | |
| "epoch": 0.2178715027877039, | |
| "grad_norm": 0.24616488814353943, | |
| "learning_rate": 7.789300561926082e-05, | |
| "loss": 0.723, | |
| "step": 19480 | |
| }, | |
| { | |
| "epoch": 0.2179833464749666, | |
| "grad_norm": 0.26018476486206055, | |
| "learning_rate": 7.76645803828407e-05, | |
| "loss": 0.7388, | |
| "step": 19490 | |
| }, | |
| { | |
| "epoch": 0.21809519016222928, | |
| "grad_norm": 0.24355724453926086, | |
| "learning_rate": 7.743615514642058e-05, | |
| "loss": 0.7337, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.21820703384949194, | |
| "grad_norm": 0.24908235669136047, | |
| "learning_rate": 7.720772991000045e-05, | |
| "loss": 0.7378, | |
| "step": 19510 | |
| }, | |
| { | |
| "epoch": 0.21831887753675464, | |
| "grad_norm": 0.2710162401199341, | |
| "learning_rate": 7.697930467358035e-05, | |
| "loss": 0.7336, | |
| "step": 19520 | |
| }, | |
| { | |
| "epoch": 0.21843072122401733, | |
| "grad_norm": 0.24222905933856964, | |
| "learning_rate": 7.675087943716022e-05, | |
| "loss": 0.7386, | |
| "step": 19530 | |
| }, | |
| { | |
| "epoch": 0.21854256491128, | |
| "grad_norm": 0.23762881755828857, | |
| "learning_rate": 7.65224542007401e-05, | |
| "loss": 0.7354, | |
| "step": 19540 | |
| }, | |
| { | |
| "epoch": 0.21865440859854268, | |
| "grad_norm": 0.25905948877334595, | |
| "learning_rate": 7.629402896431998e-05, | |
| "loss": 0.7453, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 0.21876625228580535, | |
| "grad_norm": 0.24563716351985931, | |
| "learning_rate": 7.606560372789986e-05, | |
| "loss": 0.7422, | |
| "step": 19560 | |
| }, | |
| { | |
| "epoch": 0.21887809597306804, | |
| "grad_norm": 0.2649664878845215, | |
| "learning_rate": 7.583717849147973e-05, | |
| "loss": 0.7301, | |
| "step": 19570 | |
| }, | |
| { | |
| "epoch": 0.21898993966033073, | |
| "grad_norm": 0.24720273911952972, | |
| "learning_rate": 7.560875325505962e-05, | |
| "loss": 0.7321, | |
| "step": 19580 | |
| }, | |
| { | |
| "epoch": 0.2191017833475934, | |
| "grad_norm": 0.23652884364128113, | |
| "learning_rate": 7.53803280186395e-05, | |
| "loss": 0.7296, | |
| "step": 19590 | |
| }, | |
| { | |
| "epoch": 0.21921362703485608, | |
| "grad_norm": 0.23715312778949738, | |
| "learning_rate": 7.515190278221938e-05, | |
| "loss": 0.7237, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.21932547072211878, | |
| "grad_norm": 0.2500048577785492, | |
| "learning_rate": 7.492347754579925e-05, | |
| "loss": 0.7372, | |
| "step": 19610 | |
| }, | |
| { | |
| "epoch": 0.21943731440938144, | |
| "grad_norm": 0.2575337886810303, | |
| "learning_rate": 7.469505230937915e-05, | |
| "loss": 0.7393, | |
| "step": 19620 | |
| }, | |
| { | |
| "epoch": 0.21954915809664413, | |
| "grad_norm": 0.255375474691391, | |
| "learning_rate": 7.446662707295902e-05, | |
| "loss": 0.75, | |
| "step": 19630 | |
| }, | |
| { | |
| "epoch": 0.21966100178390682, | |
| "grad_norm": 0.2793714106082916, | |
| "learning_rate": 7.42382018365389e-05, | |
| "loss": 0.7585, | |
| "step": 19640 | |
| }, | |
| { | |
| "epoch": 0.2197728454711695, | |
| "grad_norm": 0.2588786482810974, | |
| "learning_rate": 7.400977660011878e-05, | |
| "loss": 0.7661, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 0.21988468915843218, | |
| "grad_norm": 0.27130866050720215, | |
| "learning_rate": 7.378135136369867e-05, | |
| "loss": 0.7579, | |
| "step": 19660 | |
| }, | |
| { | |
| "epoch": 0.21999653284569487, | |
| "grad_norm": 0.2730309069156647, | |
| "learning_rate": 7.355292612727853e-05, | |
| "loss": 0.7463, | |
| "step": 19670 | |
| }, | |
| { | |
| "epoch": 0.22010837653295753, | |
| "grad_norm": 0.24330918490886688, | |
| "learning_rate": 7.332450089085842e-05, | |
| "loss": 0.7388, | |
| "step": 19680 | |
| }, | |
| { | |
| "epoch": 0.22022022022022023, | |
| "grad_norm": 0.30004703998565674, | |
| "learning_rate": 7.309607565443831e-05, | |
| "loss": 0.7633, | |
| "step": 19690 | |
| }, | |
| { | |
| "epoch": 0.2203320639074829, | |
| "grad_norm": 0.2754705548286438, | |
| "learning_rate": 7.286765041801818e-05, | |
| "loss": 0.7587, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.22044390759474558, | |
| "grad_norm": 0.27601394057273865, | |
| "learning_rate": 7.263922518159807e-05, | |
| "loss": 0.7468, | |
| "step": 19710 | |
| }, | |
| { | |
| "epoch": 0.22055575128200827, | |
| "grad_norm": 0.2328653633594513, | |
| "learning_rate": 7.241079994517795e-05, | |
| "loss": 0.7432, | |
| "step": 19720 | |
| }, | |
| { | |
| "epoch": 0.22066759496927094, | |
| "grad_norm": 0.23960436880588531, | |
| "learning_rate": 7.218237470875783e-05, | |
| "loss": 0.7384, | |
| "step": 19730 | |
| }, | |
| { | |
| "epoch": 0.22077943865653363, | |
| "grad_norm": 0.2687484323978424, | |
| "learning_rate": 7.19539494723377e-05, | |
| "loss": 0.738, | |
| "step": 19740 | |
| }, | |
| { | |
| "epoch": 0.22089128234379632, | |
| "grad_norm": 0.2243189811706543, | |
| "learning_rate": 7.17255242359176e-05, | |
| "loss": 0.7467, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 0.22100312603105898, | |
| "grad_norm": 0.26094529032707214, | |
| "learning_rate": 7.149709899949747e-05, | |
| "loss": 0.7579, | |
| "step": 19760 | |
| }, | |
| { | |
| "epoch": 0.22111496971832167, | |
| "grad_norm": 0.2761390507221222, | |
| "learning_rate": 7.126867376307735e-05, | |
| "loss": 0.7491, | |
| "step": 19770 | |
| }, | |
| { | |
| "epoch": 0.22122681340558437, | |
| "grad_norm": 0.2523578405380249, | |
| "learning_rate": 7.104024852665723e-05, | |
| "loss": 0.7358, | |
| "step": 19780 | |
| }, | |
| { | |
| "epoch": 0.22133865709284703, | |
| "grad_norm": 0.25612056255340576, | |
| "learning_rate": 7.081182329023711e-05, | |
| "loss": 0.7322, | |
| "step": 19790 | |
| }, | |
| { | |
| "epoch": 0.22145050078010972, | |
| "grad_norm": 0.24379362165927887, | |
| "learning_rate": 7.058339805381698e-05, | |
| "loss": 0.7438, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.2215623444673724, | |
| "grad_norm": 0.2315502017736435, | |
| "learning_rate": 7.035497281739687e-05, | |
| "loss": 0.7349, | |
| "step": 19810 | |
| }, | |
| { | |
| "epoch": 0.22167418815463508, | |
| "grad_norm": 0.41941365599632263, | |
| "learning_rate": 7.012654758097675e-05, | |
| "loss": 0.743, | |
| "step": 19820 | |
| }, | |
| { | |
| "epoch": 0.22178603184189777, | |
| "grad_norm": 0.23147599399089813, | |
| "learning_rate": 6.989812234455663e-05, | |
| "loss": 0.7381, | |
| "step": 19830 | |
| }, | |
| { | |
| "epoch": 0.22189787552916043, | |
| "grad_norm": 0.25920864939689636, | |
| "learning_rate": 6.96696971081365e-05, | |
| "loss": 0.7469, | |
| "step": 19840 | |
| }, | |
| { | |
| "epoch": 0.22200971921642312, | |
| "grad_norm": 0.23870904743671417, | |
| "learning_rate": 6.94412718717164e-05, | |
| "loss": 0.7476, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 0.22212156290368582, | |
| "grad_norm": 0.2372673749923706, | |
| "learning_rate": 6.921284663529627e-05, | |
| "loss": 0.7468, | |
| "step": 19860 | |
| }, | |
| { | |
| "epoch": 0.22223340659094848, | |
| "grad_norm": 0.2703365683555603, | |
| "learning_rate": 6.898442139887615e-05, | |
| "loss": 0.742, | |
| "step": 19870 | |
| }, | |
| { | |
| "epoch": 0.22234525027821117, | |
| "grad_norm": 0.24437329173088074, | |
| "learning_rate": 6.875599616245603e-05, | |
| "loss": 0.7217, | |
| "step": 19880 | |
| }, | |
| { | |
| "epoch": 0.22245709396547386, | |
| "grad_norm": 0.21680840849876404, | |
| "learning_rate": 6.852757092603592e-05, | |
| "loss": 0.7547, | |
| "step": 19890 | |
| }, | |
| { | |
| "epoch": 0.22256893765273653, | |
| "grad_norm": 0.29101526737213135, | |
| "learning_rate": 6.829914568961579e-05, | |
| "loss": 0.7389, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.22268078133999922, | |
| "grad_norm": 0.2821531891822815, | |
| "learning_rate": 6.807072045319567e-05, | |
| "loss": 0.731, | |
| "step": 19910 | |
| }, | |
| { | |
| "epoch": 0.2227926250272619, | |
| "grad_norm": 0.2773050367832184, | |
| "learning_rate": 6.784229521677555e-05, | |
| "loss": 0.7369, | |
| "step": 19920 | |
| }, | |
| { | |
| "epoch": 0.22290446871452457, | |
| "grad_norm": 0.2531367838382721, | |
| "learning_rate": 6.761386998035543e-05, | |
| "loss": 0.7399, | |
| "step": 19930 | |
| }, | |
| { | |
| "epoch": 0.22301631240178726, | |
| "grad_norm": 0.28158465027809143, | |
| "learning_rate": 6.73854447439353e-05, | |
| "loss": 0.7523, | |
| "step": 19940 | |
| }, | |
| { | |
| "epoch": 0.22312815608904996, | |
| "grad_norm": 0.25612935423851013, | |
| "learning_rate": 6.71570195075152e-05, | |
| "loss": 0.7725, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 0.22323999977631262, | |
| "grad_norm": 0.26996153593063354, | |
| "learning_rate": 6.692859427109507e-05, | |
| "loss": 0.7823, | |
| "step": 19960 | |
| }, | |
| { | |
| "epoch": 0.2233518434635753, | |
| "grad_norm": 0.28008782863616943, | |
| "learning_rate": 6.670016903467495e-05, | |
| "loss": 0.7679, | |
| "step": 19970 | |
| }, | |
| { | |
| "epoch": 0.22346368715083798, | |
| "grad_norm": 0.27016493678092957, | |
| "learning_rate": 6.647174379825483e-05, | |
| "loss": 0.7617, | |
| "step": 19980 | |
| }, | |
| { | |
| "epoch": 0.22357553083810067, | |
| "grad_norm": 0.2679850459098816, | |
| "learning_rate": 6.624331856183472e-05, | |
| "loss": 0.7737, | |
| "step": 19990 | |
| }, | |
| { | |
| "epoch": 0.22368737452536336, | |
| "grad_norm": 0.2570480406284332, | |
| "learning_rate": 6.601489332541459e-05, | |
| "loss": 0.758, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.22379921821262602, | |
| "grad_norm": 0.2503785490989685, | |
| "learning_rate": 6.578646808899447e-05, | |
| "loss": 0.761, | |
| "step": 20010 | |
| }, | |
| { | |
| "epoch": 0.2239110618998887, | |
| "grad_norm": 0.2648092210292816, | |
| "learning_rate": 6.555804285257435e-05, | |
| "loss": 0.7532, | |
| "step": 20020 | |
| }, | |
| { | |
| "epoch": 0.2240229055871514, | |
| "grad_norm": 0.26829221844673157, | |
| "learning_rate": 6.532961761615423e-05, | |
| "loss": 0.7542, | |
| "step": 20030 | |
| }, | |
| { | |
| "epoch": 0.22413474927441407, | |
| "grad_norm": 0.27535539865493774, | |
| "learning_rate": 6.51011923797341e-05, | |
| "loss": 0.7578, | |
| "step": 20040 | |
| }, | |
| { | |
| "epoch": 0.22424659296167676, | |
| "grad_norm": 0.28674209117889404, | |
| "learning_rate": 6.4872767143314e-05, | |
| "loss": 0.756, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 0.22435843664893945, | |
| "grad_norm": 0.2523026466369629, | |
| "learning_rate": 6.464434190689387e-05, | |
| "loss": 0.7514, | |
| "step": 20060 | |
| }, | |
| { | |
| "epoch": 0.22447028033620212, | |
| "grad_norm": 0.24213305115699768, | |
| "learning_rate": 6.441591667047375e-05, | |
| "loss": 0.7546, | |
| "step": 20070 | |
| }, | |
| { | |
| "epoch": 0.2245821240234648, | |
| "grad_norm": 0.2779023349285126, | |
| "learning_rate": 6.418749143405363e-05, | |
| "loss": 0.7654, | |
| "step": 20080 | |
| }, | |
| { | |
| "epoch": 0.2246939677107275, | |
| "grad_norm": 0.28806111216545105, | |
| "learning_rate": 6.395906619763352e-05, | |
| "loss": 0.7612, | |
| "step": 20090 | |
| }, | |
| { | |
| "epoch": 0.22480581139799016, | |
| "grad_norm": 0.2637580931186676, | |
| "learning_rate": 6.373064096121339e-05, | |
| "loss": 0.7659, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.22491765508525285, | |
| "grad_norm": 0.2683275043964386, | |
| "learning_rate": 6.350221572479328e-05, | |
| "loss": 0.753, | |
| "step": 20110 | |
| }, | |
| { | |
| "epoch": 0.22502949877251555, | |
| "grad_norm": 0.2693597078323364, | |
| "learning_rate": 6.327379048837315e-05, | |
| "loss": 0.7697, | |
| "step": 20120 | |
| }, | |
| { | |
| "epoch": 0.2251413424597782, | |
| "grad_norm": 0.26335635781288147, | |
| "learning_rate": 6.304536525195304e-05, | |
| "loss": 0.7644, | |
| "step": 20130 | |
| }, | |
| { | |
| "epoch": 0.2252531861470409, | |
| "grad_norm": 0.29237446188926697, | |
| "learning_rate": 6.28169400155329e-05, | |
| "loss": 0.7721, | |
| "step": 20140 | |
| }, | |
| { | |
| "epoch": 0.22536502983430357, | |
| "grad_norm": 0.3080182373523712, | |
| "learning_rate": 6.25885147791128e-05, | |
| "loss": 0.7666, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 0.22547687352156626, | |
| "grad_norm": 0.2831542193889618, | |
| "learning_rate": 6.236008954269268e-05, | |
| "loss": 0.7805, | |
| "step": 20160 | |
| }, | |
| { | |
| "epoch": 0.22558871720882895, | |
| "grad_norm": 0.2860835790634155, | |
| "learning_rate": 6.213166430627257e-05, | |
| "loss": 0.7816, | |
| "step": 20170 | |
| }, | |
| { | |
| "epoch": 0.2257005608960916, | |
| "grad_norm": 0.28273066878318787, | |
| "learning_rate": 6.190323906985244e-05, | |
| "loss": 0.7812, | |
| "step": 20180 | |
| }, | |
| { | |
| "epoch": 0.2258124045833543, | |
| "grad_norm": 0.29203614592552185, | |
| "learning_rate": 6.167481383343232e-05, | |
| "loss": 0.7699, | |
| "step": 20190 | |
| }, | |
| { | |
| "epoch": 0.225924248270617, | |
| "grad_norm": 0.2811570167541504, | |
| "learning_rate": 6.14463885970122e-05, | |
| "loss": 0.7833, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.22603609195787966, | |
| "grad_norm": 0.30047500133514404, | |
| "learning_rate": 6.121796336059208e-05, | |
| "loss": 0.7594, | |
| "step": 20210 | |
| }, | |
| { | |
| "epoch": 0.22614793564514235, | |
| "grad_norm": 0.2838903069496155, | |
| "learning_rate": 6.098953812417196e-05, | |
| "loss": 0.7678, | |
| "step": 20220 | |
| }, | |
| { | |
| "epoch": 0.22625977933240504, | |
| "grad_norm": 0.2840651273727417, | |
| "learning_rate": 6.0761112887751836e-05, | |
| "loss": 0.7546, | |
| "step": 20230 | |
| }, | |
| { | |
| "epoch": 0.2263716230196677, | |
| "grad_norm": 0.31575652956962585, | |
| "learning_rate": 6.053268765133172e-05, | |
| "loss": 0.7533, | |
| "step": 20240 | |
| }, | |
| { | |
| "epoch": 0.2264834667069304, | |
| "grad_norm": 0.2692145109176636, | |
| "learning_rate": 6.03042624149116e-05, | |
| "loss": 0.744, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 0.2265953103941931, | |
| "grad_norm": 0.3094116449356079, | |
| "learning_rate": 6.007583717849148e-05, | |
| "loss": 0.7708, | |
| "step": 20260 | |
| }, | |
| { | |
| "epoch": 0.22670715408145575, | |
| "grad_norm": 0.3123047947883606, | |
| "learning_rate": 5.984741194207136e-05, | |
| "loss": 0.7431, | |
| "step": 20270 | |
| }, | |
| { | |
| "epoch": 0.22681899776871844, | |
| "grad_norm": 0.2733646631240845, | |
| "learning_rate": 5.961898670565124e-05, | |
| "loss": 0.762, | |
| "step": 20280 | |
| }, | |
| { | |
| "epoch": 0.2269308414559811, | |
| "grad_norm": 0.23944342136383057, | |
| "learning_rate": 5.939056146923112e-05, | |
| "loss": 0.7488, | |
| "step": 20290 | |
| }, | |
| { | |
| "epoch": 0.2270426851432438, | |
| "grad_norm": 0.2459600865840912, | |
| "learning_rate": 5.9162136232811e-05, | |
| "loss": 0.7443, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.2271545288305065, | |
| "grad_norm": 0.2502724826335907, | |
| "learning_rate": 5.893371099639088e-05, | |
| "loss": 0.7417, | |
| "step": 20310 | |
| }, | |
| { | |
| "epoch": 0.22726637251776916, | |
| "grad_norm": 0.23721522092819214, | |
| "learning_rate": 5.870528575997076e-05, | |
| "loss": 0.7393, | |
| "step": 20320 | |
| }, | |
| { | |
| "epoch": 0.22737821620503185, | |
| "grad_norm": 0.2526785135269165, | |
| "learning_rate": 5.847686052355064e-05, | |
| "loss": 0.7346, | |
| "step": 20330 | |
| }, | |
| { | |
| "epoch": 0.22749005989229454, | |
| "grad_norm": 0.2573647201061249, | |
| "learning_rate": 5.824843528713052e-05, | |
| "loss": 0.7192, | |
| "step": 20340 | |
| }, | |
| { | |
| "epoch": 0.2276019035795572, | |
| "grad_norm": 0.2632768750190735, | |
| "learning_rate": 5.80200100507104e-05, | |
| "loss": 0.7234, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 0.2277137472668199, | |
| "grad_norm": 0.2589345872402191, | |
| "learning_rate": 5.779158481429028e-05, | |
| "loss": 0.7165, | |
| "step": 20360 | |
| }, | |
| { | |
| "epoch": 0.22782559095408259, | |
| "grad_norm": 0.2480648308992386, | |
| "learning_rate": 5.756315957787016e-05, | |
| "loss": 0.7099, | |
| "step": 20370 | |
| }, | |
| { | |
| "epoch": 0.22793743464134525, | |
| "grad_norm": 0.24949654936790466, | |
| "learning_rate": 5.733473434145004e-05, | |
| "loss": 0.7187, | |
| "step": 20380 | |
| }, | |
| { | |
| "epoch": 0.22804927832860794, | |
| "grad_norm": 0.25637611746788025, | |
| "learning_rate": 5.710630910502993e-05, | |
| "loss": 0.7098, | |
| "step": 20390 | |
| }, | |
| { | |
| "epoch": 0.22816112201587063, | |
| "grad_norm": 0.28809231519699097, | |
| "learning_rate": 5.687788386860981e-05, | |
| "loss": 0.7315, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.2282729657031333, | |
| "grad_norm": 0.25564566254615784, | |
| "learning_rate": 5.6649458632189686e-05, | |
| "loss": 0.7319, | |
| "step": 20410 | |
| }, | |
| { | |
| "epoch": 0.228384809390396, | |
| "grad_norm": 0.2693794369697571, | |
| "learning_rate": 5.642103339576957e-05, | |
| "loss": 0.7173, | |
| "step": 20420 | |
| }, | |
| { | |
| "epoch": 0.22849665307765865, | |
| "grad_norm": 0.24680989980697632, | |
| "learning_rate": 5.619260815934945e-05, | |
| "loss": 0.708, | |
| "step": 20430 | |
| }, | |
| { | |
| "epoch": 0.22860849676492134, | |
| "grad_norm": 0.2790026068687439, | |
| "learning_rate": 5.596418292292933e-05, | |
| "loss": 0.7023, | |
| "step": 20440 | |
| }, | |
| { | |
| "epoch": 0.22872034045218403, | |
| "grad_norm": 0.2656199038028717, | |
| "learning_rate": 5.573575768650921e-05, | |
| "loss": 0.7113, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 0.2288321841394467, | |
| "grad_norm": 0.30832743644714355, | |
| "learning_rate": 5.550733245008909e-05, | |
| "loss": 0.7161, | |
| "step": 20460 | |
| }, | |
| { | |
| "epoch": 0.2289440278267094, | |
| "grad_norm": 0.27060794830322266, | |
| "learning_rate": 5.527890721366897e-05, | |
| "loss": 0.7208, | |
| "step": 20470 | |
| }, | |
| { | |
| "epoch": 0.22905587151397208, | |
| "grad_norm": 0.26036307215690613, | |
| "learning_rate": 5.505048197724885e-05, | |
| "loss": 0.7004, | |
| "step": 20480 | |
| }, | |
| { | |
| "epoch": 0.22916771520123475, | |
| "grad_norm": 0.2758086919784546, | |
| "learning_rate": 5.482205674082873e-05, | |
| "loss": 0.7179, | |
| "step": 20490 | |
| }, | |
| { | |
| "epoch": 0.22927955888849744, | |
| "grad_norm": 0.2821243107318878, | |
| "learning_rate": 5.459363150440861e-05, | |
| "loss": 0.7255, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.22939140257576013, | |
| "grad_norm": 0.2782810628414154, | |
| "learning_rate": 5.436520626798849e-05, | |
| "loss": 0.7149, | |
| "step": 20510 | |
| }, | |
| { | |
| "epoch": 0.2295032462630228, | |
| "grad_norm": 0.2755940854549408, | |
| "learning_rate": 5.413678103156837e-05, | |
| "loss": 0.7117, | |
| "step": 20520 | |
| }, | |
| { | |
| "epoch": 0.22961508995028548, | |
| "grad_norm": 0.29176777601242065, | |
| "learning_rate": 5.390835579514825e-05, | |
| "loss": 0.7188, | |
| "step": 20530 | |
| }, | |
| { | |
| "epoch": 0.22972693363754818, | |
| "grad_norm": 0.27739444375038147, | |
| "learning_rate": 5.367993055872813e-05, | |
| "loss": 0.7196, | |
| "step": 20540 | |
| }, | |
| { | |
| "epoch": 0.22983877732481084, | |
| "grad_norm": 0.27187204360961914, | |
| "learning_rate": 5.345150532230801e-05, | |
| "loss": 0.722, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 0.22995062101207353, | |
| "grad_norm": 0.2951996624469757, | |
| "learning_rate": 5.322308008588789e-05, | |
| "loss": 0.7325, | |
| "step": 20560 | |
| }, | |
| { | |
| "epoch": 0.2300624646993362, | |
| "grad_norm": 0.2677932381629944, | |
| "learning_rate": 5.299465484946777e-05, | |
| "loss": 0.7263, | |
| "step": 20570 | |
| }, | |
| { | |
| "epoch": 0.23017430838659889, | |
| "grad_norm": 0.29231807589530945, | |
| "learning_rate": 5.2766229613047654e-05, | |
| "loss": 0.7284, | |
| "step": 20580 | |
| }, | |
| { | |
| "epoch": 0.23028615207386158, | |
| "grad_norm": 0.30211326479911804, | |
| "learning_rate": 5.253780437662753e-05, | |
| "loss": 0.7222, | |
| "step": 20590 | |
| }, | |
| { | |
| "epoch": 0.23039799576112424, | |
| "grad_norm": 0.29821720719337463, | |
| "learning_rate": 5.230937914020741e-05, | |
| "loss": 0.7316, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.23050983944838693, | |
| "grad_norm": 0.3019379675388336, | |
| "learning_rate": 5.208095390378729e-05, | |
| "loss": 0.7328, | |
| "step": 20610 | |
| }, | |
| { | |
| "epoch": 0.23062168313564962, | |
| "grad_norm": 0.2569403052330017, | |
| "learning_rate": 5.185252866736717e-05, | |
| "loss": 0.7215, | |
| "step": 20620 | |
| }, | |
| { | |
| "epoch": 0.2307335268229123, | |
| "grad_norm": 0.3151782155036926, | |
| "learning_rate": 5.1624103430947054e-05, | |
| "loss": 0.7326, | |
| "step": 20630 | |
| }, | |
| { | |
| "epoch": 0.23084537051017498, | |
| "grad_norm": 0.2748591899871826, | |
| "learning_rate": 5.139567819452693e-05, | |
| "loss": 0.7359, | |
| "step": 20640 | |
| }, | |
| { | |
| "epoch": 0.23095721419743767, | |
| "grad_norm": 0.27494433522224426, | |
| "learning_rate": 5.116725295810681e-05, | |
| "loss": 0.7351, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 0.23106905788470034, | |
| "grad_norm": 0.29428452253341675, | |
| "learning_rate": 5.093882772168669e-05, | |
| "loss": 0.7361, | |
| "step": 20660 | |
| }, | |
| { | |
| "epoch": 0.23118090157196303, | |
| "grad_norm": 0.2924981117248535, | |
| "learning_rate": 5.071040248526657e-05, | |
| "loss": 0.7539, | |
| "step": 20670 | |
| }, | |
| { | |
| "epoch": 0.23129274525922572, | |
| "grad_norm": 0.28647035360336304, | |
| "learning_rate": 5.0481977248846455e-05, | |
| "loss": 0.7576, | |
| "step": 20680 | |
| }, | |
| { | |
| "epoch": 0.23140458894648838, | |
| "grad_norm": 0.3107542097568512, | |
| "learning_rate": 5.025355201242633e-05, | |
| "loss": 0.7615, | |
| "step": 20690 | |
| }, | |
| { | |
| "epoch": 0.23151643263375107, | |
| "grad_norm": 0.27186501026153564, | |
| "learning_rate": 5.0025126776006213e-05, | |
| "loss": 0.7641, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.23162827632101374, | |
| "grad_norm": 0.2838156819343567, | |
| "learning_rate": 4.9796701539586096e-05, | |
| "loss": 0.7695, | |
| "step": 20710 | |
| }, | |
| { | |
| "epoch": 0.23174012000827643, | |
| "grad_norm": 0.3377101421356201, | |
| "learning_rate": 4.956827630316597e-05, | |
| "loss": 0.7696, | |
| "step": 20720 | |
| }, | |
| { | |
| "epoch": 0.23185196369553912, | |
| "grad_norm": 0.3177778422832489, | |
| "learning_rate": 4.9339851066745855e-05, | |
| "loss": 0.7677, | |
| "step": 20730 | |
| }, | |
| { | |
| "epoch": 0.23196380738280178, | |
| "grad_norm": 0.3157583773136139, | |
| "learning_rate": 4.911142583032573e-05, | |
| "loss": 0.7653, | |
| "step": 20740 | |
| }, | |
| { | |
| "epoch": 0.23207565107006448, | |
| "grad_norm": 0.3123907148838043, | |
| "learning_rate": 4.8883000593905614e-05, | |
| "loss": 0.7677, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 0.23218749475732717, | |
| "grad_norm": 0.30460426211357117, | |
| "learning_rate": 4.86545753574855e-05, | |
| "loss": 0.7743, | |
| "step": 20760 | |
| }, | |
| { | |
| "epoch": 0.23229933844458983, | |
| "grad_norm": 0.27507251501083374, | |
| "learning_rate": 4.842615012106537e-05, | |
| "loss": 0.767, | |
| "step": 20770 | |
| }, | |
| { | |
| "epoch": 0.23241118213185252, | |
| "grad_norm": 0.3233499228954315, | |
| "learning_rate": 4.8197724884645256e-05, | |
| "loss": 0.7717, | |
| "step": 20780 | |
| }, | |
| { | |
| "epoch": 0.23252302581911521, | |
| "grad_norm": 0.30144819617271423, | |
| "learning_rate": 4.796929964822513e-05, | |
| "loss": 0.7609, | |
| "step": 20790 | |
| }, | |
| { | |
| "epoch": 0.23263486950637788, | |
| "grad_norm": 0.29588454961776733, | |
| "learning_rate": 4.7740874411805014e-05, | |
| "loss": 0.7682, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.23274671319364057, | |
| "grad_norm": 0.3111203610897064, | |
| "learning_rate": 4.75124491753849e-05, | |
| "loss": 0.7652, | |
| "step": 20810 | |
| }, | |
| { | |
| "epoch": 0.23285855688090326, | |
| "grad_norm": 0.28917646408081055, | |
| "learning_rate": 4.728402393896477e-05, | |
| "loss": 0.7584, | |
| "step": 20820 | |
| }, | |
| { | |
| "epoch": 0.23297040056816593, | |
| "grad_norm": 0.3156343698501587, | |
| "learning_rate": 4.7055598702544656e-05, | |
| "loss": 0.7643, | |
| "step": 20830 | |
| }, | |
| { | |
| "epoch": 0.23308224425542862, | |
| "grad_norm": 0.2909680902957916, | |
| "learning_rate": 4.682717346612454e-05, | |
| "loss": 0.7613, | |
| "step": 20840 | |
| }, | |
| { | |
| "epoch": 0.2331940879426913, | |
| "grad_norm": 0.3006870746612549, | |
| "learning_rate": 4.659874822970442e-05, | |
| "loss": 0.7603, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 0.23330593162995397, | |
| "grad_norm": 0.2844945192337036, | |
| "learning_rate": 4.6370322993284304e-05, | |
| "loss": 0.7589, | |
| "step": 20860 | |
| }, | |
| { | |
| "epoch": 0.23341777531721666, | |
| "grad_norm": 0.26857924461364746, | |
| "learning_rate": 4.614189775686418e-05, | |
| "loss": 0.7401, | |
| "step": 20870 | |
| }, | |
| { | |
| "epoch": 0.23352961900447933, | |
| "grad_norm": 0.31332314014434814, | |
| "learning_rate": 4.591347252044406e-05, | |
| "loss": 0.7468, | |
| "step": 20880 | |
| }, | |
| { | |
| "epoch": 0.23364146269174202, | |
| "grad_norm": 0.28083765506744385, | |
| "learning_rate": 4.568504728402394e-05, | |
| "loss": 0.7451, | |
| "step": 20890 | |
| }, | |
| { | |
| "epoch": 0.2337533063790047, | |
| "grad_norm": 0.29185009002685547, | |
| "learning_rate": 4.545662204760382e-05, | |
| "loss": 0.7478, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.23386515006626737, | |
| "grad_norm": 0.30532801151275635, | |
| "learning_rate": 4.5228196811183705e-05, | |
| "loss": 0.7404, | |
| "step": 20910 | |
| }, | |
| { | |
| "epoch": 0.23397699375353007, | |
| "grad_norm": 0.2724134922027588, | |
| "learning_rate": 4.499977157476358e-05, | |
| "loss": 0.732, | |
| "step": 20920 | |
| }, | |
| { | |
| "epoch": 0.23408883744079276, | |
| "grad_norm": 0.29753822088241577, | |
| "learning_rate": 4.4771346338343464e-05, | |
| "loss": 0.7236, | |
| "step": 20930 | |
| }, | |
| { | |
| "epoch": 0.23420068112805542, | |
| "grad_norm": 0.31980055570602417, | |
| "learning_rate": 4.454292110192334e-05, | |
| "loss": 0.7407, | |
| "step": 20940 | |
| }, | |
| { | |
| "epoch": 0.2343125248153181, | |
| "grad_norm": 0.29578351974487305, | |
| "learning_rate": 4.431449586550322e-05, | |
| "loss": 0.7166, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 0.2344243685025808, | |
| "grad_norm": 0.25261184573173523, | |
| "learning_rate": 4.4086070629083105e-05, | |
| "loss": 0.7195, | |
| "step": 20960 | |
| }, | |
| { | |
| "epoch": 0.23453621218984347, | |
| "grad_norm": 0.2669534385204315, | |
| "learning_rate": 4.385764539266298e-05, | |
| "loss": 0.7224, | |
| "step": 20970 | |
| }, | |
| { | |
| "epoch": 0.23464805587710616, | |
| "grad_norm": 0.2817215919494629, | |
| "learning_rate": 4.3629220156242864e-05, | |
| "loss": 0.7405, | |
| "step": 20980 | |
| }, | |
| { | |
| "epoch": 0.23475989956436885, | |
| "grad_norm": 0.27033400535583496, | |
| "learning_rate": 4.340079491982275e-05, | |
| "loss": 0.7292, | |
| "step": 20990 | |
| }, | |
| { | |
| "epoch": 0.23487174325163152, | |
| "grad_norm": 0.3083013594150543, | |
| "learning_rate": 4.317236968340262e-05, | |
| "loss": 0.7271, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.2349835869388942, | |
| "grad_norm": 0.27074989676475525, | |
| "learning_rate": 4.2943944446982506e-05, | |
| "loss": 0.7346, | |
| "step": 21010 | |
| }, | |
| { | |
| "epoch": 0.23509543062615687, | |
| "grad_norm": 0.31609755754470825, | |
| "learning_rate": 4.271551921056238e-05, | |
| "loss": 0.7285, | |
| "step": 21020 | |
| }, | |
| { | |
| "epoch": 0.23520727431341956, | |
| "grad_norm": 0.27084672451019287, | |
| "learning_rate": 4.2487093974142265e-05, | |
| "loss": 0.7411, | |
| "step": 21030 | |
| }, | |
| { | |
| "epoch": 0.23531911800068225, | |
| "grad_norm": 0.26669842004776, | |
| "learning_rate": 4.225866873772215e-05, | |
| "loss": 0.7423, | |
| "step": 21040 | |
| }, | |
| { | |
| "epoch": 0.23543096168794492, | |
| "grad_norm": 0.2873358428478241, | |
| "learning_rate": 4.2030243501302024e-05, | |
| "loss": 0.7345, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 0.2355428053752076, | |
| "grad_norm": 0.2831687033176422, | |
| "learning_rate": 4.1801818264881906e-05, | |
| "loss": 0.7537, | |
| "step": 21060 | |
| }, | |
| { | |
| "epoch": 0.2356546490624703, | |
| "grad_norm": 0.2781788110733032, | |
| "learning_rate": 4.157339302846178e-05, | |
| "loss": 0.7494, | |
| "step": 21070 | |
| }, | |
| { | |
| "epoch": 0.23576649274973296, | |
| "grad_norm": 0.27109071612358093, | |
| "learning_rate": 4.1344967792041665e-05, | |
| "loss": 0.7493, | |
| "step": 21080 | |
| }, | |
| { | |
| "epoch": 0.23587833643699566, | |
| "grad_norm": 0.25398164987564087, | |
| "learning_rate": 4.111654255562155e-05, | |
| "loss": 0.7369, | |
| "step": 21090 | |
| }, | |
| { | |
| "epoch": 0.23599018012425835, | |
| "grad_norm": 0.3150353729724884, | |
| "learning_rate": 4.0888117319201424e-05, | |
| "loss": 0.754, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.236102023811521, | |
| "grad_norm": 0.27384257316589355, | |
| "learning_rate": 4.065969208278131e-05, | |
| "loss": 0.7439, | |
| "step": 21110 | |
| }, | |
| { | |
| "epoch": 0.2362138674987837, | |
| "grad_norm": 0.2770559787750244, | |
| "learning_rate": 4.043126684636118e-05, | |
| "loss": 0.7391, | |
| "step": 21120 | |
| }, | |
| { | |
| "epoch": 0.2363257111860464, | |
| "grad_norm": 0.29367002844810486, | |
| "learning_rate": 4.0202841609941066e-05, | |
| "loss": 0.746, | |
| "step": 21130 | |
| }, | |
| { | |
| "epoch": 0.23643755487330906, | |
| "grad_norm": 0.2554051876068115, | |
| "learning_rate": 3.997441637352095e-05, | |
| "loss": 0.7386, | |
| "step": 21140 | |
| }, | |
| { | |
| "epoch": 0.23654939856057175, | |
| "grad_norm": 0.2943428158760071, | |
| "learning_rate": 3.9745991137100825e-05, | |
| "loss": 0.7437, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 0.2366612422478344, | |
| "grad_norm": 0.24465301632881165, | |
| "learning_rate": 3.951756590068071e-05, | |
| "loss": 0.7331, | |
| "step": 21160 | |
| }, | |
| { | |
| "epoch": 0.2367730859350971, | |
| "grad_norm": 0.2545934021472931, | |
| "learning_rate": 3.9289140664260584e-05, | |
| "loss": 0.7361, | |
| "step": 21170 | |
| }, | |
| { | |
| "epoch": 0.2368849296223598, | |
| "grad_norm": 0.2792121469974518, | |
| "learning_rate": 3.9060715427840466e-05, | |
| "loss": 0.7238, | |
| "step": 21180 | |
| }, | |
| { | |
| "epoch": 0.23699677330962246, | |
| "grad_norm": 0.27943745255470276, | |
| "learning_rate": 3.883229019142035e-05, | |
| "loss": 0.726, | |
| "step": 21190 | |
| }, | |
| { | |
| "epoch": 0.23710861699688515, | |
| "grad_norm": 0.2514471411705017, | |
| "learning_rate": 3.8603864955000225e-05, | |
| "loss": 0.7214, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.23722046068414784, | |
| "grad_norm": 0.2698551416397095, | |
| "learning_rate": 3.837543971858011e-05, | |
| "loss": 0.7318, | |
| "step": 21210 | |
| }, | |
| { | |
| "epoch": 0.2373323043714105, | |
| "grad_norm": 0.29603877663612366, | |
| "learning_rate": 3.814701448215999e-05, | |
| "loss": 0.742, | |
| "step": 21220 | |
| }, | |
| { | |
| "epoch": 0.2374441480586732, | |
| "grad_norm": 0.26655495166778564, | |
| "learning_rate": 3.791858924573987e-05, | |
| "loss": 0.7331, | |
| "step": 21230 | |
| }, | |
| { | |
| "epoch": 0.2375559917459359, | |
| "grad_norm": 0.29367104172706604, | |
| "learning_rate": 3.769016400931975e-05, | |
| "loss": 0.7233, | |
| "step": 21240 | |
| }, | |
| { | |
| "epoch": 0.23766783543319855, | |
| "grad_norm": 0.2680334746837616, | |
| "learning_rate": 3.7461738772899626e-05, | |
| "loss": 0.732, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 0.23777967912046125, | |
| "grad_norm": 0.2748298943042755, | |
| "learning_rate": 3.723331353647951e-05, | |
| "loss": 0.7453, | |
| "step": 21260 | |
| }, | |
| { | |
| "epoch": 0.23789152280772394, | |
| "grad_norm": 0.28276947140693665, | |
| "learning_rate": 3.700488830005939e-05, | |
| "loss": 0.7524, | |
| "step": 21270 | |
| }, | |
| { | |
| "epoch": 0.2380033664949866, | |
| "grad_norm": 0.2645372450351715, | |
| "learning_rate": 3.677646306363927e-05, | |
| "loss": 0.7542, | |
| "step": 21280 | |
| }, | |
| { | |
| "epoch": 0.2381152101822493, | |
| "grad_norm": 0.2866505980491638, | |
| "learning_rate": 3.654803782721916e-05, | |
| "loss": 0.7447, | |
| "step": 21290 | |
| }, | |
| { | |
| "epoch": 0.23822705386951196, | |
| "grad_norm": 0.29611489176750183, | |
| "learning_rate": 3.631961259079903e-05, | |
| "loss": 0.7662, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.23833889755677465, | |
| "grad_norm": 0.29184749722480774, | |
| "learning_rate": 3.6091187354378916e-05, | |
| "loss": 0.7558, | |
| "step": 21310 | |
| }, | |
| { | |
| "epoch": 0.23845074124403734, | |
| "grad_norm": 0.27304571866989136, | |
| "learning_rate": 3.58627621179588e-05, | |
| "loss": 0.7578, | |
| "step": 21320 | |
| }, | |
| { | |
| "epoch": 0.2385625849313, | |
| "grad_norm": 0.2700962424278259, | |
| "learning_rate": 3.5634336881538675e-05, | |
| "loss": 0.7411, | |
| "step": 21330 | |
| }, | |
| { | |
| "epoch": 0.2386744286185627, | |
| "grad_norm": 0.2845793664455414, | |
| "learning_rate": 3.540591164511856e-05, | |
| "loss": 0.7392, | |
| "step": 21340 | |
| }, | |
| { | |
| "epoch": 0.2387862723058254, | |
| "grad_norm": 0.32136180996894836, | |
| "learning_rate": 3.5177486408698433e-05, | |
| "loss": 0.7431, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 0.23889811599308805, | |
| "grad_norm": 0.26846998929977417, | |
| "learning_rate": 3.4949061172278316e-05, | |
| "loss": 0.737, | |
| "step": 21360 | |
| }, | |
| { | |
| "epoch": 0.23900995968035074, | |
| "grad_norm": 0.26363828778266907, | |
| "learning_rate": 3.47206359358582e-05, | |
| "loss": 0.7416, | |
| "step": 21370 | |
| }, | |
| { | |
| "epoch": 0.23912180336761343, | |
| "grad_norm": 0.2900106906890869, | |
| "learning_rate": 3.4492210699438075e-05, | |
| "loss": 0.7373, | |
| "step": 21380 | |
| }, | |
| { | |
| "epoch": 0.2392336470548761, | |
| "grad_norm": 0.2762589156627655, | |
| "learning_rate": 3.426378546301796e-05, | |
| "loss": 0.7379, | |
| "step": 21390 | |
| }, | |
| { | |
| "epoch": 0.2393454907421388, | |
| "grad_norm": 0.2697104513645172, | |
| "learning_rate": 3.4035360226597834e-05, | |
| "loss": 0.7448, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.23945733442940148, | |
| "grad_norm": 0.2901761829853058, | |
| "learning_rate": 3.380693499017772e-05, | |
| "loss": 0.7394, | |
| "step": 21410 | |
| }, | |
| { | |
| "epoch": 0.23956917811666414, | |
| "grad_norm": 0.245674267411232, | |
| "learning_rate": 3.35785097537576e-05, | |
| "loss": 0.7387, | |
| "step": 21420 | |
| }, | |
| { | |
| "epoch": 0.23968102180392684, | |
| "grad_norm": 0.2713403105735779, | |
| "learning_rate": 3.3350084517337476e-05, | |
| "loss": 0.7604, | |
| "step": 21430 | |
| }, | |
| { | |
| "epoch": 0.2397928654911895, | |
| "grad_norm": 0.27368244528770447, | |
| "learning_rate": 3.312165928091736e-05, | |
| "loss": 0.7489, | |
| "step": 21440 | |
| }, | |
| { | |
| "epoch": 0.2399047091784522, | |
| "grad_norm": 0.3079991340637207, | |
| "learning_rate": 3.2893234044497234e-05, | |
| "loss": 0.7653, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 0.24001655286571488, | |
| "grad_norm": 0.2920658588409424, | |
| "learning_rate": 3.266480880807712e-05, | |
| "loss": 0.7588, | |
| "step": 21460 | |
| }, | |
| { | |
| "epoch": 0.24012839655297755, | |
| "grad_norm": 0.27589842677116394, | |
| "learning_rate": 3.2436383571657e-05, | |
| "loss": 0.7607, | |
| "step": 21470 | |
| }, | |
| { | |
| "epoch": 0.24024024024024024, | |
| "grad_norm": 0.2592112720012665, | |
| "learning_rate": 3.2207958335236876e-05, | |
| "loss": 0.745, | |
| "step": 21480 | |
| }, | |
| { | |
| "epoch": 0.24035208392750293, | |
| "grad_norm": 0.27625855803489685, | |
| "learning_rate": 3.197953309881676e-05, | |
| "loss": 0.7488, | |
| "step": 21490 | |
| }, | |
| { | |
| "epoch": 0.2404639276147656, | |
| "grad_norm": 0.2769569456577301, | |
| "learning_rate": 3.175110786239664e-05, | |
| "loss": 0.7326, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.24057577130202829, | |
| "grad_norm": 0.2705914080142975, | |
| "learning_rate": 3.152268262597652e-05, | |
| "loss": 0.7512, | |
| "step": 21510 | |
| }, | |
| { | |
| "epoch": 0.24068761498929098, | |
| "grad_norm": 0.2655676603317261, | |
| "learning_rate": 3.12942573895564e-05, | |
| "loss": 0.7366, | |
| "step": 21520 | |
| }, | |
| { | |
| "epoch": 0.24079945867655364, | |
| "grad_norm": 0.2606657147407532, | |
| "learning_rate": 3.106583215313628e-05, | |
| "loss": 0.7436, | |
| "step": 21530 | |
| }, | |
| { | |
| "epoch": 0.24091130236381633, | |
| "grad_norm": 0.27843552827835083, | |
| "learning_rate": 3.083740691671616e-05, | |
| "loss": 0.7342, | |
| "step": 21540 | |
| }, | |
| { | |
| "epoch": 0.24102314605107902, | |
| "grad_norm": 0.27866050601005554, | |
| "learning_rate": 3.060898168029604e-05, | |
| "loss": 0.7305, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 0.2411349897383417, | |
| "grad_norm": 0.2803070545196533, | |
| "learning_rate": 3.0380556443875918e-05, | |
| "loss": 0.727, | |
| "step": 21560 | |
| }, | |
| { | |
| "epoch": 0.24124683342560438, | |
| "grad_norm": 0.27220121026039124, | |
| "learning_rate": 3.01521312074558e-05, | |
| "loss": 0.7195, | |
| "step": 21570 | |
| }, | |
| { | |
| "epoch": 0.24135867711286707, | |
| "grad_norm": 0.26060426235198975, | |
| "learning_rate": 2.992370597103568e-05, | |
| "loss": 0.7013, | |
| "step": 21580 | |
| }, | |
| { | |
| "epoch": 0.24147052080012973, | |
| "grad_norm": 0.24253526329994202, | |
| "learning_rate": 2.969528073461556e-05, | |
| "loss": 0.6925, | |
| "step": 21590 | |
| }, | |
| { | |
| "epoch": 0.24158236448739243, | |
| "grad_norm": 0.26293566823005676, | |
| "learning_rate": 2.946685549819544e-05, | |
| "loss": 0.7028, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.2416942081746551, | |
| "grad_norm": 0.26427412033081055, | |
| "learning_rate": 2.923843026177532e-05, | |
| "loss": 0.6993, | |
| "step": 21610 | |
| }, | |
| { | |
| "epoch": 0.24180605186191778, | |
| "grad_norm": 0.26823869347572327, | |
| "learning_rate": 2.90100050253552e-05, | |
| "loss": 0.6999, | |
| "step": 21620 | |
| }, | |
| { | |
| "epoch": 0.24191789554918047, | |
| "grad_norm": 0.24203690886497498, | |
| "learning_rate": 2.878157978893508e-05, | |
| "loss": 0.6906, | |
| "step": 21630 | |
| }, | |
| { | |
| "epoch": 0.24202973923644314, | |
| "grad_norm": 0.2612786889076233, | |
| "learning_rate": 2.8553154552514964e-05, | |
| "loss": 0.6952, | |
| "step": 21640 | |
| }, | |
| { | |
| "epoch": 0.24214158292370583, | |
| "grad_norm": 0.27152737975120544, | |
| "learning_rate": 2.8324729316094843e-05, | |
| "loss": 0.692, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 0.24225342661096852, | |
| "grad_norm": 0.2592925727367401, | |
| "learning_rate": 2.8096304079674726e-05, | |
| "loss": 0.6995, | |
| "step": 21660 | |
| }, | |
| { | |
| "epoch": 0.24236527029823118, | |
| "grad_norm": 0.2419063299894333, | |
| "learning_rate": 2.7867878843254605e-05, | |
| "loss": 0.7067, | |
| "step": 21670 | |
| }, | |
| { | |
| "epoch": 0.24247711398549388, | |
| "grad_norm": 0.24731135368347168, | |
| "learning_rate": 2.7639453606834485e-05, | |
| "loss": 0.734, | |
| "step": 21680 | |
| }, | |
| { | |
| "epoch": 0.24258895767275657, | |
| "grad_norm": 0.25746017694473267, | |
| "learning_rate": 2.7411028370414364e-05, | |
| "loss": 0.7075, | |
| "step": 21690 | |
| }, | |
| { | |
| "epoch": 0.24270080136001923, | |
| "grad_norm": 0.2521972060203552, | |
| "learning_rate": 2.7182603133994244e-05, | |
| "loss": 0.7137, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.24281264504728192, | |
| "grad_norm": 0.26796218752861023, | |
| "learning_rate": 2.6954177897574127e-05, | |
| "loss": 0.7227, | |
| "step": 21710 | |
| }, | |
| { | |
| "epoch": 0.2429244887345446, | |
| "grad_norm": 0.30404597520828247, | |
| "learning_rate": 2.6725752661154006e-05, | |
| "loss": 0.7243, | |
| "step": 21720 | |
| }, | |
| { | |
| "epoch": 0.24303633242180728, | |
| "grad_norm": 0.29561156034469604, | |
| "learning_rate": 2.6497327424733885e-05, | |
| "loss": 0.7357, | |
| "step": 21730 | |
| }, | |
| { | |
| "epoch": 0.24314817610906997, | |
| "grad_norm": 0.28066596388816833, | |
| "learning_rate": 2.6268902188313765e-05, | |
| "loss": 0.7224, | |
| "step": 21740 | |
| }, | |
| { | |
| "epoch": 0.24326001979633263, | |
| "grad_norm": 0.29235216975212097, | |
| "learning_rate": 2.6040476951893644e-05, | |
| "loss": 0.7288, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 0.24337186348359532, | |
| "grad_norm": 0.26750460267066956, | |
| "learning_rate": 2.5812051715473527e-05, | |
| "loss": 0.7414, | |
| "step": 21760 | |
| }, | |
| { | |
| "epoch": 0.24348370717085802, | |
| "grad_norm": 0.2707473039627075, | |
| "learning_rate": 2.5583626479053406e-05, | |
| "loss": 0.7478, | |
| "step": 21770 | |
| }, | |
| { | |
| "epoch": 0.24359555085812068, | |
| "grad_norm": 0.26526397466659546, | |
| "learning_rate": 2.5355201242633286e-05, | |
| "loss": 0.7513, | |
| "step": 21780 | |
| }, | |
| { | |
| "epoch": 0.24370739454538337, | |
| "grad_norm": 0.2362915724515915, | |
| "learning_rate": 2.5126776006213165e-05, | |
| "loss": 0.7507, | |
| "step": 21790 | |
| }, | |
| { | |
| "epoch": 0.24381923823264606, | |
| "grad_norm": 0.2512950599193573, | |
| "learning_rate": 2.4898350769793048e-05, | |
| "loss": 0.7417, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.24393108191990873, | |
| "grad_norm": 0.2366458922624588, | |
| "learning_rate": 2.4669925533372928e-05, | |
| "loss": 0.7402, | |
| "step": 21810 | |
| }, | |
| { | |
| "epoch": 0.24404292560717142, | |
| "grad_norm": 0.24888353049755096, | |
| "learning_rate": 2.4441500296952807e-05, | |
| "loss": 0.7456, | |
| "step": 21820 | |
| }, | |
| { | |
| "epoch": 0.2441547692944341, | |
| "grad_norm": 0.24143491685390472, | |
| "learning_rate": 2.4213075060532686e-05, | |
| "loss": 0.7405, | |
| "step": 21830 | |
| }, | |
| { | |
| "epoch": 0.24426661298169677, | |
| "grad_norm": 0.2669823169708252, | |
| "learning_rate": 2.3984649824112566e-05, | |
| "loss": 0.7544, | |
| "step": 21840 | |
| }, | |
| { | |
| "epoch": 0.24437845666895947, | |
| "grad_norm": 0.24328452348709106, | |
| "learning_rate": 2.375622458769245e-05, | |
| "loss": 0.7347, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 0.24449030035622216, | |
| "grad_norm": 0.26204219460487366, | |
| "learning_rate": 2.3527799351272328e-05, | |
| "loss": 0.7397, | |
| "step": 21860 | |
| }, | |
| { | |
| "epoch": 0.24460214404348482, | |
| "grad_norm": 0.2631550431251526, | |
| "learning_rate": 2.329937411485221e-05, | |
| "loss": 0.7413, | |
| "step": 21870 | |
| }, | |
| { | |
| "epoch": 0.2447139877307475, | |
| "grad_norm": 0.2729988694190979, | |
| "learning_rate": 2.307094887843209e-05, | |
| "loss": 0.7336, | |
| "step": 21880 | |
| }, | |
| { | |
| "epoch": 0.24482583141801018, | |
| "grad_norm": 0.2702917754650116, | |
| "learning_rate": 2.284252364201197e-05, | |
| "loss": 0.7294, | |
| "step": 21890 | |
| }, | |
| { | |
| "epoch": 0.24493767510527287, | |
| "grad_norm": 0.22882196307182312, | |
| "learning_rate": 2.2614098405591852e-05, | |
| "loss": 0.7164, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.24504951879253556, | |
| "grad_norm": 0.2660382390022278, | |
| "learning_rate": 2.2385673169171732e-05, | |
| "loss": 0.7231, | |
| "step": 21910 | |
| }, | |
| { | |
| "epoch": 0.24516136247979822, | |
| "grad_norm": 0.2580036222934723, | |
| "learning_rate": 2.215724793275161e-05, | |
| "loss": 0.7243, | |
| "step": 21920 | |
| }, | |
| { | |
| "epoch": 0.24527320616706091, | |
| "grad_norm": 0.25490158796310425, | |
| "learning_rate": 2.192882269633149e-05, | |
| "loss": 0.7129, | |
| "step": 21930 | |
| }, | |
| { | |
| "epoch": 0.2453850498543236, | |
| "grad_norm": 0.2626509368419647, | |
| "learning_rate": 2.1700397459911374e-05, | |
| "loss": 0.7177, | |
| "step": 21940 | |
| }, | |
| { | |
| "epoch": 0.24549689354158627, | |
| "grad_norm": 0.2642146646976471, | |
| "learning_rate": 2.1471972223491253e-05, | |
| "loss": 0.7119, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 0.24560873722884896, | |
| "grad_norm": 0.2683079242706299, | |
| "learning_rate": 2.1243546987071132e-05, | |
| "loss": 0.7226, | |
| "step": 21960 | |
| }, | |
| { | |
| "epoch": 0.24572058091611165, | |
| "grad_norm": 0.26513761281967163, | |
| "learning_rate": 2.1015121750651012e-05, | |
| "loss": 0.7276, | |
| "step": 21970 | |
| }, | |
| { | |
| "epoch": 0.24583242460337432, | |
| "grad_norm": 0.25856319069862366, | |
| "learning_rate": 2.078669651423089e-05, | |
| "loss": 0.7168, | |
| "step": 21980 | |
| }, | |
| { | |
| "epoch": 0.245944268290637, | |
| "grad_norm": 0.29048866033554077, | |
| "learning_rate": 2.0558271277810774e-05, | |
| "loss": 0.7189, | |
| "step": 21990 | |
| }, | |
| { | |
| "epoch": 0.2460561119778997, | |
| "grad_norm": 0.2775687575340271, | |
| "learning_rate": 2.0329846041390653e-05, | |
| "loss": 0.7276, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.24616795566516236, | |
| "grad_norm": 0.30157843232154846, | |
| "learning_rate": 2.0101420804970533e-05, | |
| "loss": 0.7435, | |
| "step": 22010 | |
| }, | |
| { | |
| "epoch": 0.24627979935242506, | |
| "grad_norm": 0.2602044939994812, | |
| "learning_rate": 1.9872995568550412e-05, | |
| "loss": 0.7365, | |
| "step": 22020 | |
| }, | |
| { | |
| "epoch": 0.24639164303968772, | |
| "grad_norm": 0.29975757002830505, | |
| "learning_rate": 1.9644570332130292e-05, | |
| "loss": 0.7484, | |
| "step": 22030 | |
| }, | |
| { | |
| "epoch": 0.2465034867269504, | |
| "grad_norm": 0.26586923003196716, | |
| "learning_rate": 1.9416145095710175e-05, | |
| "loss": 0.7499, | |
| "step": 22040 | |
| }, | |
| { | |
| "epoch": 0.2466153304142131, | |
| "grad_norm": 0.25447341799736023, | |
| "learning_rate": 1.9187719859290054e-05, | |
| "loss": 0.7523, | |
| "step": 22050 | |
| }, | |
| { | |
| "epoch": 0.24672717410147577, | |
| "grad_norm": 0.2876524031162262, | |
| "learning_rate": 1.8959294622869933e-05, | |
| "loss": 0.7532, | |
| "step": 22060 | |
| }, | |
| { | |
| "epoch": 0.24683901778873846, | |
| "grad_norm": 0.29897189140319824, | |
| "learning_rate": 1.8730869386449813e-05, | |
| "loss": 0.7339, | |
| "step": 22070 | |
| }, | |
| { | |
| "epoch": 0.24695086147600115, | |
| "grad_norm": 0.24629873037338257, | |
| "learning_rate": 1.8502444150029696e-05, | |
| "loss": 0.7253, | |
| "step": 22080 | |
| }, | |
| { | |
| "epoch": 0.2470627051632638, | |
| "grad_norm": 0.2844459116458893, | |
| "learning_rate": 1.827401891360958e-05, | |
| "loss": 0.7247, | |
| "step": 22090 | |
| }, | |
| { | |
| "epoch": 0.2471745488505265, | |
| "grad_norm": 0.2798469662666321, | |
| "learning_rate": 1.8045593677189458e-05, | |
| "loss": 0.7334, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.2472863925377892, | |
| "grad_norm": 0.26282501220703125, | |
| "learning_rate": 1.7817168440769337e-05, | |
| "loss": 0.735, | |
| "step": 22110 | |
| }, | |
| { | |
| "epoch": 0.24739823622505186, | |
| "grad_norm": 0.25192755460739136, | |
| "learning_rate": 1.7588743204349217e-05, | |
| "loss": 0.733, | |
| "step": 22120 | |
| }, | |
| { | |
| "epoch": 0.24751007991231455, | |
| "grad_norm": 0.2808292508125305, | |
| "learning_rate": 1.73603179679291e-05, | |
| "loss": 0.7403, | |
| "step": 22130 | |
| }, | |
| { | |
| "epoch": 0.24762192359957724, | |
| "grad_norm": 0.28252866864204407, | |
| "learning_rate": 1.713189273150898e-05, | |
| "loss": 0.7296, | |
| "step": 22140 | |
| }, | |
| { | |
| "epoch": 0.2477337672868399, | |
| "grad_norm": 0.2730456590652466, | |
| "learning_rate": 1.690346749508886e-05, | |
| "loss": 0.7321, | |
| "step": 22150 | |
| }, | |
| { | |
| "epoch": 0.2478456109741026, | |
| "grad_norm": 0.2562378942966461, | |
| "learning_rate": 1.6675042258668738e-05, | |
| "loss": 0.7195, | |
| "step": 22160 | |
| }, | |
| { | |
| "epoch": 0.2479574546613653, | |
| "grad_norm": 0.2450082004070282, | |
| "learning_rate": 1.6446617022248617e-05, | |
| "loss": 0.7277, | |
| "step": 22170 | |
| }, | |
| { | |
| "epoch": 0.24806929834862795, | |
| "grad_norm": 0.25871893763542175, | |
| "learning_rate": 1.62181917858285e-05, | |
| "loss": 0.7143, | |
| "step": 22180 | |
| }, | |
| { | |
| "epoch": 0.24818114203589065, | |
| "grad_norm": 0.2587449848651886, | |
| "learning_rate": 1.598976654940838e-05, | |
| "loss": 0.708, | |
| "step": 22190 | |
| }, | |
| { | |
| "epoch": 0.2482929857231533, | |
| "grad_norm": 0.25496092438697815, | |
| "learning_rate": 1.576134131298826e-05, | |
| "loss": 0.7123, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.248404829410416, | |
| "grad_norm": 0.2394058257341385, | |
| "learning_rate": 1.553291607656814e-05, | |
| "loss": 0.714, | |
| "step": 22210 | |
| }, | |
| { | |
| "epoch": 0.2485166730976787, | |
| "grad_norm": 0.2560165524482727, | |
| "learning_rate": 1.530449084014802e-05, | |
| "loss": 0.7162, | |
| "step": 22220 | |
| }, | |
| { | |
| "epoch": 0.24862851678494136, | |
| "grad_norm": 0.24602052569389343, | |
| "learning_rate": 1.50760656037279e-05, | |
| "loss": 0.7408, | |
| "step": 22230 | |
| }, | |
| { | |
| "epoch": 0.24874036047220405, | |
| "grad_norm": 0.27800559997558594, | |
| "learning_rate": 1.484764036730778e-05, | |
| "loss": 0.7247, | |
| "step": 22240 | |
| }, | |
| { | |
| "epoch": 0.24885220415946674, | |
| "grad_norm": 0.24703536927700043, | |
| "learning_rate": 1.461921513088766e-05, | |
| "loss": 0.7352, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 0.2489640478467294, | |
| "grad_norm": 0.27936097979545593, | |
| "learning_rate": 1.439078989446754e-05, | |
| "loss": 0.7421, | |
| "step": 22260 | |
| }, | |
| { | |
| "epoch": 0.2490758915339921, | |
| "grad_norm": 0.265828400850296, | |
| "learning_rate": 1.4162364658047422e-05, | |
| "loss": 0.7234, | |
| "step": 22270 | |
| }, | |
| { | |
| "epoch": 0.24918773522125479, | |
| "grad_norm": 0.26921194791793823, | |
| "learning_rate": 1.3933939421627303e-05, | |
| "loss": 0.7414, | |
| "step": 22280 | |
| }, | |
| { | |
| "epoch": 0.24929957890851745, | |
| "grad_norm": 0.2829255163669586, | |
| "learning_rate": 1.3705514185207182e-05, | |
| "loss": 0.7378, | |
| "step": 22290 | |
| }, | |
| { | |
| "epoch": 0.24941142259578014, | |
| "grad_norm": 0.25702667236328125, | |
| "learning_rate": 1.3477088948787063e-05, | |
| "loss": 0.7475, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.24952326628304283, | |
| "grad_norm": 0.28925350308418274, | |
| "learning_rate": 1.3248663712366943e-05, | |
| "loss": 0.738, | |
| "step": 22310 | |
| }, | |
| { | |
| "epoch": 0.2496351099703055, | |
| "grad_norm": 0.2792825698852539, | |
| "learning_rate": 1.3020238475946822e-05, | |
| "loss": 0.7315, | |
| "step": 22320 | |
| }, | |
| { | |
| "epoch": 0.2497469536575682, | |
| "grad_norm": 0.246215358376503, | |
| "learning_rate": 1.2791813239526703e-05, | |
| "loss": 0.7391, | |
| "step": 22330 | |
| }, | |
| { | |
| "epoch": 0.24985879734483085, | |
| "grad_norm": 0.26492443680763245, | |
| "learning_rate": 1.2563388003106583e-05, | |
| "loss": 0.7478, | |
| "step": 22340 | |
| }, | |
| { | |
| "epoch": 0.24997064103209354, | |
| "grad_norm": 0.27402445673942566, | |
| "learning_rate": 1.2334962766686464e-05, | |
| "loss": 0.7528, | |
| "step": 22350 | |
| }, | |
| { | |
| "epoch": 0.25008248471935624, | |
| "grad_norm": 0.2757234573364258, | |
| "learning_rate": 1.2106537530266343e-05, | |
| "loss": 0.7306, | |
| "step": 22360 | |
| }, | |
| { | |
| "epoch": 0.2501943284066189, | |
| "grad_norm": 0.2723679840564728, | |
| "learning_rate": 1.1878112293846224e-05, | |
| "loss": 0.7472, | |
| "step": 22370 | |
| }, | |
| { | |
| "epoch": 0.2503061720938816, | |
| "grad_norm": 0.22666431963443756, | |
| "learning_rate": 1.1649687057426105e-05, | |
| "loss": 0.7443, | |
| "step": 22380 | |
| }, | |
| { | |
| "epoch": 0.25041801578114425, | |
| "grad_norm": 0.24548636376857758, | |
| "learning_rate": 1.1421261821005985e-05, | |
| "loss": 0.7525, | |
| "step": 22390 | |
| }, | |
| { | |
| "epoch": 0.25052985946840695, | |
| "grad_norm": 0.26941460371017456, | |
| "learning_rate": 1.1192836584585866e-05, | |
| "loss": 0.7482, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.25064170315566964, | |
| "grad_norm": 0.2741219997406006, | |
| "learning_rate": 1.0964411348165745e-05, | |
| "loss": 0.7404, | |
| "step": 22410 | |
| }, | |
| { | |
| "epoch": 0.25075354684293233, | |
| "grad_norm": 0.2622029483318329, | |
| "learning_rate": 1.0735986111745626e-05, | |
| "loss": 0.7463, | |
| "step": 22420 | |
| }, | |
| { | |
| "epoch": 0.250865390530195, | |
| "grad_norm": 0.25730788707733154, | |
| "learning_rate": 1.0507560875325506e-05, | |
| "loss": 0.7596, | |
| "step": 22430 | |
| }, | |
| { | |
| "epoch": 0.25097723421745766, | |
| "grad_norm": 0.24054691195487976, | |
| "learning_rate": 1.0279135638905387e-05, | |
| "loss": 0.7397, | |
| "step": 22440 | |
| }, | |
| { | |
| "epoch": 0.25108907790472035, | |
| "grad_norm": 0.23557224869728088, | |
| "learning_rate": 1.0050710402485266e-05, | |
| "loss": 0.7426, | |
| "step": 22450 | |
| }, | |
| { | |
| "epoch": 0.25120092159198304, | |
| "grad_norm": 0.25929298996925354, | |
| "learning_rate": 9.822285166065146e-06, | |
| "loss": 0.7402, | |
| "step": 22460 | |
| }, | |
| { | |
| "epoch": 0.25131276527924573, | |
| "grad_norm": 0.26300865411758423, | |
| "learning_rate": 9.593859929645027e-06, | |
| "loss": 0.755, | |
| "step": 22470 | |
| }, | |
| { | |
| "epoch": 0.2514246089665084, | |
| "grad_norm": 0.25753623247146606, | |
| "learning_rate": 9.365434693224906e-06, | |
| "loss": 0.7536, | |
| "step": 22480 | |
| }, | |
| { | |
| "epoch": 0.2515364526537711, | |
| "grad_norm": 0.2438272088766098, | |
| "learning_rate": 9.13700945680479e-06, | |
| "loss": 0.7528, | |
| "step": 22490 | |
| }, | |
| { | |
| "epoch": 0.25164829634103375, | |
| "grad_norm": 0.2870919406414032, | |
| "learning_rate": 8.908584220384669e-06, | |
| "loss": 0.772, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.25176014002829644, | |
| "grad_norm": 0.2551197111606598, | |
| "learning_rate": 8.68015898396455e-06, | |
| "loss": 0.7571, | |
| "step": 22510 | |
| }, | |
| { | |
| "epoch": 0.25187198371555913, | |
| "grad_norm": 0.24423009157180786, | |
| "learning_rate": 8.45173374754443e-06, | |
| "loss": 0.7548, | |
| "step": 22520 | |
| }, | |
| { | |
| "epoch": 0.2519838274028218, | |
| "grad_norm": 0.2683405578136444, | |
| "learning_rate": 8.223308511124309e-06, | |
| "loss": 0.7631, | |
| "step": 22530 | |
| }, | |
| { | |
| "epoch": 0.2520956710900845, | |
| "grad_norm": 0.25919967889785767, | |
| "learning_rate": 7.99488327470419e-06, | |
| "loss": 0.7556, | |
| "step": 22540 | |
| }, | |
| { | |
| "epoch": 0.25220751477734715, | |
| "grad_norm": 0.25076591968536377, | |
| "learning_rate": 7.76645803828407e-06, | |
| "loss": 0.7528, | |
| "step": 22550 | |
| }, | |
| { | |
| "epoch": 0.25231935846460984, | |
| "grad_norm": 0.2598860561847687, | |
| "learning_rate": 7.53803280186395e-06, | |
| "loss": 0.7565, | |
| "step": 22560 | |
| }, | |
| { | |
| "epoch": 0.25243120215187254, | |
| "grad_norm": 0.30933788418769836, | |
| "learning_rate": 7.30960756544383e-06, | |
| "loss": 0.7645, | |
| "step": 22570 | |
| }, | |
| { | |
| "epoch": 0.2525430458391352, | |
| "grad_norm": 0.26472121477127075, | |
| "learning_rate": 7.081182329023711e-06, | |
| "loss": 0.7559, | |
| "step": 22580 | |
| }, | |
| { | |
| "epoch": 0.2526548895263979, | |
| "grad_norm": 0.28362420201301575, | |
| "learning_rate": 6.852757092603591e-06, | |
| "loss": 0.7618, | |
| "step": 22590 | |
| }, | |
| { | |
| "epoch": 0.2527667332136606, | |
| "grad_norm": 0.27758538722991943, | |
| "learning_rate": 6.624331856183471e-06, | |
| "loss": 0.7656, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.25287857690092325, | |
| "grad_norm": 0.28303948044776917, | |
| "learning_rate": 6.395906619763352e-06, | |
| "loss": 0.7672, | |
| "step": 22610 | |
| }, | |
| { | |
| "epoch": 0.25299042058818594, | |
| "grad_norm": 0.2938460409641266, | |
| "learning_rate": 6.167481383343232e-06, | |
| "loss": 0.7662, | |
| "step": 22620 | |
| }, | |
| { | |
| "epoch": 0.25310226427544863, | |
| "grad_norm": 0.25707969069480896, | |
| "learning_rate": 5.939056146923112e-06, | |
| "loss": 0.7667, | |
| "step": 22630 | |
| }, | |
| { | |
| "epoch": 0.2532141079627113, | |
| "grad_norm": 0.2813314199447632, | |
| "learning_rate": 5.710630910502992e-06, | |
| "loss": 0.7645, | |
| "step": 22640 | |
| }, | |
| { | |
| "epoch": 0.253325951649974, | |
| "grad_norm": 0.2911704480648041, | |
| "learning_rate": 5.482205674082873e-06, | |
| "loss": 0.763, | |
| "step": 22650 | |
| }, | |
| { | |
| "epoch": 0.2534377953372367, | |
| "grad_norm": 0.2982921600341797, | |
| "learning_rate": 5.253780437662753e-06, | |
| "loss": 0.7606, | |
| "step": 22660 | |
| }, | |
| { | |
| "epoch": 0.25354963902449934, | |
| "grad_norm": 0.2803521156311035, | |
| "learning_rate": 5.025355201242633e-06, | |
| "loss": 0.7617, | |
| "step": 22670 | |
| }, | |
| { | |
| "epoch": 0.25366148271176203, | |
| "grad_norm": 0.26502448320388794, | |
| "learning_rate": 4.7969299648225135e-06, | |
| "loss": 0.7802, | |
| "step": 22680 | |
| }, | |
| { | |
| "epoch": 0.2537733263990247, | |
| "grad_norm": 0.27778494358062744, | |
| "learning_rate": 4.568504728402395e-06, | |
| "loss": 0.7776, | |
| "step": 22690 | |
| }, | |
| { | |
| "epoch": 0.2538851700862874, | |
| "grad_norm": 0.27522069215774536, | |
| "learning_rate": 4.340079491982275e-06, | |
| "loss": 0.7712, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.2539970137735501, | |
| "grad_norm": 0.2718433141708374, | |
| "learning_rate": 4.111654255562154e-06, | |
| "loss": 0.7696, | |
| "step": 22710 | |
| }, | |
| { | |
| "epoch": 0.25410885746081274, | |
| "grad_norm": 0.35057663917541504, | |
| "learning_rate": 3.883229019142035e-06, | |
| "loss": 0.7648, | |
| "step": 22720 | |
| }, | |
| { | |
| "epoch": 0.25422070114807543, | |
| "grad_norm": 0.274494469165802, | |
| "learning_rate": 3.654803782721915e-06, | |
| "loss": 0.7578, | |
| "step": 22730 | |
| }, | |
| { | |
| "epoch": 0.2543325448353381, | |
| "grad_norm": 0.2570250928401947, | |
| "learning_rate": 3.4263785463017955e-06, | |
| "loss": 0.7502, | |
| "step": 22740 | |
| }, | |
| { | |
| "epoch": 0.2544443885226008, | |
| "grad_norm": 0.290217787027359, | |
| "learning_rate": 3.197953309881676e-06, | |
| "loss": 0.7607, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 0.2545562322098635, | |
| "grad_norm": 0.25752514600753784, | |
| "learning_rate": 2.969528073461556e-06, | |
| "loss": 0.7612, | |
| "step": 22760 | |
| }, | |
| { | |
| "epoch": 0.2546680758971262, | |
| "grad_norm": 0.23857931792736053, | |
| "learning_rate": 2.7411028370414363e-06, | |
| "loss": 0.7495, | |
| "step": 22770 | |
| }, | |
| { | |
| "epoch": 0.25477991958438884, | |
| "grad_norm": 0.26004472374916077, | |
| "learning_rate": 2.5126776006213166e-06, | |
| "loss": 0.7477, | |
| "step": 22780 | |
| }, | |
| { | |
| "epoch": 0.25489176327165153, | |
| "grad_norm": 0.25449565052986145, | |
| "learning_rate": 2.2842523642011973e-06, | |
| "loss": 0.7379, | |
| "step": 22790 | |
| }, | |
| { | |
| "epoch": 0.2550036069589142, | |
| "grad_norm": 0.2568104565143585, | |
| "learning_rate": 2.055827127781077e-06, | |
| "loss": 0.7407, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.2551154506461769, | |
| "grad_norm": 0.253451406955719, | |
| "learning_rate": 1.8274018913609574e-06, | |
| "loss": 0.7241, | |
| "step": 22810 | |
| }, | |
| { | |
| "epoch": 0.2552272943334396, | |
| "grad_norm": 0.25928062200546265, | |
| "learning_rate": 1.598976654940838e-06, | |
| "loss": 0.7502, | |
| "step": 22820 | |
| }, | |
| { | |
| "epoch": 0.2553391380207023, | |
| "grad_norm": 0.24965140223503113, | |
| "learning_rate": 1.3705514185207182e-06, | |
| "loss": 0.7417, | |
| "step": 22830 | |
| }, | |
| { | |
| "epoch": 0.25545098170796493, | |
| "grad_norm": 0.2660306394100189, | |
| "learning_rate": 1.1421261821005987e-06, | |
| "loss": 0.7463, | |
| "step": 22840 | |
| }, | |
| { | |
| "epoch": 0.2555628253952276, | |
| "grad_norm": 0.25784334540367126, | |
| "learning_rate": 9.137009456804787e-07, | |
| "loss": 0.7379, | |
| "step": 22850 | |
| }, | |
| { | |
| "epoch": 0.2556746690824903, | |
| "grad_norm": 0.27776214480400085, | |
| "learning_rate": 6.852757092603591e-07, | |
| "loss": 0.7562, | |
| "step": 22860 | |
| }, | |
| { | |
| "epoch": 0.255786512769753, | |
| "grad_norm": 0.24403463304042816, | |
| "learning_rate": 4.5685047284023936e-07, | |
| "loss": 0.7427, | |
| "step": 22870 | |
| }, | |
| { | |
| "epoch": 0.2558983564570157, | |
| "grad_norm": 0.24544622004032135, | |
| "learning_rate": 2.2842523642011968e-07, | |
| "loss": 0.748, | |
| "step": 22880 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 22889, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.946484739580887e+17, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |