Upload folder using huggingface_hub

- checkpoint-21500/1_Pooling/config.json +10 -0
 - checkpoint-21500/README.md +605 -0
 - checkpoint-21500/config.json +26 -0
 - checkpoint-21500/config_sentence_transformers.json +10 -0
 - checkpoint-21500/model.safetensors +3 -0
 - checkpoint-21500/modules.json +20 -0
 - checkpoint-21500/optimizer.pt +3 -0
 - checkpoint-21500/rng_state.pth +3 -0
 - checkpoint-21500/scheduler.pt +3 -0
 - checkpoint-21500/sentence_bert_config.json +4 -0
 - checkpoint-21500/special_tokens_map.json +37 -0
 - checkpoint-21500/tokenizer.json +0 -0
 - checkpoint-21500/tokenizer_config.json +64 -0
 - checkpoint-21500/trainer_state.json +1538 -0
 - checkpoint-21500/training_args.bin +3 -0
 - checkpoint-21500/vocab.txt +0 -0
 
    	
checkpoint-21500/1_Pooling/config.json ADDED

{
  "word_embedding_dimension": 384,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}
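For orientation, this file describes the checkpoint's pooling layer: mean pooling over 384-dimensional token embeddings, with every other pooling mode disabled. A minimal sketch (not one of the uploaded files) of the equivalent `sentence_transformers.models.Pooling` module built from the same settings:

```python
from sentence_transformers import models

# Mirrors checkpoint-21500/1_Pooling/config.json: mean pooling over 384-dim token embeddings
pooling = models.Pooling(
    word_embedding_dimension=384,
    pooling_mode_cls_token=False,
    pooling_mode_mean_tokens=True,
    pooling_mode_max_tokens=False,
    pooling_mode_mean_sqrt_len_tokens=False,
    pooling_mode_weightedmean_tokens=False,
    pooling_mode_lasttoken=False,
    include_prompt=True,
)
print(pooling.get_pooling_mode_str())  # -> "mean"
```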
    	
checkpoint-21500/README.md ADDED
---
base_model: sentence-transformers/all-MiniLM-L6-v2
datasets:
- youssefkhalil320/pairs_three_scores_v5
language:
- en
library_name: sentence-transformers
license: apache-2.0
pipeline_tag: sentence-similarity
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:80000003
- loss:CoSENTLoss
widget:
- source_sentence: durable pvc swim ring
  sentences:
  - flaky croissant
  - urban shoes
  - warm drinks mug
- source_sentence: iso mak retard capsules
  sentences:
  - savory baguette
  - shea butter body cream
  - softwheeled cruiser
- source_sentence: love sandra potty
  sentences:
  - utensil holder
  - olive pants
  - headwear
- source_sentence: dusky hair brush
  sentences:
  - back compartment laptop
  - rubber feet platter
  - honed blade knife
- source_sentence: nkd skn
  sentences:
  - fruit fragrances nail polish remover
  - panini salmon
  - hand drawing bag
---

# all-MiniLM-L6-v8-pair_score

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) on the [pairs_three_scores_v5](https://huggingface.co/datasets/youssefkhalil320/pairs_three_scores_v5) dataset. It maps sentences and paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) <!-- at revision c9745ed1d9f207416be6d2e6f8de32d1f16199bf -->
- **Maximum Sequence Length:** 256 tokens
- **Output Dimensionality:** 384 dimensions
- **Similarity Function:** Cosine Similarity
- **Training Dataset:**
    - [pairs_three_scores_v5](https://huggingface.co/datasets/youssefkhalil320/pairs_three_scores_v5)
- **Language:** en
- **License:** apache-2.0

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
```
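The same three-module stack can also be assembled explicitly with the `sentence_transformers.models` API. A minimal sketch built from the architecture printout above (not taken from the training script); loading the published checkpoint with `SentenceTransformer(<model id>)` yields the identical pipeline:

```python
from sentence_transformers import SentenceTransformer, models

# Token-level encoder (BertModel) that truncates inputs at 256 tokens
word_embedding_model = models.Transformer(
    "sentence-transformers/all-MiniLM-L6-v2", max_seq_length=256
)
# Mean pooling over token embeddings -> one 384-dim vector per text
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(), pooling_mode="mean"
)
# L2-normalize so that dot product equals cosine similarity
normalize_model = models.Normalize()

model = SentenceTransformer(modules=[word_embedding_model, pooling_model, normalize_model])
```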

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("sentence_transformers_model_id")
# Run inference
sentences = [
    'nkd skn',
    'hand drawing bag',
    'panini salmon',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
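Because the embeddings are L2-normalized, they drop straight into the usual semantic-search utilities. A small sketch, assuming the placeholder model id from above and a made-up product catalogue:

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("sentence_transformers_model_id")  # placeholder id as above

# Hypothetical product catalogue; any list of short product-style phrases works
corpus = ["shea butter body cream", "hybrid seat bike", "flaky croissant", "urban shoes"]
corpus_embeddings = model.encode(corpus, convert_to_tensor=True)

query_embedding = model.encode("moisturizing hand cream", convert_to_tensor=True)

# Cosine-similarity search over the catalogue, best matches first
hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=3)[0]
for hit in hits:
    print(corpus[hit["corpus_id"]], round(hit["score"], 3))
```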

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### pairs_three_scores_v5

* Dataset: [pairs_three_scores_v5](https://huggingface.co/datasets/youssefkhalil320/pairs_three_scores_v5) at [3d8c457](https://huggingface.co/datasets/youssefkhalil320/pairs_three_scores_v5/tree/3d8c45703846bd2adfaaf422abafbc389b283de1)
* Size: 80,000,003 training samples
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>score</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence1                                                                        | sentence2                                                                        | score                                                          |
  |:--------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------|
  | type    | string                                                                           | string                                                                           | float                                                          |
  | details | <ul><li>min: 3 tokens</li><li>mean: 6.06 tokens</li><li>max: 12 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 5.71 tokens</li><li>max: 13 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.11</li><li>max: 1.0</li></ul> |
* Samples:
  | sentence1                            | sentence2                              | score            |
  |:-------------------------------------|:---------------------------------------|:-----------------|
  | <code>vanilla hair cream</code>      | <code>free of paraben hair mask</code> | <code>0.5</code> |
  | <code>nourishing shampoo</code>      | <code>cumin lemon tea</code>           | <code>0.0</code> |
  | <code>safe materials pacifier</code> | <code>facial serum</code>              | <code>0.5</code> |
* Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "pairwise_cos_sim"
  }
  ```
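CoSENTLoss encourages pairs with higher annotated scores to also receive higher cosine similarity, with the listed `scale` multiplying the similarity differences. A minimal sketch of constructing the loss with exactly these parameters (`pairwise_cos_sim` is also the library default for `similarity_fct`):

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.losses import CoSENTLoss
from sentence_transformers.util import pairwise_cos_sim

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# scale=20.0 and pairwise cosine similarity, as listed above
loss = CoSENTLoss(model, scale=20.0, similarity_fct=pairwise_cos_sim)
```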

### Evaluation Dataset

#### pairs_three_scores_v5

* Dataset: [pairs_three_scores_v5](https://huggingface.co/datasets/youssefkhalil320/pairs_three_scores_v5) at [3d8c457](https://huggingface.co/datasets/youssefkhalil320/pairs_three_scores_v5/tree/3d8c45703846bd2adfaaf422abafbc389b283de1)
* Size: 20,000,001 evaluation samples
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>score</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence1                                                                        | sentence2                                                                        | score                                                          |
  |:--------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------|
  | type    | string                                                                           | string                                                                           | float                                                          |
  | details | <ul><li>min: 3 tokens</li><li>mean: 6.21 tokens</li><li>max: 12 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 5.75 tokens</li><li>max: 12 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.11</li><li>max: 1.0</li></ul> |
* Samples:
  | sentence1                               | sentence2                          | score            |
  |:----------------------------------------|:-----------------------------------|:-----------------|
  | <code>teddy bear toy</code>             | <code>long lasting cat food</code> | <code>0.0</code> |
  | <code>eva hair treatment</code>         | <code>fresh pineapple</code>       | <code>0.0</code> |
  | <code>soft wave hair conditioner</code> | <code>hybrid seat bike</code>      | <code>0.0</code> |
* Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "pairwise_cos_sim"
  }
  ```

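The card does not list a separate evaluator, but score-labelled pairs like these are typically checked with `EmbeddingSimilarityEvaluator`, which correlates the model's cosine similarities with the annotated scores. A small sketch using the sample rows above (the placeholder model id is reused as an assumption):

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator

model = SentenceTransformer("sentence_transformers_model_id")  # placeholder id as above

# A handful of held-out pairs in the same (sentence1, sentence2, score) format
evaluator = EmbeddingSimilarityEvaluator(
    sentences1=["teddy bear toy", "eva hair treatment"],
    sentences2=["long lasting cat food", "fresh pineapple"],
    scores=[0.0, 0.0],
    name="pairs_three_scores_v5-dev",
)
print(evaluator(model))  # correlation between cosine similarities and the labelled scores
```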
### Training Hyperparameters
#### Non-Default Hyperparameters

- `eval_strategy`: steps
- `per_device_train_batch_size`: 128
- `per_device_eval_batch_size`: 128
- `learning_rate`: 2e-05
- `num_train_epochs`: 1
- `warmup_ratio`: 0.1
- `fp16`: True

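These non-default values map one-to-one onto `SentenceTransformerTrainingArguments`. A sketch of the corresponding trainer wiring; the training script itself is not part of this upload, and the output directory and 80/20 split handling are assumptions:

```python
from datasets import load_dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
)
from sentence_transformers.losses import CoSENTLoss

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
loss = CoSENTLoss(model, scale=20.0)

# Assumed split handling: the card reports 80,000,003 train / 20,000,001 eval rows
dataset = load_dataset("youssefkhalil320/pairs_three_scores_v5", split="train")
splits = dataset.train_test_split(test_size=0.2, seed=42)

args = SentenceTransformerTrainingArguments(
    output_dir="all-MiniLM-L6-v8-pair_score",  # assumption, mirrors the model name above
    eval_strategy="steps",
    per_device_train_batch_size=128,
    per_device_eval_batch_size=128,
    learning_rate=2e-5,
    num_train_epochs=1,
    warmup_ratio=0.1,
    fp16=True,
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=splits["train"],
    eval_dataset=splits["test"],
    loss=loss,
)
trainer.train()
```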
#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: steps
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 128
- `per_device_eval_batch_size`: 128
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 2e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 1
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.1
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: True
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: False
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`: 
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `use_liger_kernel`: False
- `eval_use_gather_object`: False
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: proportional

</details>

### Training Logs
<details><summary>Click to expand</summary>

| Epoch  | Step  | Training Loss |
|:------:|:-----:|:-------------:|
| 0.0002 | 100 | 10.8792 |
| 0.0003 | 200 | 10.9284 |
| 0.0005 | 300 | 10.6466 |
| 0.0006 | 400 | 10.841 |
| 0.0008 | 500 | 10.8094 |
| 0.0010 | 600 | 10.4323 |
| 0.0011 | 700 | 10.3032 |
| 0.0013 | 800 | 10.4006 |
| 0.0014 | 900 | 10.4743 |
| 0.0016 | 1000 | 10.2334 |
| 0.0018 | 1100 | 10.0135 |
| 0.0019 | 1200 | 9.7874 |
| 0.0021 | 1300 | 9.7419 |
| 0.0022 | 1400 | 9.7412 |
| 0.0024 | 1500 | 9.4585 |
| 0.0026 | 1600 | 9.5339 |
| 0.0027 | 1700 | 9.4345 |
| 0.0029 | 1800 | 9.1733 |
| 0.0030 | 1900 | 8.9952 |
| 0.0032 | 2000 | 8.9669 |
| 0.0034 | 2100 | 8.8152 |
| 0.0035 | 2200 | 8.7936 |
| 0.0037 | 2300 | 8.6771 |
| 0.0038 | 2400 | 8.4648 |
| 0.0040 | 2500 | 8.5764 |
| 0.0042 | 2600 | 8.4587 |
| 0.0043 | 2700 | 8.2966 |
| 0.0045 | 2800 | 8.2329 |
| 0.0046 | 2900 | 8.1415 |
| 0.0048 | 3000 | 8.0404 |
| 0.0050 | 3100 | 7.9698 |
| 0.0051 | 3200 | 7.9205 |
| 0.0053 | 3300 | 7.8314 |
| 0.0054 | 3400 | 7.8369 |
| 0.0056 | 3500 | 7.6403 |
| 0.0058 | 3600 | 7.5842 |
| 0.0059 | 3700 | 7.5812 |
| 0.0061 | 3800 | 7.4335 |
| 0.0062 | 3900 | 7.4917 |
| 0.0064 | 4000 | 7.3204 |
| 0.0066 | 4100 | 7.2971 |
| 0.0067 | 4200 | 7.2233 |
| 0.0069 | 4300 | 7.2081 |
| 0.0070 | 4400 | 7.1364 |
| 0.0072 | 4500 | 7.0663 |
| 0.0074 | 4600 | 6.9601 |
| 0.0075 | 4700 | 6.9546 |
| 0.0077 | 4800 | 6.9019 |
| 0.0078 | 4900 | 6.8801 |
| 0.0080 | 5000 | 6.7734 |
| 0.0082 | 5100 | 6.7648 |
| 0.0083 | 5200 | 6.7498 |
| 0.0085 | 5300 | 6.6872 |
| 0.0086 | 5400 | 6.6264 |
| 0.0088 | 5500 | 6.579 |
| 0.0090 | 5600 | 6.6001 |
| 0.0091 | 5700 | 6.5971 |
| 0.0093 | 5800 | 6.4694 |
| 0.0094 | 5900 | 6.3983 |
| 0.0096 | 6000 | 6.4477 |
| 0.0098 | 6100 | 6.4308 |
| 0.0099 | 6200 | 6.4248 |
| 0.0101 | 6300 | 6.2642 |
| 0.0102 | 6400 | 6.2763 |
| 0.0104 | 6500 | 6.3878 |
| 0.0106 | 6600 | 6.2601 |
| 0.0107 | 6700 | 6.1789 |
| 0.0109 | 6800 | 6.1773 |
| 0.0110 | 6900 | 6.1439 |
| 0.0112 | 7000 | 6.1863 |
| 0.0114 | 7100 | 6.0513 |
| 0.0115 | 7200 | 6.0671 |
| 0.0117 | 7300 | 6.0212 |
| 0.0118 | 7400 | 6.0043 |
| 0.0120 | 7500 | 6.0166 |
| 0.0122 | 7600 | 5.9754 |
| 0.0123 | 7700 | 5.9211 |
| 0.0125 | 7800 | 5.7867 |
| 0.0126 | 7900 | 5.8534 |
| 0.0128 | 8000 | 5.7708 |
| 0.0130 | 8100 | 5.8328 |
| 0.0131 | 8200 | 5.7417 |
| 0.0133 | 8300 | 5.8097 |
| 0.0134 | 8400 | 5.7578 |
| 0.0136 | 8500 | 5.643 |
| 0.0138 | 8600 | 5.6401 |
| 0.0139 | 8700 | 5.6627 |
| 0.0141 | 8800 | 5.6167 |
| 0.0142 | 8900 | 5.6539 |
| 0.0144 | 9000 | 5.4513 |
| 0.0146 | 9100 | 5.4132 |
| 0.0147 | 9200 | 5.4714 |
| 0.0149 | 9300 | 5.4786 |
| 0.0150 | 9400 | 5.3928 |
| 0.0152 | 9500 | 5.4774 |
| 0.0154 | 9600 | 5.2881 |
| 0.0155 | 9700 | 5.3699 |
| 0.0157 | 9800 | 5.1483 |
| 0.0158 | 9900 | 5.3051 |
| 0.0160 | 10000 | 5.2546 |
| 0.0162 | 10100 | 5.2314 |
| 0.0163 | 10200 | 5.1783 |
| 0.0165 | 10300 | 5.2074 |
| 0.0166 | 10400 | 5.2825 |
| 0.0168 | 10500 | 5.1715 |
| 0.0170 | 10600 | 5.087 |
| 0.0171 | 10700 | 5.082 |
| 0.0173 | 10800 | 4.9111 |
| 0.0174 | 10900 | 5.0213 |
| 0.0176 | 11000 | 4.9898 |
| 0.0178 | 11100 | 4.7734 |
| 0.0179 | 11200 | 4.9511 |
| 0.0181 | 11300 | 5.0481 |
| 0.0182 | 11400 | 4.8441 |
| 0.0184 | 11500 | 4.873 |
| 0.0186 | 11600 | 4.9988 |
| 0.0187 | 11700 | 4.7653 |
| 0.0189 | 11800 | 4.804 |
| 0.0190 | 11900 | 4.8288 |
| 0.0192 | 12000 | 4.7053 |
| 0.0194 | 12100 | 4.6887 |
| 0.0195 | 12200 | 4.7832 |
| 0.0197 | 12300 | 4.6817 |
| 0.0198 | 12400 | 4.6252 |
| 0.0200 | 12500 | 4.5936 |
| 0.0202 | 12600 | 4.7452 |
| 0.0203 | 12700 | 4.5321 |
| 0.0205 | 12800 | 4.4964 |
| 0.0206 | 12900 | 4.4421 |
| 0.0208 | 13000 | 4.3782 |
| 0.0210 | 13100 | 4.5169 |
| 0.0211 | 13200 | 4.533 |
| 0.0213 | 13300 | 4.3725 |
| 0.0214 | 13400 | 4.2911 |
| 0.0216 | 13500 | 4.2261 |
| 0.0218 | 13600 | 4.2467 |
| 0.0219 | 13700 | 4.1558 |
| 0.0221 | 13800 | 4.2794 |
| 0.0222 | 13900 | 4.2383 |
| 0.0224 | 14000 | 4.1654 |
| 0.0226 | 14100 | 4.158 |
| 0.0227 | 14200 | 4.1299 |
| 0.0229 | 14300 | 4.1902 |
| 0.0230 | 14400 | 3.7853 |
| 0.0232 | 14500 | 4.0514 |
| 0.0234 | 14600 | 4.1655 |
| 0.0235 | 14700 | 4.051 |
| 0.0237 | 14800 | 4.078 |
| 0.0238 | 14900 | 4.1193 |
| 0.0240 | 15000 | 4.1536 |
| 0.0242 | 15100 | 3.935 |
| 0.0243 | 15200 | 3.9535 |
| 0.0245 | 15300 | 3.7051 |
| 0.0246 | 15400 | 3.8329 |
            | 0.0248 | 15500 | 3.9412        |
         
     | 
| 489 | 
         
            +
            | 0.0250 | 15600 | 3.6668        |
         
     | 
| 490 | 
         
            +
            | 0.0251 | 15700 | 3.7758        |
         
     | 
| 491 | 
         
            +
            | 0.0253 | 15800 | 3.8805        |
         
     | 
| 492 | 
         
            +
            | 0.0254 | 15900 | 3.8848        |
         
     | 
| 493 | 
         
            +
            | 0.0256 | 16000 | 3.75          |
         
     | 
| 494 | 
         
            +
            | 0.0258 | 16100 | 3.5685        |
         
     | 
| 495 | 
         
            +
            | 0.0259 | 16200 | 3.7016        |
         
     | 
| 496 | 
         
            +
            | 0.0261 | 16300 | 4.0955        |
         
     | 
| 497 | 
         
            +
            | 0.0262 | 16400 | 3.7577        |
         
     | 
| 498 | 
         
            +
            | 0.0264 | 16500 | 3.7485        |
         
     | 
| 499 | 
         
            +
            | 0.0266 | 16600 | 3.8263        |
         
     | 
| 500 | 
         
            +
            | 0.0267 | 16700 | 3.6922        |
         
     | 
| 501 | 
         
            +
            | 0.0269 | 16800 | 3.6568        |
         
     | 
| 502 | 
         
            +
            | 0.0270 | 16900 | 3.7317        |
         
     | 
| 503 | 
         
            +
            | 0.0272 | 17000 | 3.5089        |
         
     | 
| 504 | 
         
            +
            | 0.0274 | 17100 | 3.7377        |
         
     | 
| 505 | 
         
            +
            | 0.0275 | 17200 | 3.6206        |
         
     | 
| 506 | 
         
            +
            | 0.0277 | 17300 | 3.3702        |
         
     | 
| 507 | 
         
            +
            | 0.0278 | 17400 | 3.5126        |
         
     | 
| 508 | 
         
            +
            | 0.0280 | 17500 | 3.4841        |
         
     | 
| 509 | 
         
            +
            | 0.0282 | 17600 | 3.1464        |
         
     | 
| 510 | 
         
            +
            | 0.0283 | 17700 | 3.7012        |
         
     | 
| 511 | 
         
            +
            | 0.0285 | 17800 | 3.5802        |
         
     | 
| 512 | 
         
            +
            | 0.0286 | 17900 | 3.4952        |
         
     | 
| 513 | 
         
            +
            | 0.0288 | 18000 | 3.1174        |
         
     | 
| 514 | 
         
            +
            | 0.0290 | 18100 | 3.3134        |
         
     | 
| 515 | 
         
            +
            | 0.0291 | 18200 | 3.3578        |
         
     | 
| 516 | 
         
            +
            | 0.0293 | 18300 | 3.0209        |
         
     | 
| 517 | 
         
            +
            | 0.0294 | 18400 | 3.3796        |
         
     | 
| 518 | 
         
            +
            | 0.0296 | 18500 | 3.2287        |
         
     | 
| 519 | 
         
            +
            | 0.0298 | 18600 | 3.1537        |
         
     | 
| 520 | 
         
            +
            | 0.0299 | 18700 | 2.9073        |
         
     | 
| 521 | 
         
            +
            | 0.0301 | 18800 | 3.3444        |
         
     | 
| 522 | 
         
            +
            | 0.0302 | 18900 | 3.1341        |
         
     | 
| 523 | 
         
            +
            | 0.0304 | 19000 | 2.8862        |
         
     | 
| 524 | 
         
            +
            | 0.0306 | 19100 | 3.2033        |
         
     | 
| 525 | 
         
            +
            | 0.0307 | 19200 | 3.2764        |
         
     | 
| 526 | 
         
            +
            | 0.0309 | 19300 | 3.0725        |
         
     | 
| 527 | 
         
            +
            | 0.0310 | 19400 | 3.0436        |
         
     | 
| 528 | 
         
            +
            | 0.0312 | 19500 | 3.3493        |
         
     | 
| 529 | 
         
            +
            | 0.0314 | 19600 | 3.0141        |
         
     | 
| 530 | 
         
            +
            | 0.0315 | 19700 | 2.779         |
         
     | 
| 531 | 
         
            +
            | 0.0317 | 19800 | 3.3543        |
         
     | 
| 532 | 
         
            +
            | 0.0318 | 19900 | 3.1526        |
         
     | 
| 533 | 
         
            +
            | 0.0320 | 20000 | 2.7896        |
         
     | 
| 534 | 
         
            +
            | 0.0322 | 20100 | 2.9398        |
         
     | 
| 535 | 
         
            +
            | 0.0323 | 20200 | 3.1254        |
         
     | 
| 536 | 
         
            +
            | 0.0325 | 20300 | 2.8832        |
         
     | 
| 537 | 
         
            +
            | 0.0326 | 20400 | 3.0542        |
         
     | 
| 538 | 
         
            +
            | 0.0328 | 20500 | 2.9722        |
         
     | 
| 539 | 
         
            +
            | 0.0330 | 20600 | 2.9321        |
         
     | 
| 540 | 
         
            +
            | 0.0331 | 20700 | 2.6448        |
         
     | 
| 541 | 
         
            +
            | 0.0333 | 20800 | 3.4006        |
         
     | 
| 542 | 
         
            +
            | 0.0334 | 20900 | 3.0022        |
         
     | 
| 543 | 
         
            +
            | 0.0336 | 21000 | 2.6366        |
         
     | 
| 544 | 
         
            +
            | 0.0338 | 21100 | 3.0112        |
         
     | 
| 545 | 
         
            +
            | 0.0339 | 21200 | 2.7856        |
         
     | 
| 546 | 
         
            +
            | 0.0341 | 21300 | 3.0967        |
         
     | 
| 547 | 
         
            +
            | 0.0342 | 21400 | 2.8754        |
         
     | 
| 548 | 
         
            +
            | 0.0344 | 21500 | 3.1269        |
         
     | 
| 549 | 
         
            +
             
     | 
| 550 | 
         
            +
            </details>
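
The loss values in the table above are also recorded programmatically in `checkpoint-21500/trainer_state.json`, which is part of this commit (see its diff further down). As a minimal sketch, assuming the checkpoint folder has been downloaded locally, the training curve can be reproduced from that file:

```python
import json

import matplotlib.pyplot as plt

# Assumes the checkpoint-21500 directory from this commit is available locally.
with open("checkpoint-21500/trainer_state.json") as f:
    state = json.load(f)

# Trainer log entries that contain a "loss" key are the training-loss logs,
# one entry per logging step (matching the rows of the table above).
steps = [entry["step"] for entry in state["log_history"] if "loss" in entry]
losses = [entry["loss"] for entry in state["log_history"] if "loss" in entry]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.savefig("loss_curve.png")
```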
         
### Framework Versions
- Python: 3.8.10
- Sentence Transformers: 3.1.1
- Transformers: 4.45.2
- PyTorch: 2.4.1+cu118
- Accelerate: 1.0.1
- Datasets: 3.0.1
- Tokenizers: 0.20.3

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### CoSENTLoss
```bibtex
@online{kexuefm-8847,
    title={CoSENT: A more efficient sentence vector scheme than Sentence-BERT},
    author={Su Jianlin},
    year={2022},
    month={Jan},
    url={https://kexue.fm/archives/8847},
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
         
checkpoint-21500/config.json
ADDED
@@ -0,0 +1,26 @@
{
  "_name_or_path": "sentence-transformers/all-MiniLM-L6-v2",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 384,
  "initializer_range": 0.02,
  "intermediate_size": 1536,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.45.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}
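
This is the standard `transformers` config for the MiniLM backbone (6 layers, 384-dimensional hidden states). As a quick sanity check, a sketch of loading it directly with `transformers`, assuming the checkpoint directory is available locally:

```python
from transformers import AutoConfig, AutoModel

# Path is assumed to be the local copy of this checkpoint folder.
config = AutoConfig.from_pretrained("checkpoint-21500")
print(config.hidden_size, config.num_hidden_layers)  # 384 6

# Loads model.safetensors as a plain BertModel (no pooling or normalization).
backbone = AutoModel.from_pretrained("checkpoint-21500")
```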
    	
checkpoint-21500/config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
{
  "__version__": {
    "sentence_transformers": "3.1.1",
    "transformers": "4.45.2",
    "pytorch": "2.4.1+cu118"
  },
  "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": null
}
    	
checkpoint-21500/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:99c88814c068c5e8eefba4a3e02ea5461df3a259fa05e31cf17f7ea244b7b60f
size 90864192
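
This entry is a Git LFS pointer: the diff records only the sha256 digest and byte size of `model.safetensors`, not the weights themselves. A small sketch, assuming the file has been downloaded locally, for checking the download against the recorded digest:

```python
import hashlib

# Stream the file in chunks so the ~90 MB of weights are not read into memory at once.
sha256 = hashlib.sha256()
with open("checkpoint-21500/model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)

# The LFS oid above is the sha256 of the file contents.
assert sha256.hexdigest() == (
    "99c88814c068c5e8eefba4a3e02ea5461df3a259fa05e31cf17f7ea244b7b60f"
)
```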
    	
checkpoint-21500/modules.json
ADDED
@@ -0,0 +1,20 @@
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  },
  {
    "idx": 2,
    "name": "2",
    "path": "2_Normalize",
    "type": "sentence_transformers.models.Normalize"
  }
]
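
`modules.json` defines the inference pipeline: the BERT backbone, mean pooling over token embeddings (per `1_Pooling/config.json`), and L2 normalization. A minimal usage sketch, assuming the checkpoint folder is available locally and `sentence-transformers` is installed:

```python
from sentence_transformers import SentenceTransformer

# Loads all three modules listed in modules.json:
# Transformer -> Pooling (mean) -> Normalize.
model = SentenceTransformer("checkpoint-21500")

embeddings = model.encode(["An example sentence.", "Another example."])
print(embeddings.shape)  # (2, 384)

# Because of the Normalize module the embeddings are unit length,
# so a dot product between two embeddings equals their cosine similarity.
print(embeddings[0] @ embeddings[1])
```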
    	
checkpoint-21500/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a77ecdc4f5a3da5d70b62074ff6ffb1eeed1cca41780e4009fec3c138dea8778
size 180607738

checkpoint-21500/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d36b1876a7db1c92218c3f439cb495bb0c126cbb258f747d067a650b122d5628
size 14244

checkpoint-21500/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:375cf91aac6f147d861e47fcb84cb786b3f26891d8432ca3403ccd4b62e745a6
size 1064
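
`optimizer.pt`, `scheduler.pt`, and `rng_state.pth` hold the optimizer state, learning-rate scheduler state, and RNG state at step 21500; they are what allow a Trainer run to resume from this checkpoint instead of restarting. A small sketch for inspecting them (paths assumed local; the exact keys depend on the optimizer and scheduler used):

```python
import torch

opt_state = torch.load("checkpoint-21500/optimizer.pt", map_location="cpu")
sched_state = torch.load("checkpoint-21500/scheduler.pt", map_location="cpu")

# A torch optimizer state_dict typically exposes "state" and "param_groups".
print(opt_state.keys())
print(sched_state)
```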
    	
checkpoint-21500/sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 256,
  "do_lower_case": false
}
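
`max_seq_length: 256` means inputs longer than 256 wordpieces are truncated at encode time. A one-line check, assuming the checkpoint folder is local:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("checkpoint-21500")
print(model.max_seq_length)  # 256; longer inputs are truncated to this length
```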
    	
checkpoint-21500/special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
{
  "cls_token": {
    "content": "[CLS]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "[MASK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "[PAD]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "[SEP]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "[UNK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
    	
checkpoint-21500/tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
    	
checkpoint-21500/tokenizer_config.json
ADDED
@@ -0,0 +1,64 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": false,
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": true,
  "mask_token": "[MASK]",
  "max_length": 128,
  "model_max_length": 256,
  "never_split": null,
  "pad_to_multiple_of": null,
  "pad_token": "[PAD]",
  "pad_token_type_id": 0,
  "padding_side": "right",
  "sep_token": "[SEP]",
  "stride": 0,
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "[UNK]"
}
        checkpoint-21500/trainer_state.json
    ADDED
    
    | 
         @@ -0,0 +1,1538 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.034399944960088066,
  "eval_steps": 200000,
  "global_step": 21500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0001599997440004096,
      "grad_norm": 84.32501983642578,
      "learning_rate": 3.103950336794611e-08,
      "loss": 10.8792,
      "step": 100
    },
    {
      "epoch": 0.0003199994880008192,
      "grad_norm": 60.63747024536133,
      "learning_rate": 6.303899137613798e-08,
      "loss": 10.9284,
      "step": 200
    },
    {
      "epoch": 0.00047999923200122877,
      "grad_norm": 55.71075439453125,
      "learning_rate": 9.503847938432986e-08,
      "loss": 10.6466,
      "step": 300
    },
    {
      "epoch": 0.0006399989760016384,
      "grad_norm": 57.63307189941406,
      "learning_rate": 1.2703796739252173e-07,
      "loss": 10.841,
      "step": 400
    },
    {
      "epoch": 0.000799998720002048,
      "grad_norm": 89.1032485961914,
      "learning_rate": 1.590374554007136e-07,
      "loss": 10.8094,
      "step": 500
    },
    {
      "epoch": 0.0009599984640024575,
      "grad_norm": 57.2479362487793,
      "learning_rate": 1.9103694340890547e-07,
      "loss": 10.4323,
      "step": 600
    },
    {
      "epoch": 0.0011199982080028672,
      "grad_norm": 51.17530059814453,
      "learning_rate": 2.2303643141709733e-07,
      "loss": 10.3032,
      "step": 700
    },
    {
      "epoch": 0.0012799979520032767,
      "grad_norm": 60.76409912109375,
      "learning_rate": 2.550359194252892e-07,
      "loss": 10.4006,
      "step": 800
    },
    {
      "epoch": 0.0014399976960036865,
      "grad_norm": 67.00859069824219,
      "learning_rate": 2.870354074334811e-07,
      "loss": 10.4743,
      "step": 900
    },
    {
      "epoch": 0.001599997440004096,
      "grad_norm": 68.4343032836914,
      "learning_rate": 3.19034895441673e-07,
      "loss": 10.2334,
      "step": 1000
    },
    {
      "epoch": 0.0017599971840045055,
      "grad_norm": 48.704105377197266,
      "learning_rate": 3.510343834498648e-07,
      "loss": 10.0135,
      "step": 1100
    },
    {
      "epoch": 0.001919996928004915,
      "grad_norm": 45.30134963989258,
      "learning_rate": 3.830338714580567e-07,
      "loss": 9.7874,
      "step": 1200
    },
    {
      "epoch": 0.002079996672005325,
      "grad_norm": 84.56024169921875,
      "learning_rate": 4.150333594662486e-07,
      "loss": 9.7419,
      "step": 1300
    },
    {
      "epoch": 0.0022399964160057344,
      "grad_norm": 45.73213195800781,
      "learning_rate": 4.470328474744404e-07,
      "loss": 9.7412,
      "step": 1400
    },
    {
      "epoch": 0.002399996160006144,
      "grad_norm": 50.21996307373047,
      "learning_rate": 4.790323354826324e-07,
      "loss": 9.4585,
      "step": 1500
    },
    {
      "epoch": 0.0025599959040065534,
      "grad_norm": 59.475799560546875,
      "learning_rate": 5.110318234908241e-07,
      "loss": 9.5339,
      "step": 1600
    },
    {
      "epoch": 0.002719995648006963,
      "grad_norm": 82.53620910644531,
      "learning_rate": 5.43031311499016e-07,
      "loss": 9.4345,
      "step": 1700
    },
    {
      "epoch": 0.002879995392007373,
      "grad_norm": 39.44235610961914,
      "learning_rate": 5.750307995072079e-07,
      "loss": 9.1733,
      "step": 1800
    },
    {
      "epoch": 0.0030399951360077825,
      "grad_norm": 37.58698654174805,
      "learning_rate": 6.070302875153998e-07,
      "loss": 8.9952,
      "step": 1900
    },
    {
      "epoch": 0.003199994880008192,
      "grad_norm": 40.35204315185547,
      "learning_rate": 6.390297755235917e-07,
      "loss": 8.9669,
      "step": 2000
    },
    {
      "epoch": 0.0033599946240086016,
      "grad_norm": 57.84451675415039,
      "learning_rate": 6.707092686517017e-07,
      "loss": 8.8152,
      "step": 2100
    },
    {
      "epoch": 0.003519994368009011,
      "grad_norm": 40.126953125,
      "learning_rate": 7.027087566598935e-07,
      "loss": 8.7936,
      "step": 2200
    },
    {
      "epoch": 0.0036799941120094206,
      "grad_norm": 35.435707092285156,
      "learning_rate": 7.347082446680854e-07,
      "loss": 8.6771,
      "step": 2300
    },
    {
      "epoch": 0.00383999385600983,
      "grad_norm": 42.3509635925293,
      "learning_rate": 7.667077326762773e-07,
      "loss": 8.4648,
      "step": 2400
    },
    {
      "epoch": 0.00399999360001024,
      "grad_norm": 33.58556365966797,
      "learning_rate": 7.987072206844691e-07,
      "loss": 8.5764,
      "step": 2500
    },
    {
      "epoch": 0.00415999334401065,
      "grad_norm": 34.014678955078125,
      "learning_rate": 8.30706708692661e-07,
      "loss": 8.4587,
      "step": 2600
    },
    {
      "epoch": 0.004319993088011059,
      "grad_norm": 36.43831253051758,
      "learning_rate": 8.627061967008528e-07,
      "loss": 8.2966,
      "step": 2700
    },
    {
      "epoch": 0.004479992832011469,
      "grad_norm": 31.411684036254883,
      "learning_rate": 8.947056847090448e-07,
      "loss": 8.2329,
      "step": 2800
    },
    {
      "epoch": 0.004639992576011879,
      "grad_norm": 47.570125579833984,
      "learning_rate": 9.267051727172366e-07,
      "loss": 8.1415,
      "step": 2900
    },
    {
      "epoch": 0.004799992320012288,
      "grad_norm": 30.771928787231445,
      "learning_rate": 9.587046607254284e-07,
      "loss": 8.0404,
      "step": 3000
    },
    {
      "epoch": 0.004959992064012698,
      "grad_norm": 26.92803955078125,
      "learning_rate": 9.907041487336204e-07,
      "loss": 7.9698,
      "step": 3100
    },
    {
      "epoch": 0.005119991808013107,
      "grad_norm": 31.121917724609375,
      "learning_rate": 1.0227036367418122e-06,
      "loss": 7.9205,
      "step": 3200
    },
    {
      "epoch": 0.005279991552013517,
      "grad_norm": 33.991416931152344,
      "learning_rate": 1.054703124750004e-06,
      "loss": 7.8314,
      "step": 3300
    },
    {
      "epoch": 0.005439991296013926,
      "grad_norm": 31.278030395507812,
      "learning_rate": 1.086702612758196e-06,
      "loss": 7.8369,
      "step": 3400
    },
    {
      "epoch": 0.005599991040014336,
      "grad_norm": 28.116140365600586,
      "learning_rate": 1.1187021007663878e-06,
      "loss": 7.6403,
      "step": 3500
    },
    {
      "epoch": 0.005759990784014746,
      "grad_norm": 30.954113006591797,
      "learning_rate": 1.1507015887745798e-06,
      "loss": 7.5842,
      "step": 3600
    },
    {
      "epoch": 0.005919990528015155,
      "grad_norm": 36.53567886352539,
      "learning_rate": 1.1827010767827715e-06,
      "loss": 7.5812,
      "step": 3700
    },
    {
      "epoch": 0.006079990272015565,
      "grad_norm": 36.81153106689453,
      "learning_rate": 1.2147005647909635e-06,
      "loss": 7.4335,
      "step": 3800
    },
    {
      "epoch": 0.006239990016015974,
      "grad_norm": 22.556833267211914,
      "learning_rate": 1.2467000527991553e-06,
      "loss": 7.4917,
      "step": 3900
    },
    {
      "epoch": 0.006399989760016384,
      "grad_norm": 40.195579528808594,
      "learning_rate": 1.278699540807347e-06,
      "loss": 7.3204,
      "step": 4000
    },
    {
      "epoch": 0.006559989504016793,
      "grad_norm": 21.862642288208008,
      "learning_rate": 1.310699028815539e-06,
      "loss": 7.2971,
      "step": 4100
    },
    {
      "epoch": 0.006719989248017203,
      "grad_norm": 29.61161231994629,
      "learning_rate": 1.3426985168237308e-06,
      "loss": 7.2233,
      "step": 4200
    },
    {
      "epoch": 0.006879988992017613,
      "grad_norm": 22.342451095581055,
      "learning_rate": 1.3746980048319228e-06,
      "loss": 7.2081,
      "step": 4300
    },
    {
      "epoch": 0.007039988736018022,
      "grad_norm": 36.36684799194336,
      "learning_rate": 1.4066974928401148e-06,
      "loss": 7.1364,
      "step": 4400
    },
    {
      "epoch": 0.007199988480018432,
      "grad_norm": 25.563953399658203,
      "learning_rate": 1.4386969808483064e-06,
      "loss": 7.0663,
      "step": 4500
    },
    {
      "epoch": 0.007359988224018841,
      "grad_norm": 22.50385856628418,
      "learning_rate": 1.4706964688564984e-06,
      "loss": 6.9601,
      "step": 4600
    },
    {
      "epoch": 0.007519987968019251,
      "grad_norm": 31.61231231689453,
      "learning_rate": 1.5026959568646904e-06,
      "loss": 6.9546,
      "step": 4700
    },
    {
      "epoch": 0.00767998771201966,
      "grad_norm": 18.862520217895508,
      "learning_rate": 1.5346954448728822e-06,
      "loss": 6.9019,
      "step": 4800
    },
    {
      "epoch": 0.00783998745602007,
      "grad_norm": 32.594539642333984,
      "learning_rate": 1.5666949328810741e-06,
      "loss": 6.8801,
      "step": 4900
    },
    {
      "epoch": 0.00799998720002048,
      "grad_norm": 21.06804084777832,
      "learning_rate": 1.598694420889266e-06,
      "loss": 6.7734,
      "step": 5000
    },
    {
      "epoch": 0.00815998694402089,
      "grad_norm": 31.783803939819336,
      "learning_rate": 1.6303739140173757e-06,
      "loss": 6.7648,
      "step": 5100
    },
    {
      "epoch": 0.0083199866880213,
      "grad_norm": 49.79084777832031,
      "learning_rate": 1.6623734020255677e-06,
      "loss": 6.7498,
      "step": 5200
    },
    {
      "epoch": 0.008479986432021708,
      "grad_norm": 26.1977481842041,
      "learning_rate": 1.6943728900337597e-06,
      "loss": 6.6872,
      "step": 5300
    },
    {
      "epoch": 0.008639986176022118,
      "grad_norm": 21.942001342773438,
      "learning_rate": 1.7263723780419515e-06,
      "loss": 6.6264,
      "step": 5400
    },
    {
      "epoch": 0.008799985920022528,
      "grad_norm": 32.572959899902344,
      "learning_rate": 1.7583718660501433e-06,
      "loss": 6.579,
      "step": 5500
    },
    {
      "epoch": 0.008959985664022938,
      "grad_norm": 20.728240966796875,
      "learning_rate": 1.7903713540583353e-06,
      "loss": 6.6001,
      "step": 5600
    },
    {
      "epoch": 0.009119985408023347,
      "grad_norm": 24.334205627441406,
      "learning_rate": 1.822370842066527e-06,
      "loss": 6.5971,
      "step": 5700
    },
    {
      "epoch": 0.009279985152023757,
      "grad_norm": 27.025753021240234,
      "learning_rate": 1.854370330074719e-06,
      "loss": 6.4694,
      "step": 5800
    },
    {
      "epoch": 0.009439984896024167,
      "grad_norm": 23.506013870239258,
      "learning_rate": 1.8863698180829106e-06,
      "loss": 6.3983,
      "step": 5900
    },
    {
      "epoch": 0.009599984640024576,
      "grad_norm": 35.65713882446289,
      "learning_rate": 1.9183693060911026e-06,
      "loss": 6.4477,
      "step": 6000
    },
    {
      "epoch": 0.009759984384024985,
      "grad_norm": 22.977373123168945,
      "learning_rate": 1.950368794099295e-06,
      "loss": 6.4308,
      "step": 6100
    },
    {
      "epoch": 0.009919984128025396,
      "grad_norm": 22.127635955810547,
      "learning_rate": 1.982368282107486e-06,
      "loss": 6.4248,
      "step": 6200
    },
    {
      "epoch": 0.010079983872025805,
      "grad_norm": 33.53960418701172,
      "learning_rate": 2.0143677701156784e-06,
      "loss": 6.2642,
      "step": 6300
    },
    {
      "epoch": 0.010239983616026214,
      "grad_norm": 24.39597511291504,
      "learning_rate": 2.04636725812387e-06,
      "loss": 6.2763,
      "step": 6400
    },
    {
      "epoch": 0.010399983360026625,
      "grad_norm": 24.471288681030273,
      "learning_rate": 2.078366746132062e-06,
      "loss": 6.3878,
      "step": 6500
    },
    {
      "epoch": 0.010559983104027034,
      "grad_norm": 34.05498123168945,
      "learning_rate": 2.110366234140254e-06,
      "loss": 6.2601,
      "step": 6600
    },
    {
      "epoch": 0.010719982848027443,
      "grad_norm": 30.60455322265625,
      "learning_rate": 2.142365722148446e-06,
      "loss": 6.1789,
      "step": 6700
    },
    {
      "epoch": 0.010879982592027852,
      "grad_norm": 27.737686157226562,
      "learning_rate": 2.1743652101566377e-06,
      "loss": 6.1773,
      "step": 6800
    },
    {
      "epoch": 0.011039982336028263,
      "grad_norm": 24.246810913085938,
      "learning_rate": 2.2063646981648294e-06,
      "loss": 6.1439,
      "step": 6900
    },
    {
      "epoch": 0.011199982080028672,
      "grad_norm": 27.53533363342285,
      "learning_rate": 2.2383641861730217e-06,
      "loss": 6.1863,
      "step": 7000
    },
    {
      "epoch": 0.011359981824029081,
      "grad_norm": 27.81687355041504,
      "learning_rate": 2.2703636741812134e-06,
      "loss": 6.0513,
      "step": 7100
    },
    {
      "epoch": 0.011519981568029492,
      "grad_norm": 28.00519371032715,
      "learning_rate": 2.3020431673093234e-06,
      "loss": 6.0671,
      "step": 7200
    },
    {
      "epoch": 0.011679981312029901,
      "grad_norm": 29.347061157226562,
      "learning_rate": 2.3340426553175152e-06,
      "loss": 6.0212,
      "step": 7300
    },
    {
      "epoch": 0.01183998105603031,
      "grad_norm": 29.621200561523438,
      "learning_rate": 2.365722148445625e-06,
      "loss": 6.0043,
      "step": 7400
    },
    {
      "epoch": 0.011999980800030719,
      "grad_norm": 31.689117431640625,
      "learning_rate": 2.397721636453817e-06,
      "loss": 6.0166,
      "step": 7500
    },
    {
      "epoch": 0.01215998054403113,
      "grad_norm": 46.79508972167969,
      "learning_rate": 2.429721124462009e-06,
      "loss": 5.9754,
      "step": 7600
    },
    {
      "epoch": 0.012319980288031539,
      "grad_norm": 28.857833862304688,
      "learning_rate": 2.4617206124702006e-06,
      "loss": 5.9211,
      "step": 7700
    },
    {
      "epoch": 0.012479980032031948,
      "grad_norm": 58.34132766723633,
      "learning_rate": 2.4937201004783928e-06,
      "loss": 5.7867,
      "step": 7800
    },
    {
      "epoch": 0.012639979776032359,
      "grad_norm": 49.33425521850586,
      "learning_rate": 2.525719588486584e-06,
      "loss": 5.8534,
      "step": 7900
    },
    {
      "epoch": 0.012799979520032768,
      "grad_norm": 39.17392349243164,
      "learning_rate": 2.5577190764947763e-06,
      "loss": 5.7708,
      "step": 8000
    },
    {
      "epoch": 0.012959979264033177,
      "grad_norm": 45.94136428833008,
      "learning_rate": 2.589718564502968e-06,
      "loss": 5.8328,
      "step": 8100
    },
    {
      "epoch": 0.013119979008033586,
      "grad_norm": 36.19196319580078,
      "learning_rate": 2.6217180525111603e-06,
      "loss": 5.7417,
      "step": 8200
    },
    {
      "epoch": 0.013279978752033997,
      "grad_norm": 37.051658630371094,
      "learning_rate": 2.653717540519352e-06,
      "loss": 5.8097,
      "step": 8300
    },
    {
      "epoch": 0.013439978496034406,
      "grad_norm": 90.0757064819336,
      "learning_rate": 2.6857170285275435e-06,
      "loss": 5.7578,
      "step": 8400
    },
    {
      "epoch": 0.013599978240034815,
      "grad_norm": 92.7857894897461,
      "learning_rate": 2.7177165165357357e-06,
      "loss": 5.643,
      "step": 8500
    },
    {
      "epoch": 0.013759977984035226,
      "grad_norm": 26.648149490356445,
      "learning_rate": 2.7497160045439274e-06,
      "loss": 5.6401,
      "step": 8600
    },
    {
      "epoch": 0.013919977728035635,
      "grad_norm": 45.42919158935547,
      "learning_rate": 2.7817154925521196e-06,
      "loss": 5.6627,
      "step": 8700
    },
    {
      "epoch": 0.014079977472036044,
      "grad_norm": 48.3182487487793,
      "learning_rate": 2.8137149805603114e-06,
      "loss": 5.6167,
      "step": 8800
    },
    {
      "epoch": 0.014239977216036454,
      "grad_norm": 51.463653564453125,
      "learning_rate": 2.8457144685685028e-06,
      "loss": 5.6539,
      "step": 8900
    },
    {
      "epoch": 0.014399976960036864,
      "grad_norm": 47.81680679321289,
      "learning_rate": 2.877713956576695e-06,
      "loss": 5.4513,
      "step": 9000
    },
    {
      "epoch": 0.014559976704037273,
      "grad_norm": 42.410667419433594,
      "learning_rate": 2.9097134445848868e-06,
      "loss": 5.4132,
      "step": 9100
    },
    {
      "epoch": 0.014719976448037683,
      "grad_norm": 55.33562088012695,
      "learning_rate": 2.941712932593079e-06,
      "loss": 5.4714,
      "step": 9200
    },
    {
      "epoch": 0.014879976192038093,
      "grad_norm": 38.538246154785156,
| 658 | 
         
            +
                  "learning_rate": 2.9737124206012707e-06,
         
     | 
| 659 | 
         
            +
                  "loss": 5.4786,
         
     | 
| 660 | 
         
            +
                  "step": 9300
         
     | 
| 661 | 
         
            +
                },
         
     | 
| 662 | 
         
            +
                {
         
     | 
| 663 | 
         
            +
                  "epoch": 0.015039975936038502,
         
     | 
| 664 | 
         
            +
                  "grad_norm": 43.42023468017578,
         
     | 
| 665 | 
         
            +
                  "learning_rate": 3.0057119086094625e-06,
         
     | 
| 666 | 
         
            +
                  "loss": 5.3928,
         
     | 
| 667 | 
         
            +
                  "step": 9400
         
     | 
| 668 | 
         
            +
                },
         
     | 
| 669 | 
         
            +
                {
         
     | 
| 670 | 
         
            +
                  "epoch": 0.015199975680038912,
         
     | 
| 671 | 
         
            +
                  "grad_norm": 24.861467361450195,
         
     | 
| 672 | 
         
            +
                  "learning_rate": 3.037391401737572e-06,
         
     | 
| 673 | 
         
            +
                  "loss": 5.4774,
         
     | 
| 674 | 
         
            +
                  "step": 9500
         
     | 
| 675 | 
         
            +
                },
         
     | 
| 676 | 
         
            +
                {
         
     | 
| 677 | 
         
            +
                  "epoch": 0.01535997542403932,
         
     | 
| 678 | 
         
            +
                  "grad_norm": 98.92141723632812,
         
     | 
| 679 | 
         
            +
                  "learning_rate": 3.0693908897457643e-06,
         
     | 
| 680 | 
         
            +
                  "loss": 5.2881,
         
     | 
| 681 | 
         
            +
                  "step": 9600
         
     | 
| 682 | 
         
            +
                },
         
     | 
| 683 | 
         
            +
                {
         
     | 
| 684 | 
         
            +
                  "epoch": 0.015519975168039732,
         
     | 
| 685 | 
         
            +
                  "grad_norm": 62.839866638183594,
         
     | 
| 686 | 
         
            +
                  "learning_rate": 3.101390377753956e-06,
         
     | 
| 687 | 
         
            +
                  "loss": 5.3699,
         
     | 
| 688 | 
         
            +
                  "step": 9700
         
     | 
| 689 | 
         
            +
                },
         
     | 
| 690 | 
         
            +
                {
         
     | 
| 691 | 
         
            +
                  "epoch": 0.01567997491204014,
         
     | 
| 692 | 
         
            +
                  "grad_norm": 46.006065368652344,
         
     | 
| 693 | 
         
            +
                  "learning_rate": 3.133069870882066e-06,
         
     | 
| 694 | 
         
            +
                  "loss": 5.1483,
         
     | 
| 695 | 
         
            +
                  "step": 9800
         
     | 
| 696 | 
         
            +
                },
         
     | 
| 697 | 
         
            +
                {
         
     | 
| 698 | 
         
            +
                  "epoch": 0.01583997465604055,
         
     | 
| 699 | 
         
            +
                  "grad_norm": 89.62445068359375,
         
     | 
| 700 | 
         
            +
                  "learning_rate": 3.1650693588902583e-06,
         
     | 
| 701 | 
         
            +
                  "loss": 5.3051,
         
     | 
| 702 | 
         
            +
                  "step": 9900
         
     | 
| 703 | 
         
            +
                },
         
     | 
| 704 | 
         
            +
                {
         
     | 
| 705 | 
         
            +
                  "epoch": 0.01599997440004096,
         
     | 
| 706 | 
         
            +
                  "grad_norm": 41.113609313964844,
         
     | 
| 707 | 
         
            +
                  "learning_rate": 3.19706884689845e-06,
         
     | 
| 708 | 
         
            +
                  "loss": 5.2546,
         
     | 
| 709 | 
         
            +
                  "step": 10000
         
     | 
| 710 | 
         
            +
                },
         
     | 
| 711 | 
         
            +
                {
         
     | 
| 712 | 
         
            +
                  "epoch": 0.01615997414404137,
         
     | 
| 713 | 
         
            +
                  "grad_norm": 46.37376403808594,
         
     | 
| 714 | 
         
            +
                  "learning_rate": 3.2290683349066414e-06,
         
     | 
| 715 | 
         
            +
                  "loss": 5.2314,
         
     | 
| 716 | 
         
            +
                  "step": 10100
         
     | 
| 717 | 
         
            +
                },
         
     | 
| 718 | 
         
            +
                {
         
     | 
| 719 | 
         
            +
                  "epoch": 0.01631997388804178,
         
     | 
| 720 | 
         
            +
                  "grad_norm": 60.3846321105957,
         
     | 
| 721 | 
         
            +
                  "learning_rate": 3.2610678229148337e-06,
         
     | 
| 722 | 
         
            +
                  "loss": 5.1783,
         
     | 
| 723 | 
         
            +
                  "step": 10200
         
     | 
| 724 | 
         
            +
                },
         
     | 
| 725 | 
         
            +
                {
         
     | 
| 726 | 
         
            +
                  "epoch": 0.016479973632042188,
         
     | 
| 727 | 
         
            +
                  "grad_norm": 145.4359130859375,
         
     | 
| 728 | 
         
            +
                  "learning_rate": 3.2930673109230254e-06,
         
     | 
| 729 | 
         
            +
                  "loss": 5.2074,
         
     | 
| 730 | 
         
            +
                  "step": 10300
         
     | 
| 731 | 
         
            +
                },
         
     | 
| 732 | 
         
            +
                {
         
     | 
| 733 | 
         
            +
                  "epoch": 0.0166399733760426,
         
     | 
| 734 | 
         
            +
                  "grad_norm": 69.00183868408203,
         
     | 
| 735 | 
         
            +
                  "learning_rate": 3.325066798931217e-06,
         
     | 
| 736 | 
         
            +
                  "loss": 5.2825,
         
     | 
| 737 | 
         
            +
                  "step": 10400
         
     | 
| 738 | 
         
            +
                },
         
     | 
| 739 | 
         
            +
                {
         
     | 
| 740 | 
         
            +
                  "epoch": 0.01679997312004301,
         
     | 
| 741 | 
         
            +
                  "grad_norm": 48.03580093383789,
         
     | 
| 742 | 
         
            +
                  "learning_rate": 3.3570662869394094e-06,
         
     | 
| 743 | 
         
            +
                  "loss": 5.1715,
         
     | 
| 744 | 
         
            +
                  "step": 10500
         
     | 
| 745 | 
         
            +
                },
         
     | 
| 746 | 
         
            +
                {
         
     | 
| 747 | 
         
            +
                  "epoch": 0.016959972864043417,
         
     | 
| 748 | 
         
            +
                  "grad_norm": 58.56736755371094,
         
     | 
| 749 | 
         
            +
                  "learning_rate": 3.389065774947601e-06,
         
     | 
| 750 | 
         
            +
                  "loss": 5.087,
         
     | 
| 751 | 
         
            +
                  "step": 10600
         
     | 
| 752 | 
         
            +
                },
         
     | 
| 753 | 
         
            +
                {
         
     | 
| 754 | 
         
            +
                  "epoch": 0.017119972608043828,
         
     | 
| 755 | 
         
            +
                  "grad_norm": 54.484527587890625,
         
     | 
| 756 | 
         
            +
                  "learning_rate": 3.421065262955793e-06,
         
     | 
| 757 | 
         
            +
                  "loss": 5.082,
         
     | 
| 758 | 
         
            +
                  "step": 10700
         
     | 
| 759 | 
         
            +
                },
         
     | 
| 760 | 
         
            +
                {
         
     | 
| 761 | 
         
            +
                  "epoch": 0.017279972352044235,
         
     | 
| 762 | 
         
            +
                  "grad_norm": 74.30866241455078,
         
     | 
| 763 | 
         
            +
                  "learning_rate": 3.4530647509639847e-06,
         
     | 
| 764 | 
         
            +
                  "loss": 4.9111,
         
     | 
| 765 | 
         
            +
                  "step": 10800
         
     | 
| 766 | 
         
            +
                },
         
     | 
| 767 | 
         
            +
                {
         
     | 
| 768 | 
         
            +
                  "epoch": 0.017439972096044646,
         
     | 
| 769 | 
         
            +
                  "grad_norm": 60.489505767822266,
         
     | 
| 770 | 
         
            +
                  "learning_rate": 3.4850642389721765e-06,
         
     | 
| 771 | 
         
            +
                  "loss": 5.0213,
         
     | 
| 772 | 
         
            +
                  "step": 10900
         
     | 
| 773 | 
         
            +
                },
         
     | 
| 774 | 
         
            +
                {
         
     | 
| 775 | 
         
            +
                  "epoch": 0.017599971840045057,
         
     | 
| 776 | 
         
            +
                  "grad_norm": 61.25093460083008,
         
     | 
| 777 | 
         
            +
                  "learning_rate": 3.5170637269803687e-06,
         
     | 
| 778 | 
         
            +
                  "loss": 4.9898,
         
     | 
| 779 | 
         
            +
                  "step": 11000
         
     | 
| 780 | 
         
            +
                },
         
     | 
| 781 | 
         
            +
                {
         
     | 
| 782 | 
         
            +
                  "epoch": 0.017759971584045464,
         
     | 
| 783 | 
         
            +
                  "grad_norm": 51.98568344116211,
         
     | 
| 784 | 
         
            +
                  "learning_rate": 3.5490632149885605e-06,
         
     | 
| 785 | 
         
            +
                  "loss": 4.7734,
         
     | 
| 786 | 
         
            +
                  "step": 11100
         
     | 
| 787 | 
         
            +
                },
         
     | 
| 788 | 
         
            +
                {
         
     | 
| 789 | 
         
            +
                  "epoch": 0.017919971328045875,
         
     | 
| 790 | 
         
            +
                  "grad_norm": 64.08167266845703,
         
     | 
| 791 | 
         
            +
                  "learning_rate": 3.581062702996752e-06,
         
     | 
| 792 | 
         
            +
                  "loss": 4.9511,
         
     | 
| 793 | 
         
            +
                  "step": 11200
         
     | 
| 794 | 
         
            +
                },
         
     | 
| 795 | 
         
            +
                {
         
     | 
| 796 | 
         
            +
                  "epoch": 0.018079971072046286,
         
     | 
| 797 | 
         
            +
                  "grad_norm": 61.8354606628418,
         
     | 
| 798 | 
         
            +
                  "learning_rate": 3.613062191004944e-06,
         
     | 
| 799 | 
         
            +
                  "loss": 5.0481,
         
     | 
| 800 | 
         
            +
                  "step": 11300
         
     | 
| 801 | 
         
            +
                },
         
     | 
| 802 | 
         
            +
                {
         
     | 
| 803 | 
         
            +
                  "epoch": 0.018239970816046693,
         
     | 
| 804 | 
         
            +
                  "grad_norm": 97.53675842285156,
         
     | 
| 805 | 
         
            +
                  "learning_rate": 3.645061679013136e-06,
         
     | 
| 806 | 
         
            +
                  "loss": 4.8441,
         
     | 
| 807 | 
         
            +
                  "step": 11400
         
     | 
| 808 | 
         
            +
                },
         
     | 
| 809 | 
         
            +
                {
         
     | 
| 810 | 
         
            +
                  "epoch": 0.018399970560047104,
         
     | 
| 811 | 
         
            +
                  "grad_norm": 49.35017013549805,
         
     | 
| 812 | 
         
            +
                  "learning_rate": 3.677061167021328e-06,
         
     | 
| 813 | 
         
            +
                  "loss": 4.873,
         
     | 
| 814 | 
         
            +
                  "step": 11500
         
     | 
| 815 | 
         
            +
                },
         
     | 
| 816 | 
         
            +
                {
         
     | 
| 817 | 
         
            +
                  "epoch": 0.018559970304047515,
         
     | 
| 818 | 
         
            +
                  "grad_norm": 44.33409118652344,
         
     | 
| 819 | 
         
            +
                  "learning_rate": 3.70906065502952e-06,
         
     | 
| 820 | 
         
            +
                  "loss": 4.9988,
         
     | 
| 821 | 
         
            +
                  "step": 11600
         
     | 
| 822 | 
         
            +
                },
         
     | 
| 823 | 
         
            +
                {
         
     | 
| 824 | 
         
            +
                  "epoch": 0.018719970048047922,
         
     | 
| 825 | 
         
            +
                  "grad_norm": 140.5505828857422,
         
     | 
| 826 | 
         
            +
                  "learning_rate": 3.741060143037712e-06,
         
     | 
| 827 | 
         
            +
                  "loss": 4.7653,
         
     | 
| 828 | 
         
            +
                  "step": 11700
         
     | 
| 829 | 
         
            +
                },
         
     | 
| 830 | 
         
            +
                {
         
     | 
| 831 | 
         
            +
                  "epoch": 0.018879969792048333,
         
     | 
| 832 | 
         
            +
                  "grad_norm": 68.21163177490234,
         
     | 
| 833 | 
         
            +
                  "learning_rate": 3.7730596310459034e-06,
         
     | 
| 834 | 
         
            +
                  "loss": 4.804,
         
     | 
| 835 | 
         
            +
                  "step": 11800
         
     | 
| 836 | 
         
            +
                },
         
     | 
| 837 | 
         
            +
                {
         
     | 
| 838 | 
         
            +
                  "epoch": 0.019039969536048744,
         
     | 
| 839 | 
         
            +
                  "grad_norm": 48.678226470947266,
         
     | 
| 840 | 
         
            +
                  "learning_rate": 3.805059119054095e-06,
         
     | 
| 841 | 
         
            +
                  "loss": 4.8288,
         
     | 
| 842 | 
         
            +
                  "step": 11900
         
     | 
| 843 | 
         
            +
                },
         
     | 
| 844 | 
         
            +
                {
         
     | 
| 845 | 
         
            +
                  "epoch": 0.01919996928004915,
         
     | 
| 846 | 
         
            +
                  "grad_norm": 76.32611083984375,
         
     | 
| 847 | 
         
            +
                  "learning_rate": 3.837058607062287e-06,
         
     | 
| 848 | 
         
            +
                  "loss": 4.7053,
         
     | 
| 849 | 
         
            +
                  "step": 12000
         
     | 
| 850 | 
         
            +
                },
         
     | 
| 851 | 
         
            +
                {
         
     | 
| 852 | 
         
            +
                  "epoch": 0.019359969024049562,
         
     | 
| 853 | 
         
            +
                  "grad_norm": 70.85586547851562,
         
     | 
| 854 | 
         
            +
                  "learning_rate": 3.869058095070479e-06,
         
     | 
| 855 | 
         
            +
                  "loss": 4.6887,
         
     | 
| 856 | 
         
            +
                  "step": 12100
         
     | 
| 857 | 
         
            +
                },
         
     | 
| 858 | 
         
            +
                {
         
     | 
| 859 | 
         
            +
                  "epoch": 0.01951996876804997,
         
     | 
| 860 | 
         
            +
                  "grad_norm": 66.46036529541016,
         
     | 
| 861 | 
         
            +
                  "learning_rate": 3.901057583078671e-06,
         
     | 
| 862 | 
         
            +
                  "loss": 4.7832,
         
     | 
| 863 | 
         
            +
                  "step": 12200
         
     | 
| 864 | 
         
            +
                },
         
     | 
| 865 | 
         
            +
                {
         
     | 
| 866 | 
         
            +
                  "epoch": 0.01967996851205038,
         
     | 
| 867 | 
         
            +
                  "grad_norm": 165.13221740722656,
         
     | 
| 868 | 
         
            +
                  "learning_rate": 3.9330570710868636e-06,
         
     | 
| 869 | 
         
            +
                  "loss": 4.6817,
         
     | 
| 870 | 
         
            +
                  "step": 12300
         
     | 
| 871 | 
         
            +
                },
         
     | 
| 872 | 
         
            +
                {
         
     | 
| 873 | 
         
            +
                  "epoch": 0.01983996825605079,
         
     | 
| 874 | 
         
            +
                  "grad_norm": 118.48895263671875,
         
     | 
| 875 | 
         
            +
                  "learning_rate": 3.965056559095055e-06,
         
     | 
| 876 | 
         
            +
                  "loss": 4.6252,
         
     | 
| 877 | 
         
            +
                  "step": 12400
         
     | 
| 878 | 
         
            +
                },
         
     | 
| 879 | 
         
            +
                {
         
     | 
| 880 | 
         
            +
                  "epoch": 0.0199999680000512,
         
     | 
| 881 | 
         
            +
                  "grad_norm": 64.3436050415039,
         
     | 
| 882 | 
         
            +
                  "learning_rate": 3.997056047103246e-06,
         
     | 
| 883 | 
         
            +
                  "loss": 4.5936,
         
     | 
| 884 | 
         
            +
                  "step": 12500
         
     | 
| 885 | 
         
            +
                },
         
     | 
| 886 | 
         
            +
                {
         
     | 
| 887 | 
         
            +
                  "epoch": 0.02015996774405161,
         
     | 
| 888 | 
         
            +
                  "grad_norm": 42.27592468261719,
         
     | 
| 889 | 
         
            +
                  "learning_rate": 4.0290555351114385e-06,
         
     | 
| 890 | 
         
            +
                  "loss": 4.7452,
         
     | 
| 891 | 
         
            +
                  "step": 12600
         
     | 
| 892 | 
         
            +
                },
         
     | 
| 893 | 
         
            +
                {
         
     | 
| 894 | 
         
            +
                  "epoch": 0.02031996748805202,
         
     | 
| 895 | 
         
            +
                  "grad_norm": 60.829036712646484,
         
     | 
| 896 | 
         
            +
                  "learning_rate": 4.061055023119631e-06,
         
     | 
| 897 | 
         
            +
                  "loss": 4.5321,
         
     | 
| 898 | 
         
            +
                  "step": 12700
         
     | 
| 899 | 
         
            +
                },
         
     | 
| 900 | 
         
            +
                {
         
     | 
| 901 | 
         
            +
                  "epoch": 0.020479967232052428,
         
     | 
| 902 | 
         
            +
                  "grad_norm": 161.975830078125,
         
     | 
| 903 | 
         
            +
                  "learning_rate": 4.093054511127823e-06,
         
     | 
| 904 | 
         
            +
                  "loss": 4.4964,
         
     | 
| 905 | 
         
            +
                  "step": 12800
         
     | 
| 906 | 
         
            +
                },
         
     | 
| 907 | 
         
            +
                {
         
     | 
| 908 | 
         
            +
                  "epoch": 0.02063996697605284,
         
     | 
| 909 | 
         
            +
                  "grad_norm": 99.2963638305664,
         
     | 
| 910 | 
         
            +
                  "learning_rate": 4.125053999136014e-06,
         
     | 
| 911 | 
         
            +
                  "loss": 4.4421,
         
     | 
| 912 | 
         
            +
                  "step": 12900
         
     | 
| 913 | 
         
            +
                },
         
     | 
| 914 | 
         
            +
                {
         
     | 
| 915 | 
         
            +
                  "epoch": 0.02079996672005325,
         
     | 
| 916 | 
         
            +
                  "grad_norm": 68.78880310058594,
         
     | 
| 917 | 
         
            +
                  "learning_rate": 4.156733492264124e-06,
         
     | 
| 918 | 
         
            +
                  "loss": 4.3782,
         
     | 
| 919 | 
         
            +
                  "step": 13000
         
     | 
| 920 | 
         
            +
                },
         
     | 
| 921 | 
         
            +
                {
         
     | 
| 922 | 
         
            +
                  "epoch": 0.020959966464053657,
         
     | 
| 923 | 
         
            +
                  "grad_norm": 80.74951171875,
         
     | 
| 924 | 
         
            +
                  "learning_rate": 4.188732980272316e-06,
         
     | 
| 925 | 
         
            +
                  "loss": 4.5169,
         
     | 
| 926 | 
         
            +
                  "step": 13100
         
     | 
| 927 | 
         
            +
                },
         
     | 
| 928 | 
         
            +
                {
         
     | 
| 929 | 
         
            +
                  "epoch": 0.021119966208054067,
         
     | 
| 930 | 
         
            +
                  "grad_norm": 157.87254333496094,
         
     | 
| 931 | 
         
            +
                  "learning_rate": 4.220412473400426e-06,
         
     | 
| 932 | 
         
            +
                  "loss": 4.533,
         
     | 
| 933 | 
         
            +
                  "step": 13200
         
     | 
| 934 | 
         
            +
                },
         
     | 
| 935 | 
         
            +
                {
         
     | 
| 936 | 
         
            +
                  "epoch": 0.02127996595205448,
         
     | 
| 937 | 
         
            +
                  "grad_norm": 148.68331909179688,
         
     | 
| 938 | 
         
            +
                  "learning_rate": 4.252411961408618e-06,
         
     | 
| 939 | 
         
            +
                  "loss": 4.3725,
         
     | 
| 940 | 
         
            +
                  "step": 13300
         
     | 
| 941 | 
         
            +
                },
         
     | 
| 942 | 
         
            +
                {
         
     | 
| 943 | 
         
            +
                  "epoch": 0.021439965696054886,
         
     | 
| 944 | 
         
            +
                  "grad_norm": 72.9531021118164,
         
     | 
| 945 | 
         
            +
                  "learning_rate": 4.28441144941681e-06,
         
     | 
| 946 | 
         
            +
                  "loss": 4.2911,
         
     | 
| 947 | 
         
            +
                  "step": 13400
         
     | 
| 948 | 
         
            +
                },
         
     | 
| 949 | 
         
            +
                {
         
     | 
| 950 | 
         
            +
                  "epoch": 0.021599965440055297,
         
     | 
| 951 | 
         
            +
                  "grad_norm": 73.24847412109375,
         
     | 
| 952 | 
         
            +
                  "learning_rate": 4.316410937425001e-06,
         
     | 
| 953 | 
         
            +
                  "loss": 4.2261,
         
     | 
| 954 | 
         
            +
                  "step": 13500
         
     | 
| 955 | 
         
            +
                },
         
     | 
| 956 | 
         
            +
                {
         
     | 
| 957 | 
         
            +
                  "epoch": 0.021759965184055704,
         
     | 
| 958 | 
         
            +
                  "grad_norm": 94.57313537597656,
         
     | 
| 959 | 
         
            +
                  "learning_rate": 4.348410425433194e-06,
         
     | 
| 960 | 
         
            +
                  "loss": 4.2467,
         
     | 
| 961 | 
         
            +
                  "step": 13600
         
     | 
| 962 | 
         
            +
                },
         
     | 
| 963 | 
         
            +
                {
         
     | 
| 964 | 
         
            +
                  "epoch": 0.021919964928056115,
         
     | 
| 965 | 
         
            +
                  "grad_norm": 105.674560546875,
         
     | 
| 966 | 
         
            +
                  "learning_rate": 4.380409913441385e-06,
         
     | 
| 967 | 
         
            +
                  "loss": 4.1558,
         
     | 
| 968 | 
         
            +
                  "step": 13700
         
     | 
| 969 | 
         
            +
                },
         
     | 
| 970 | 
         
            +
                {
         
     | 
| 971 | 
         
            +
                  "epoch": 0.022079964672056526,
         
     | 
| 972 | 
         
            +
                  "grad_norm": 63.658287048339844,
         
     | 
| 973 | 
         
            +
                  "learning_rate": 4.412409401449577e-06,
         
     | 
| 974 | 
         
            +
                  "loss": 4.2794,
         
     | 
| 975 | 
         
            +
                  "step": 13800
         
     | 
| 976 | 
         
            +
                },
         
     | 
| 977 | 
         
            +
                {
         
     | 
| 978 | 
         
            +
                  "epoch": 0.022239964416056933,
         
     | 
| 979 | 
         
            +
                  "grad_norm": 77.69287109375,
         
     | 
| 980 | 
         
            +
                  "learning_rate": 4.444408889457769e-06,
         
     | 
| 981 | 
         
            +
                  "loss": 4.2383,
         
     | 
| 982 | 
         
            +
                  "step": 13900
         
     | 
| 983 | 
         
            +
                },
         
     | 
| 984 | 
         
            +
                {
         
     | 
| 985 | 
         
            +
                  "epoch": 0.022399964160057344,
         
     | 
| 986 | 
         
            +
                  "grad_norm": 82.83360290527344,
         
     | 
| 987 | 
         
            +
                  "learning_rate": 4.4764083774659615e-06,
         
     | 
| 988 | 
         
            +
                  "loss": 4.1654,
         
     | 
| 989 | 
         
            +
                  "step": 14000
         
     | 
| 990 | 
         
            +
                },
         
     | 
| 991 | 
         
            +
                {
         
     | 
| 992 | 
         
            +
                  "epoch": 0.022559963904057755,
         
     | 
| 993 | 
         
            +
                  "grad_norm": 47.373531341552734,
         
     | 
| 994 | 
         
            +
                  "learning_rate": 4.508407865474153e-06,
         
     | 
| 995 | 
         
            +
                  "loss": 4.158,
         
     | 
| 996 | 
         
            +
                  "step": 14100
         
     | 
| 997 | 
         
            +
                },
         
     | 
| 998 | 
         
            +
                {
         
     | 
| 999 | 
         
            +
                  "epoch": 0.022719963648058162,
         
     | 
| 1000 | 
         
            +
                  "grad_norm": 97.64757537841797,
         
     | 
| 1001 | 
         
            +
                  "learning_rate": 4.540407353482344e-06,
         
     | 
| 1002 | 
         
            +
                  "loss": 4.1299,
         
     | 
| 1003 | 
         
            +
                  "step": 14200
         
     | 
| 1004 | 
         
            +
                },
         
     | 
| 1005 | 
         
            +
                {
         
     | 
| 1006 | 
         
            +
                  "epoch": 0.022879963392058573,
         
     | 
| 1007 | 
         
            +
                  "grad_norm": 54.75618362426758,
         
     | 
| 1008 | 
         
            +
                  "learning_rate": 4.5724068414905365e-06,
         
     | 
| 1009 | 
         
            +
                  "loss": 4.1902,
         
     | 
| 1010 | 
         
            +
                  "step": 14300
         
     | 
| 1011 | 
         
            +
                },
         
     | 
| 1012 | 
         
            +
                {
         
     | 
| 1013 | 
         
            +
                  "epoch": 0.023039963136058984,
         
     | 
| 1014 | 
         
            +
                  "grad_norm": 258.4887390136719,
         
     | 
| 1015 | 
         
            +
                  "learning_rate": 4.604406329498729e-06,
         
     | 
| 1016 | 
         
            +
                  "loss": 3.7853,
         
     | 
| 1017 | 
         
            +
                  "step": 14400
         
     | 
| 1018 | 
         
            +
                },
         
     | 
| 1019 | 
         
            +
                {
         
     | 
| 1020 | 
         
            +
                  "epoch": 0.02319996288005939,
         
     | 
| 1021 | 
         
            +
                  "grad_norm": 104.63798522949219,
         
     | 
| 1022 | 
         
            +
                  "learning_rate": 4.63640581750692e-06,
         
     | 
| 1023 | 
         
            +
                  "loss": 4.0514,
         
     | 
| 1024 | 
         
            +
                  "step": 14500
         
     | 
| 1025 | 
         
            +
                },
         
     | 
| 1026 | 
         
            +
                {
         
     | 
| 1027 | 
         
            +
                  "epoch": 0.023359962624059802,
         
     | 
| 1028 | 
         
            +
                  "grad_norm": 60.090843200683594,
         
     | 
| 1029 | 
         
            +
                  "learning_rate": 4.668405305515112e-06,
         
     | 
| 1030 | 
         
            +
                  "loss": 4.1655,
         
     | 
| 1031 | 
         
            +
                  "step": 14600
         
     | 
| 1032 | 
         
            +
                },
         
     | 
| 1033 | 
         
            +
                {
         
     | 
| 1034 | 
         
            +
                  "epoch": 0.023519962368060213,
         
     | 
| 1035 | 
         
            +
                  "grad_norm": 44.36670684814453,
         
     | 
| 1036 | 
         
            +
                  "learning_rate": 4.7004047935233036e-06,
         
     | 
| 1037 | 
         
            +
                  "loss": 4.051,
         
     | 
| 1038 | 
         
            +
                  "step": 14700
         
     | 
| 1039 | 
         
            +
                },
         
     | 
| 1040 | 
         
            +
                {
         
     | 
| 1041 | 
         
            +
                  "epoch": 0.02367996211206062,
         
     | 
| 1042 | 
         
            +
                  "grad_norm": 41.61213302612305,
         
     | 
| 1043 | 
         
            +
                  "learning_rate": 4.732404281531496e-06,
         
     | 
| 1044 | 
         
            +
                  "loss": 4.078,
         
     | 
| 1045 | 
         
            +
                  "step": 14800
         
     | 
| 1046 | 
         
            +
                },
         
     | 
| 1047 | 
         
            +
                {
         
     | 
| 1048 | 
         
            +
                  "epoch": 0.02383996185606103,
         
     | 
| 1049 | 
         
            +
                  "grad_norm": 73.2448501586914,
         
     | 
| 1050 | 
         
            +
                  "learning_rate": 4.764403769539688e-06,
         
     | 
| 1051 | 
         
            +
                  "loss": 4.1193,
         
     | 
| 1052 | 
         
            +
                  "step": 14900
         
     | 
| 1053 | 
         
            +
                },
         
     | 
| 1054 | 
         
            +
                {
         
     | 
| 1055 | 
         
            +
                  "epoch": 0.023999961600061438,
         
     | 
| 1056 | 
         
            +
                  "grad_norm": 77.30301666259766,
         
     | 
| 1057 | 
         
            +
                  "learning_rate": 4.796403257547879e-06,
         
     | 
| 1058 | 
         
            +
                  "loss": 4.1536,
         
     | 
| 1059 | 
         
            +
                  "step": 15000
         
     | 
| 1060 | 
         
            +
                },
         
     | 
| 1061 | 
         
            +
                {
         
     | 
| 1062 | 
         
            +
                  "epoch": 0.02415996134406185,
         
     | 
| 1063 | 
         
            +
                  "grad_norm": 48.1458854675293,
         
     | 
| 1064 | 
         
            +
                  "learning_rate": 4.8284027455560715e-06,
         
     | 
| 1065 | 
         
            +
                  "loss": 3.935,
         
     | 
| 1066 | 
         
            +
                  "step": 15100
         
     | 
| 1067 | 
         
            +
                },
         
     | 
| 1068 | 
         
            +
                {
         
     | 
| 1069 | 
         
            +
                  "epoch": 0.02431996108806226,
         
     | 
| 1070 | 
         
            +
                  "grad_norm": 129.59295654296875,
         
     | 
| 1071 | 
         
            +
                  "learning_rate": 4.860402233564263e-06,
         
     | 
| 1072 | 
         
            +
                  "loss": 3.9535,
         
     | 
| 1073 | 
         
            +
                  "step": 15200
         
     | 
| 1074 | 
         
            +
                },
         
     | 
| 1075 | 
         
            +
                {
         
     | 
| 1076 | 
         
            +
                  "epoch": 0.024479960832062667,
         
     | 
| 1077 | 
         
            +
                  "grad_norm": 163.0813751220703,
         
     | 
| 1078 | 
         
            +
                  "learning_rate": 4.892401721572455e-06,
         
     | 
| 1079 | 
         
            +
                  "loss": 3.7051,
         
     | 
| 1080 | 
         
            +
                  "step": 15300
         
     | 
| 1081 | 
         
            +
                },
         
     | 
| 1082 | 
         
            +
                {
         
     | 
| 1083 | 
         
            +
                  "epoch": 0.024639960576063078,
         
     | 
| 1084 | 
         
            +
                  "grad_norm": 102.2786865234375,
         
     | 
| 1085 | 
         
            +
                  "learning_rate": 4.924401209580647e-06,
         
     | 
| 1086 | 
         
            +
                  "loss": 3.8329,
         
     | 
| 1087 | 
         
            +
                  "step": 15400
         
     | 
| 1088 | 
         
            +
                },
         
     | 
| 1089 | 
         
            +
                {
         
     | 
| 1090 | 
         
            +
                  "epoch": 0.02479996032006349,
         
     | 
| 1091 | 
         
            +
                  "grad_norm": 160.66392517089844,
         
     | 
| 1092 | 
         
            +
                  "learning_rate": 4.956400697588839e-06,
         
     | 
| 1093 | 
         
            +
                  "loss": 3.9412,
         
     | 
| 1094 | 
         
            +
                  "step": 15500
         
     | 
| 1095 | 
         
            +
                },
         
     | 
| 1096 | 
         
            +
                {
         
     | 
| 1097 | 
         
            +
                  "epoch": 0.024959960064063896,
         
     | 
| 1098 | 
         
            +
                  "grad_norm": 136.77218627929688,
         
     | 
| 1099 | 
         
            +
                  "learning_rate": 4.988400185597031e-06,
         
     | 
| 1100 | 
         
            +
                  "loss": 3.6668,
         
     | 
| 1101 | 
         
            +
                  "step": 15600
         
     | 
| 1102 | 
         
            +
                },
         
     | 
| 1103 | 
         
            +
                {
         
     | 
| 1104 | 
         
            +
                  "epoch": 0.025119959808064307,
         
     | 
| 1105 | 
         
            +
                  "grad_norm": 63.87991714477539,
         
     | 
| 1106 | 
         
            +
                  "learning_rate": 5.0200796787251404e-06,
         
     | 
| 1107 | 
         
            +
                  "loss": 3.7758,
         
     | 
| 1108 | 
         
            +
                  "step": 15700
         
     | 
| 1109 | 
         
            +
                },
         
     | 
| 1110 | 
         
            +
                {
         
     | 
| 1111 | 
         
            +
                  "epoch": 0.025279959552064718,
         
     | 
| 1112 | 
         
            +
                  "grad_norm": 352.977294921875,
         
     | 
| 1113 | 
         
            +
                  "learning_rate": 5.052079166733333e-06,
         
     | 
| 1114 | 
         
            +
                  "loss": 3.8805,
         
     | 
| 1115 | 
         
            +
                  "step": 15800
         
     | 
| 1116 | 
         
            +
                },
         
     | 
| 1117 | 
         
            +
                {
         
     | 
| 1118 | 
         
            +
                  "epoch": 0.025439959296065125,
         
     | 
| 1119 | 
         
            +
                  "grad_norm": 148.54776000976562,
         
     | 
| 1120 | 
         
            +
                  "learning_rate": 5.084078654741524e-06,
         
     | 
| 1121 | 
         
            +
                  "loss": 3.8848,
         
     | 
| 1122 | 
         
            +
                  "step": 15900
         
     | 
| 1123 | 
         
            +
                },
         
     | 
| 1124 | 
         
            +
                {
         
     | 
| 1125 | 
         
            +
                  "epoch": 0.025599959040065536,
         
     | 
| 1126 | 
         
            +
                  "grad_norm": 105.01113891601562,
         
     | 
| 1127 | 
         
            +
                  "learning_rate": 5.116078142749716e-06,
         
     | 
| 1128 | 
         
            +
                  "loss": 3.75,
         
     | 
| 1129 | 
         
            +
                  "step": 16000
         
     | 
| 1130 | 
         
            +
                },
         
     | 
| 1131 | 
         
            +
                {
         
     | 
| 1132 | 
         
            +
                  "epoch": 0.025759958784065947,
         
     | 
| 1133 | 
         
            +
                  "grad_norm": 170.62828063964844,
         
     | 
| 1134 | 
         
            +
                  "learning_rate": 5.148077630757908e-06,
         
     | 
| 1135 | 
         
            +
                  "loss": 3.5685,
         
     | 
| 1136 | 
         
            +
                  "step": 16100
         
     | 
| 1137 | 
         
            +
                },
         
     | 
| 1138 | 
         
            +
                {
         
     | 
| 1139 | 
         
            +
                  "epoch": 0.025919958528066354,
         
     | 
| 1140 | 
         
            +
                  "grad_norm": 164.85324096679688,
         
     | 
| 1141 | 
         
            +
                  "learning_rate": 5.180077118766101e-06,
         
     | 
| 1142 | 
         
            +
                  "loss": 3.7016,
         
     | 
| 1143 | 
         
            +
                  "step": 16200
         
     | 
| 1144 | 
         
            +
                },
         
     | 
| 1145 | 
         
            +
                {
         
     | 
| 1146 | 
         
            +
                  "epoch": 0.026079958272066765,
         
     | 
| 1147 | 
         
            +
                  "grad_norm": 79.85810852050781,
         
     | 
| 1148 | 
         
            +
                  "learning_rate": 5.212076606774292e-06,
         
     | 
| 1149 | 
         
            +
                  "loss": 4.0955,
         
     | 
| 1150 | 
         
            +
                  "step": 16300
         
     | 
| 1151 | 
         
            +
                },
         
     | 
| 1152 | 
         
            +
                {
         
     | 
| 1153 | 
         
            +
                  "epoch": 0.026239958016067173,
         
     | 
| 1154 | 
         
            +
                  "grad_norm": 109.73529815673828,
         
     | 
| 1155 | 
         
            +
                  "learning_rate": 5.244076094782484e-06,
         
     | 
| 1156 | 
         
            +
                  "loss": 3.7577,
         
     | 
| 1157 | 
         
            +
                  "step": 16400
         
     | 
| 1158 | 
         
            +
                },
         
     | 
| 1159 | 
         
            +
                {
         
     | 
| 1160 | 
         
            +
                  "epoch": 0.026399957760067583,
         
     | 
| 1161 | 
         
            +
                  "grad_norm": 105.98066711425781,
         
     | 
| 1162 | 
         
            +
                  "learning_rate": 5.276075582790676e-06,
         
     | 
| 1163 | 
         
            +
                  "loss": 3.7485,
         
     | 
| 1164 | 
         
            +
                  "step": 16500
         
     | 
| 1165 | 
         
            +
                },
         
     | 
| 1166 | 
         
            +
                {
         
     | 
| 1167 | 
         
            +
                  "epoch": 0.026559957504067994,
         
     | 
| 1168 | 
         
            +
                  "grad_norm": 71.02545166015625,
         
     | 
| 1169 | 
         
            +
                  "learning_rate": 5.3080750707988686e-06,
         
     | 
| 1170 | 
         
            +
                  "loss": 3.8263,
         
     | 
| 1171 | 
         
            +
                  "step": 16600
         
     | 
| 1172 | 
         
            +
                },
         
     | 
| 1173 | 
         
            +
                {
         
     | 
| 1174 | 
         
            +
                  "epoch": 0.0267199572480684,
         
     | 
| 1175 | 
         
            +
                  "grad_norm": 245.44224548339844,
         
     | 
| 1176 | 
         
            +
                  "learning_rate": 5.340074558807059e-06,
         
     | 
| 1177 | 
         
            +
                  "loss": 3.6922,
         
     | 
| 1178 | 
         
            +
                  "step": 16700
         
     | 
| 1179 | 
         
            +
                },
         
     | 
| 1180 | 
         
            +
                {
         
     | 
| 1181 | 
         
            +
                  "epoch": 0.026879956992068813,
         
     | 
| 1182 | 
         
            +
                  "grad_norm": 42.178157806396484,
         
     | 
| 1183 | 
         
            +
                  "learning_rate": 5.372074046815251e-06,
         
     | 
| 1184 | 
         
            +
                  "loss": 3.6568,
         
     | 
| 1185 | 
         
            +
                  "step": 16800
         
     | 
| 1186 | 
         
            +
                },
         
     | 
| 1187 | 
         
            +
                {
         
     | 
| 1188 | 
         
            +
                  "epoch": 0.027039956736069223,
         
     | 
| 1189 | 
         
            +
                  "grad_norm": 114.55894470214844,
         
     | 
| 1190 | 
         
            +
                  "learning_rate": 5.404073534823443e-06,
         
     | 
| 1191 | 
         
            +
                  "loss": 3.7317,
         
     | 
| 1192 | 
         
            +
                  "step": 16900
         
     | 
| 1193 | 
         
            +
                },
         
     | 
| 1194 | 
         
            +
                {
         
     | 
| 1195 | 
         
            +
                  "epoch": 0.02719995648006963,
         
     | 
| 1196 | 
         
            +
                  "grad_norm": 86.70626831054688,
         
     | 
| 1197 | 
         
            +
                  "learning_rate": 5.436073022831635e-06,
         
     | 
| 1198 | 
         
            +
                  "loss": 3.5089,
         
     | 
| 1199 | 
         
            +
                  "step": 17000
         
     | 
| 1200 | 
         
            +
                },
         
     | 
| 1201 | 
         
            +
                {
         
     | 
| 1202 | 
         
            +
                  "epoch": 0.02735995622407004,
         
     | 
| 1203 | 
         
            +
                  "grad_norm": 202.02505493164062,
         
     | 
| 1204 | 
         
            +
                  "learning_rate": 5.468072510839827e-06,
         
     | 
| 1205 | 
         
            +
                  "loss": 3.7377,
         
     | 
| 1206 | 
         
            +
                  "step": 17100
         
     | 
| 1207 | 
         
            +
                },
         
     | 
| 1208 | 
         
            +
                {
         
     | 
| 1209 | 
         
            +
                  "epoch": 0.027519955968070452,
         
     | 
| 1210 | 
         
            +
                  "grad_norm": 114.00701141357422,
         
     | 
| 1211 | 
         
            +
                  "learning_rate": 5.500071998848019e-06,
         
     | 
| 1212 | 
         
            +
                  "loss": 3.6206,
         
     | 
| 1213 | 
         
            +
                  "step": 17200
         
     | 
| 1214 | 
         
            +
                },
         
     | 
| 1215 | 
         
            +
                {
         
     | 
| 1216 | 
         
            +
                  "epoch": 0.02767995571207086,
         
     | 
| 1217 | 
         
            +
                  "grad_norm": 152.38311767578125,
         
     | 
| 1218 | 
         
            +
                  "learning_rate": 5.532071486856211e-06,
         
     | 
| 1219 | 
         
            +
                  "loss": 3.3702,
         
     | 
| 1220 | 
         
            +
                  "step": 17300
         
     | 
+    },
+    {
+      "epoch": 0.02783995545607127,
+      "grad_norm": 156.1048126220703,
+      "learning_rate": 5.564070974864403e-06,
+      "loss": 3.5126,
+      "step": 17400
+    },
+    {
+      "epoch": 0.02799995520007168,
+      "grad_norm": 117.87386322021484,
+      "learning_rate": 5.596070462872595e-06,
+      "loss": 3.4841,
+      "step": 17500
+    },
+    {
+      "epoch": 0.02815995494407209,
+      "grad_norm": 616.7991333007812,
+      "learning_rate": 5.628069950880786e-06,
+      "loss": 3.1464,
+      "step": 17600
+    },
+    {
+      "epoch": 0.0283199546880725,
+      "grad_norm": 131.32760620117188,
+      "learning_rate": 5.6600694388889786e-06,
+      "loss": 3.7012,
+      "step": 17700
+    },
+    {
+      "epoch": 0.028479954432072907,
+      "grad_norm": 60.172969818115234,
+      "learning_rate": 5.69206892689717e-06,
+      "loss": 3.5802,
+      "step": 17800
+    },
+    {
+      "epoch": 0.028639954176073318,
+      "grad_norm": 169.24374389648438,
+      "learning_rate": 5.724068414905361e-06,
+      "loss": 3.4952,
+      "step": 17900
+    },
+    {
+      "epoch": 0.02879995392007373,
+      "grad_norm": 158.77391052246094,
+      "learning_rate": 5.7560679029135535e-06,
+      "loss": 3.1174,
+      "step": 18000
+    },
+    {
+      "epoch": 0.028959953664074136,
+      "grad_norm": 218.98867797851562,
+      "learning_rate": 5.787747396041664e-06,
+      "loss": 3.3134,
+      "step": 18100
+    },
+    {
+      "epoch": 0.029119953408074547,
+      "grad_norm": 185.3249053955078,
+      "learning_rate": 5.819746884049856e-06,
+      "loss": 3.3578,
+      "step": 18200
+    },
+    {
+      "epoch": 0.029279953152074958,
+      "grad_norm": 93.69242858886719,
+      "learning_rate": 5.851746372058048e-06,
+      "loss": 3.0209,
+      "step": 18300
+    },
+    {
+      "epoch": 0.029439952896075365,
+      "grad_norm": 85.82784271240234,
+      "learning_rate": 5.883745860066239e-06,
+      "loss": 3.3796,
+      "step": 18400
+    },
+    {
+      "epoch": 0.029599952640075776,
+      "grad_norm": 125.96697998046875,
+      "learning_rate": 5.915745348074431e-06,
+      "loss": 3.2287,
+      "step": 18500
+    },
+    {
+      "epoch": 0.029759952384076187,
+      "grad_norm": 235.71075439453125,
+      "learning_rate": 5.947744836082623e-06,
+      "loss": 3.1537,
+      "step": 18600
+    },
+    {
+      "epoch": 0.029919952128076594,
+      "grad_norm": 139.5558319091797,
+      "learning_rate": 5.979744324090815e-06,
+      "loss": 2.9073,
+      "step": 18700
+    },
+    {
+      "epoch": 0.030079951872077005,
+      "grad_norm": 204.2928924560547,
+      "learning_rate": 6.011743812099007e-06,
+      "loss": 3.3444,
+      "step": 18800
+    },
+    {
+      "epoch": 0.030239951616077416,
+      "grad_norm": 165.4457244873047,
+      "learning_rate": 6.043743300107199e-06,
+      "loss": 3.1341,
+      "step": 18900
+    },
+    {
+      "epoch": 0.030399951360077823,
+      "grad_norm": 66.5983657836914,
+      "learning_rate": 6.07574278811539e-06,
+      "loss": 2.8862,
+      "step": 19000
+    },
+    {
+      "epoch": 0.030559951104078234,
+      "grad_norm": 219.95774841308594,
+      "learning_rate": 6.1077422761235826e-06,
+      "loss": 3.2033,
+      "step": 19100
+    },
+    {
+      "epoch": 0.03071995084807864,
+      "grad_norm": 125.15766906738281,
+      "learning_rate": 6.139741764131775e-06,
+      "loss": 3.2764,
+      "step": 19200
+    },
+    {
+      "epoch": 0.030879950592079052,
+      "grad_norm": 207.95970153808594,
+      "learning_rate": 6.171741252139967e-06,
+      "loss": 3.0725,
+      "step": 19300
+    },
+    {
+      "epoch": 0.031039950336079463,
+      "grad_norm": 368.32781982421875,
+      "learning_rate": 6.203740740148158e-06,
+      "loss": 3.0436,
+      "step": 19400
+    },
+    {
+      "epoch": 0.03119995008007987,
+      "grad_norm": 412.2764587402344,
+      "learning_rate": 6.23574022815635e-06,
+      "loss": 3.3493,
+      "step": 19500
+    },
+    {
+      "epoch": 0.03135994982408028,
+      "grad_norm": 155.46766662597656,
+      "learning_rate": 6.267739716164542e-06,
+      "loss": 3.0141,
+      "step": 19600
+    },
+    {
+      "epoch": 0.03151994956808069,
+      "grad_norm": 89.32569885253906,
+      "learning_rate": 6.299739204172733e-06,
+      "loss": 2.779,
+      "step": 19700
+    },
+    {
+      "epoch": 0.0316799493120811,
+      "grad_norm": 241.4378204345703,
+      "learning_rate": 6.3317386921809254e-06,
+      "loss": 3.3543,
+      "step": 19800
+    },
+    {
+      "epoch": 0.03183994905608151,
+      "grad_norm": 13.20569133758545,
+      "learning_rate": 6.363738180189118e-06,
+      "loss": 3.1526,
+      "step": 19900
+    },
+    {
+      "epoch": 0.03199994880008192,
+      "grad_norm": 270.6402893066406,
+      "learning_rate": 6.395737668197309e-06,
+      "loss": 2.7896,
+      "step": 20000
+    },
+    {
+      "epoch": 0.03215994854408233,
+      "grad_norm": 106.38632202148438,
+      "learning_rate": 6.427737156205501e-06,
+      "loss": 2.9398,
+      "step": 20100
+    },
+    {
+      "epoch": 0.03231994828808274,
+      "grad_norm": 191.7210693359375,
+      "learning_rate": 6.459416649333611e-06,
+      "loss": 3.1254,
+      "step": 20200
+    },
+    {
+      "epoch": 0.03247994803208315,
+      "grad_norm": 143.96151733398438,
+      "learning_rate": 6.491416137341803e-06,
+      "loss": 2.8832,
+      "step": 20300
+    },
+    {
+      "epoch": 0.03263994777608356,
+      "grad_norm": 150.26368713378906,
+      "learning_rate": 6.523415625349994e-06,
+      "loss": 3.0542,
+      "step": 20400
+    },
+    {
+      "epoch": 0.032799947520083965,
+      "grad_norm": 178.11705017089844,
+      "learning_rate": 6.5554151133581865e-06,
+      "loss": 2.9722,
+      "step": 20500
+    },
+    {
+      "epoch": 0.032959947264084376,
+      "grad_norm": 222.4794921875,
+      "learning_rate": 6.587414601366379e-06,
+      "loss": 2.9321,
+      "step": 20600
+    },
+    {
+      "epoch": 0.03311994700808479,
+      "grad_norm": 155.37796020507812,
+      "learning_rate": 6.619414089374571e-06,
+      "loss": 2.6448,
+      "step": 20700
+    },
+    {
+      "epoch": 0.0332799467520852,
+      "grad_norm": 155.5786590576172,
+      "learning_rate": 6.651413577382762e-06,
+      "loss": 3.4006,
+      "step": 20800
+    },
+    {
+      "epoch": 0.03343994649608561,
+      "grad_norm": 684.525146484375,
+      "learning_rate": 6.6834130653909545e-06,
+      "loss": 3.0022,
+      "step": 20900
+    },
+    {
+      "epoch": 0.03359994624008602,
+      "grad_norm": 545.5623168945312,
+      "learning_rate": 6.715412553399147e-06,
+      "loss": 2.6366,
+      "step": 21000
+    },
+    {
+      "epoch": 0.03375994598408642,
+      "grad_norm": 292.9093017578125,
+      "learning_rate": 6.747412041407339e-06,
+      "loss": 3.0112,
+      "step": 21100
+    },
+    {
+      "epoch": 0.033919945728086834,
+      "grad_norm": 2.531680107116699,
+      "learning_rate": 6.7794115294155294e-06,
+      "loss": 2.7856,
+      "step": 21200
+    },
+    {
+      "epoch": 0.034079945472087245,
+      "grad_norm": 216.7860565185547,
+      "learning_rate": 6.811411017423722e-06,
+      "loss": 3.0967,
+      "step": 21300
+    },
+    {
+      "epoch": 0.034239945216087656,
+      "grad_norm": 138.73028564453125,
+      "learning_rate": 6.843410505431913e-06,
+      "loss": 2.8754,
+      "step": 21400
+    },
+    {
+      "epoch": 0.034399944960088066,
+      "grad_norm": 78.2362060546875,
+      "learning_rate": 6.875409993440105e-06,
+      "loss": 3.1269,
+      "step": 21500
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 625001,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 128,
+  "trial_name": null,
+  "trial_params": null
+}
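The file above is the standard trainer_state.json that the Hugging Face Trainer writes next to each checkpoint: log_history holds one record per logging step (every 100 steps here) with epoch, grad_norm, learning_rate and loss, and the trailing keys capture the run configuration (max_steps 625001, save_steps 500, train_batch_size 128). A minimal sketch of how this log could be inspected locally; the path is an assumption and only the standard-library json module is used:

import json

# Sketch only: assumes the checkpoint folder was downloaded to ./checkpoint-21500.
with open("checkpoint-21500/trainer_state.json") as f:
    state = json.load(f)

# Keep only records that carry a training loss (evaluation records would not).
points = [(rec["step"], rec["loss"]) for rec in state["log_history"] if "loss" in rec]
steps, losses = zip(*points)
print(f"{len(points)} logged points, last step {steps[-1]}, last loss {losses[-1]:.4f}")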
    	
checkpoint-21500/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14eb8a69d6ba5b2bb8d2148b585526b13da2e45effc438997c1d2d513d64b838
+size 5496
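The three lines above are only the Git LFS pointer for training_args.bin (spec version, SHA-256 object id, and a payload size of 5496 bytes); the actual file is the pickled TrainingArguments object that the Trainer stores with every checkpoint. A minimal sketch of how it could be inspected once the real LFS object has been fetched; the path and the printed attributes are illustrative:

import torch

# Sketch only: requires the real LFS payload, not the pointer file shown above.
# weights_only=False is needed on recent PyTorch to unpickle a full Python object.
args = torch.load("checkpoint-21500/training_args.bin", weights_only=False)
print(args.per_device_train_batch_size, args.learning_rate, args.max_steps)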
    	
checkpoint-21500/vocab.txt
ADDED
The diff for this file is too large to render. See raw diff.