bobox's picture
Training in progress, step 718, checkpoint
24c4f80 verified
metadata
base_model: BXresearch/DeBERTa2-0.9B-ST-v2
datasets:
  - sentence-transformers/stsb
language:
  - en
library_name: sentence-transformers
metrics:
  - pearson_cosine
  - spearman_cosine
  - pearson_manhattan
  - spearman_manhattan
  - pearson_euclidean
  - spearman_euclidean
  - pearson_dot
  - spearman_dot
  - pearson_max
  - spearman_max
  - cosine_accuracy
  - cosine_accuracy_threshold
  - cosine_f1
  - cosine_f1_threshold
  - cosine_precision
  - cosine_recall
  - cosine_ap
  - dot_accuracy
  - dot_accuracy_threshold
  - dot_f1
  - dot_f1_threshold
  - dot_precision
  - dot_recall
  - dot_ap
  - manhattan_accuracy
  - manhattan_accuracy_threshold
  - manhattan_f1
  - manhattan_f1_threshold
  - manhattan_precision
  - manhattan_recall
  - manhattan_ap
  - euclidean_accuracy
  - euclidean_accuracy_threshold
  - euclidean_f1
  - euclidean_f1_threshold
  - euclidean_precision
  - euclidean_recall
  - euclidean_ap
  - max_accuracy
  - max_accuracy_threshold
  - max_f1
  - max_f1_threshold
  - max_precision
  - max_recall
  - max_ap
pipeline_tag: sentence-similarity
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  - dataset_size:5749
  - loss:AnglELoss
widget:
  - source_sentence: Left side of a silver train engine.
    sentences:
      - A close-up of a black train engine.
      - Two boys are in midair jumping into an inground pool.
      - An older Asian couple poses with a newborn baby at the dinner table.
  - source_sentence: Four girls in swimsuits are playing volleyball at the beach.
    sentences:
      - A little girl is walking down a hallway.
      - The man is erasing the chalk board.
      - Four women in bikinis are playing volleyball on the beach.
  - source_sentence: A woman is cooking meat.
    sentences:
      - The dogs are alone in the forest.
      - A man is speaking.
      - A dog jumps through a hoop.
  - source_sentence: A person is folding a square paper piece.
    sentences:
      - A woman is carrying her baby.
      - A person folds a piece of paper.
      - A dog is trying to get through his dog door.
  - source_sentence: The boy is playing the piano.
    sentences:
      - The woman is pouring oil into the pan.
      - A small black and white dog is swimming in water.
      - Two brown dogs are playing with each other in the snow.
model-index:
  - name: SentenceTransformer based on BXresearch/DeBERTa2-0.9B-ST-v2
    results:
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test
          type: sts-test
        metrics:
          - type: pearson_cosine
            value: 0.9166868414572735
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.9288517457757245
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.9280078114609798
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.9298778921764931
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.9284572778345226
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.9303868482609214
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.8999594910673557
            name: Pearson Dot
          - type: spearman_dot
            value: 0.9052810779226765
            name: Spearman Dot
          - type: pearson_max
            value: 0.9284572778345226
            name: Pearson Max
          - type: spearman_max
            value: 0.9303868482609214
            name: Spearman Max
      - task:
          type: binary-classification
          name: Binary Classification
        dataset:
          name: allNLI dev
          type: allNLI-dev
        metrics:
          - type: cosine_accuracy
            value: 0.7578125
            name: Cosine Accuracy
          - type: cosine_accuracy_threshold
            value: 0.7991553544998169
            name: Cosine Accuracy Threshold
          - type: cosine_f1
            value: 0.6228070175438596
            name: Cosine F1
          - type: cosine_f1_threshold
            value: 0.6627379059791565
            name: Cosine F1 Threshold
          - type: cosine_precision
            value: 0.48299319727891155
            name: Cosine Precision
          - type: cosine_recall
            value: 0.8765432098765432
            name: Cosine Recall
          - type: cosine_ap
            value: 0.5935429428248785
            name: Cosine Ap
          - type: dot_accuracy
            value: 0.75
            name: Dot Accuracy
          - type: dot_accuracy_threshold
            value: 843.883544921875
            name: Dot Accuracy Threshold
          - type: dot_f1
            value: 0.6079295154185022
            name: Dot F1
          - type: dot_f1_threshold
            value: 691.1240234375
            name: Dot F1 Threshold
          - type: dot_precision
            value: 0.4726027397260274
            name: Dot Precision
          - type: dot_recall
            value: 0.8518518518518519
            name: Dot Recall
          - type: dot_ap
            value: 0.5773443196804586
            name: Dot Ap
          - type: manhattan_accuracy
            value: 0.75390625
            name: Manhattan Accuracy
          - type: manhattan_accuracy_threshold
            value: 638.1295166015625
            name: Manhattan Accuracy Threshold
          - type: manhattan_f1
            value: 0.6244343891402715
            name: Manhattan F1
          - type: manhattan_f1_threshold
            value: 805.15966796875
            name: Manhattan F1 Threshold
          - type: manhattan_precision
            value: 0.4928571428571429
            name: Manhattan Precision
          - type: manhattan_recall
            value: 0.8518518518518519
            name: Manhattan Recall
          - type: manhattan_ap
            value: 0.5966657639990006
            name: Manhattan Ap
          - type: euclidean_accuracy
            value: 0.7578125
            name: Euclidean Accuracy
          - type: euclidean_accuracy_threshold
            value: 20.63375473022461
            name: Euclidean Accuracy Threshold
          - type: euclidean_f1
            value: 0.6272727272727272
            name: Euclidean F1
          - type: euclidean_f1_threshold
            value: 25.803028106689453
            name: Euclidean F1 Threshold
          - type: euclidean_precision
            value: 0.49640287769784175
            name: Euclidean Precision
          - type: euclidean_recall
            value: 0.8518518518518519
            name: Euclidean Recall
          - type: euclidean_ap
            value: 0.5937801624670859
            name: Euclidean Ap
          - type: max_accuracy
            value: 0.7578125
            name: Max Accuracy
          - type: max_accuracy_threshold
            value: 843.883544921875
            name: Max Accuracy Threshold
          - type: max_f1
            value: 0.6272727272727272
            name: Max F1
          - type: max_f1_threshold
            value: 805.15966796875
            name: Max F1 Threshold
          - type: max_precision
            value: 0.49640287769784175
            name: Max Precision
          - type: max_recall
            value: 0.8765432098765432
            name: Max Recall
          - type: max_ap
            value: 0.5966657639990006
            name: Max Ap
      - task:
          type: binary-classification
          name: Binary Classification
        dataset:
          name: Qnli dev
          type: Qnli-dev
        metrics:
          - type: cosine_accuracy
            value: 0.72265625
            name: Cosine Accuracy
          - type: cosine_accuracy_threshold
            value: 0.6946593523025513
            name: Cosine Accuracy Threshold
          - type: cosine_f1
            value: 0.7279693486590038
            name: Cosine F1
          - type: cosine_f1_threshold
            value: 0.6946593523025513
            name: Cosine F1 Threshold
          - type: cosine_precision
            value: 0.6884057971014492
            name: Cosine Precision
          - type: cosine_recall
            value: 0.7723577235772358
            name: Cosine Recall
          - type: cosine_ap
            value: 0.7556080779923782
            name: Cosine Ap
          - type: dot_accuracy
            value: 0.6953125
            name: Dot Accuracy
          - type: dot_accuracy_threshold
            value: 729.351806640625
            name: Dot Accuracy Threshold
          - type: dot_f1
            value: 0.7050847457627119
            name: Dot F1
          - type: dot_f1_threshold
            value: 687.4432983398438
            name: Dot F1 Threshold
          - type: dot_precision
            value: 0.6046511627906976
            name: Dot Precision
          - type: dot_recall
            value: 0.8455284552845529
            name: Dot Recall
          - type: dot_ap
            value: 0.6970235044462813
            name: Dot Ap
          - type: manhattan_accuracy
            value: 0.7265625
            name: Manhattan Accuracy
          - type: manhattan_accuracy_threshold
            value: 757.5811767578125
            name: Manhattan Accuracy Threshold
          - type: manhattan_f1
            value: 0.7307692307692308
            name: Manhattan F1
          - type: manhattan_f1_threshold
            value: 795.5022583007812
            name: Manhattan F1 Threshold
          - type: manhattan_precision
            value: 0.6934306569343066
            name: Manhattan Precision
          - type: manhattan_recall
            value: 0.7723577235772358
            name: Manhattan Recall
          - type: manhattan_ap
            value: 0.7716838426822531
            name: Manhattan Ap
          - type: euclidean_accuracy
            value: 0.73046875
            name: Euclidean Accuracy
          - type: euclidean_accuracy_threshold
            value: 25.439434051513672
            name: Euclidean Accuracy Threshold
          - type: euclidean_f1
            value: 0.7315175097276265
            name: Euclidean F1
          - type: euclidean_f1_threshold
            value: 25.521507263183594
            name: Euclidean F1 Threshold
          - type: euclidean_precision
            value: 0.7014925373134329
            name: Euclidean Precision
          - type: euclidean_recall
            value: 0.7642276422764228
            name: Euclidean Recall
          - type: euclidean_ap
            value: 0.7677200801958495
            name: Euclidean Ap
          - type: max_accuracy
            value: 0.73046875
            name: Max Accuracy
          - type: max_accuracy_threshold
            value: 757.5811767578125
            name: Max Accuracy Threshold
          - type: max_f1
            value: 0.7315175097276265
            name: Max F1
          - type: max_f1_threshold
            value: 795.5022583007812
            name: Max F1 Threshold
          - type: max_precision
            value: 0.7014925373134329
            name: Max Precision
          - type: max_recall
            value: 0.8455284552845529
            name: Max Recall
          - type: max_ap
            value: 0.7716838426822531
            name: Max Ap

SentenceTransformer based on BXresearch/DeBERTa2-0.9B-ST-v2

This is a sentence-transformers model fine-tuned from BXresearch/DeBERTa2-0.9B-ST-v2 on the sentence-transformers/stsb dataset. It maps sentences and paragraphs to a 1536-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

Model Sources

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: DebertaV2Model 
  (1): Pooling({'word_embedding_dimension': 1536, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("bobox/DeBERTa2-0.9B-ST-stsb-checkpoints-tmp")
# Run inference
sentences = [
    'The boy is playing the piano.',
    'The woman is pouring oil into the pan.',
    'A small black and white dog is swimming in water.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 1536]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Evaluation

Metrics

Semantic Similarity (dataset: sts-test)

Metric Value
pearson_cosine 0.9167
spearman_cosine 0.9289
pearson_manhattan 0.928
spearman_manhattan 0.9299
pearson_euclidean 0.9285
spearman_euclidean 0.9304
pearson_dot 0.9
spearman_dot 0.9053
pearson_max 0.9285
spearman_max 0.9304

Binary Classification (dataset: allNLI-dev)

Metric Value
cosine_accuracy 0.7578
cosine_accuracy_threshold 0.7992
cosine_f1 0.6228
cosine_f1_threshold 0.6627
cosine_precision 0.483
cosine_recall 0.8765
cosine_ap 0.5935
dot_accuracy 0.75
dot_accuracy_threshold 843.8835
dot_f1 0.6079
dot_f1_threshold 691.124
dot_precision 0.4726
dot_recall 0.8519
dot_ap 0.5773
manhattan_accuracy 0.7539
manhattan_accuracy_threshold 638.1295
manhattan_f1 0.6244
manhattan_f1_threshold 805.1597
manhattan_precision 0.4929
manhattan_recall 0.8519
manhattan_ap 0.5967
euclidean_accuracy 0.7578
euclidean_accuracy_threshold 20.6338
euclidean_f1 0.6273
euclidean_f1_threshold 25.803
euclidean_precision 0.4964
euclidean_recall 0.8519
euclidean_ap 0.5938
max_accuracy 0.7578
max_accuracy_threshold 843.8835
max_f1 0.6273
max_f1_threshold 805.1597
max_precision 0.4964
max_recall 0.8765
max_ap 0.5967

Binary Classification (dataset: Qnli-dev)

Metric Value
cosine_accuracy 0.7227
cosine_accuracy_threshold 0.6947
cosine_f1 0.728
cosine_f1_threshold 0.6947
cosine_precision 0.6884
cosine_recall 0.7724
cosine_ap 0.7556
dot_accuracy 0.6953
dot_accuracy_threshold 729.3518
dot_f1 0.7051
dot_f1_threshold 687.4433
dot_precision 0.6047
dot_recall 0.8455
dot_ap 0.697
manhattan_accuracy 0.7266
manhattan_accuracy_threshold 757.5812
manhattan_f1 0.7308
manhattan_f1_threshold 795.5023
manhattan_precision 0.6934
manhattan_recall 0.7724
manhattan_ap 0.7717
euclidean_accuracy 0.7305
euclidean_accuracy_threshold 25.4394
euclidean_f1 0.7315
euclidean_f1_threshold 25.5215
euclidean_precision 0.7015
euclidean_recall 0.7642
euclidean_ap 0.7677
max_accuracy 0.7305
max_accuracy_threshold 757.5812
max_f1 0.7315
max_f1_threshold 795.5023
max_precision 0.7015
max_recall 0.8455
max_ap 0.7717

Training Details

Training Dataset

sentence-transformers/stsb

  • Dataset: sentence-transformers/stsb at ab7a5ac
  • Size: 5,749 training samples
  • Columns: sentence1, sentence2, and score
  • Approximate statistics based on the first 1000 samples:
    |         | sentence1 | sentence2 | score |
    |:--------|:----------|:----------|:------|
    | type    | string    | string    | float |
    | details | min: 6 tokens; mean: 9.81 tokens; max: 27 tokens | min: 5 tokens; mean: 9.74 tokens; max: 25 tokens | min: 0.0; mean: 0.54; max: 1.0 |
  • Samples:
    | sentence1 | sentence2 | score |
    |:----------|:----------|:------|
    | A plane is taking off. | An air plane is taking off. | 1.0 |
    | A man is playing a large flute. | A man is playing a flute. | 0.76 |
    | A man is spreading shreded cheese on a pizza. | A man is spreading shredded cheese on an uncooked pizza. | 0.76 |
  • Loss: AnglELoss with these parameters:
    {
        "scale": 20.0,
        "similarity_fct": "pairwise_angle_sim"
    }
    

Evaluation Dataset

sentence-transformers/stsb

  • Dataset: sentence-transformers/stsb at ab7a5ac
  • Size: 512 evaluation samples
  • Columns: sentence1, sentence2, and score
  • Approximate statistics based on the first 1000 samples (this dataset has only 512 samples, so the statistics cover all of them):
    |         | sentence1 | sentence2 | score |
    |:--------|:----------|:----------|:------|
    | type    | string    | string    | float |
    | details | min: 6 tokens; mean: 11.16 tokens; max: 26 tokens | min: 6 tokens; mean: 11.17 tokens; max: 23 tokens | min: 0.0; mean: 0.47; max: 1.0 |
  • Samples:
    | sentence1 | sentence2 | score |
    |:----------|:----------|:------|
    | A man with a hard hat is dancing. | A man wearing a hard hat is dancing. | 1.0 |
    | A young child is riding a horse. | A child is riding a horse. | 0.95 |
    | A man is feeding a mouse to a snake. | The man is feeding a mouse to the snake. | 1.0 |
  • Loss: AnglELoss with these parameters:
    {
        "scale": 20.0,
        "similarity_fct": "pairwise_angle_sim"
    }
    

Training Hyperparameters

Non-Default Hyperparameters

  • eval_strategy: steps
  • per_device_eval_batch_size: 256
  • gradient_accumulation_steps: 2
  • learning_rate: 1.5e-05
  • weight_decay: 5e-05
  • num_train_epochs: 2
  • lr_scheduler_type: cosine_with_min_lr
  • lr_scheduler_kwargs: {'num_cycles': 0.5, 'min_lr': 2e-06}
  • warmup_ratio: 0.2
  • save_safetensors: False
  • fp16: True
  • push_to_hub: True
  • hub_model_id: bobox/DeBERTa2-0.9B-ST-stsb-checkpoints-tmp
  • hub_strategy: all_checkpoints
  • batch_sampler: no_duplicates

All Hyperparameters

Click to expand
  • overwrite_output_dir: False
  • do_predict: False
  • eval_strategy: steps
  • prediction_loss_only: True
  • per_device_train_batch_size: 8
  • per_device_eval_batch_size: 256
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 2
  • eval_accumulation_steps: None
  • learning_rate: 1.5e-05
  • weight_decay: 5e-05
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1.0
  • num_train_epochs: 2
  • max_steps: -1
  • lr_scheduler_type: cosine_with_min_lr
  • lr_scheduler_kwargs: {'num_cycles': 0.5, 'min_lr': 2e-06}
  • warmup_ratio: 0.2
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: True
  • logging_nan_inf_filter: True
  • save_safetensors: False
  • save_on_each_node: False
  • save_only_model: False
  • restore_callback_states_from_checkpoint: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • use_ipex: False
  • bf16: False
  • fp16: True
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: None
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: False
  • dataloader_num_workers: 0
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: False
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • ddp_find_unused_parameters: None
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: True
  • resume_from_checkpoint: None
  • hub_model_id: bobox/DeBERTa2-0.9B-ST-stsb-checkpoints-tmp
  • hub_strategy: all_checkpoints
  • hub_private_repo: False
  • hub_always_push: False
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • dispatch_batches: None
  • split_batches: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: False
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_eval_metrics: False
  • eval_on_start: False
  • batch_sampler: no_duplicates
  • multi_dataset_batch_sampler: proportional

Training Logs

Click to expand
Epoch Step Training Loss loss Qnli-dev_max_ap allNLI-dev_max_ap sts-test_spearman_cosine
0.0056 2 2.6549 - - - -
0.0111 4 2.7355 - - - -
0.0167 6 3.6211 - - - -
0.0223 8 3.0686 - - - -
0.0278 10 3.4113 - - - -
0.0334 12 2.4857 - - - -
0.0389 14 2.4288 - - - -
0.0445 16 2.6203 - - - -
0.0501 18 2.7441 - - - -
0.0556 20 3.4263 - - - -
0.0612 22 2.3565 - - - -
0.0668 24 2.5596 - - - -
0.0723 26 3.0866 - - - -
0.0779 28 3.223 - - - -
0.0834 30 2.012 - - - -
0.0890 32 3.2829 - - - -
0.0946 34 3.9277 - - - -
0.1001 36 2.785 2.6652 0.7960 0.6275 0.9294
0.1057 38 3.4966 - - - -
0.1113 40 2.5923 - - - -
0.1168 42 3.4418 - - - -
0.1224 44 2.6519 - - - -
0.1280 46 3.7746 - - - -
0.1335 48 2.6736 - - - -
0.1391 50 3.6764 - - - -
0.1446 52 3.5311 - - - -
0.1502 54 2.5869 - - - -
0.1558 56 3.183 - - - -
0.1613 58 2.747 - - - -
0.1669 60 1.965 - - - -
0.1725 62 2.1785 - - - -
0.1780 64 2.5788 - - - -
0.1836 66 3.1776 - - - -
0.1892 68 2.6464 - - - -
0.1947 70 2.7619 - - - -
0.2003 72 3.0911 2.6171 0.7923 0.6295 0.9276
0.2058 74 2.4308 - - - -
0.2114 76 3.2068 - - - -
0.2170 78 2.4081 - - - -
0.2225 80 2.3257 - - - -
0.2281 82 3.0499 - - - -
0.2337 84 3.2518 - - - -
0.2392 86 2.7876 - - - -
0.2448 88 2.7898 - - - -
0.2503 90 2.7116 - - - -
0.2559 92 3.0505 - - - -
0.2615 94 2.5901 - - - -
0.2670 96 1.9563 - - - -
0.2726 98 2.1006 - - - -
0.2782 100 2.1853 - - - -
0.2837 102 2.327 - - - -
0.2893 104 1.9937 - - - -
0.2949 106 2.543 - - - -
0.3004 108 1.9826 2.4596 0.7919 0.6329 0.9341
0.3060 110 3.0746 - - - -
0.3115 112 2.4145 - - - -
0.3171 114 2.244 - - - -
0.3227 116 2.78 - - - -
0.3282 118 2.8323 - - - -
0.3338 120 2.4639 - - - -
0.3394 122 2.9216 - - - -
0.3449 124 2.0747 - - - -
0.3505 126 2.7573 - - - -
0.3561 128 3.7019 - - - -
0.3616 130 3.3155 - - - -
0.3672 132 3.625 - - - -
0.3727 134 3.2889 - - - -
0.3783 136 3.5936 - - - -
0.3839 138 3.5932 - - - -
0.3894 140 3.0457 - - - -
0.3950 142 3.093 - - - -
0.4006 144 2.7189 2.4698 0.7752 0.5896 0.9346
0.4061 146 3.2578 - - - -
0.4117 148 3.3581 - - - -
0.4172 150 2.9734 - - - -
0.4228 152 3.0514 - - - -
0.4284 154 3.1983 - - - -
0.4339 156 2.9033 - - - -
0.4395 158 2.9279 - - - -
0.4451 160 3.1336 - - - -
0.4506 162 3.1467 - - - -
0.4562 164 3.0381 - - - -
0.4618 166 3.068 - - - -
0.4673 168 3.0261 - - - -
0.4729 170 3.2867 - - - -
0.4784 172 2.8474 - - - -
0.4840 174 2.7982 - - - -
0.4896 176 2.7945 - - - -
0.4951 178 3.1312 - - - -
0.5007 180 2.9704 2.4640 0.7524 0.6033 0.9242
0.5063 182 2.9856 - - - -
0.5118 184 3.014 - - - -
0.5174 186 3.0125 - - - -
0.5229 188 2.8149 - - - -
0.5285 190 2.7954 - - - -
0.5341 192 3.078 - - - -
0.5396 194 2.955 - - - -
0.5452 196 2.9468 - - - -
0.5508 198 3.0791 - - - -
0.5563 200 2.998 - - - -
0.5619 202 2.9068 - - - -
0.5675 204 2.8283 - - - -
0.5730 206 2.9216 - - - -
0.5786 208 3.3441 - - - -
0.5841 210 3.0 - - - -
0.5897 212 2.9023 - - - -
0.5953 214 2.8177 - - - -
0.6008 216 2.8262 2.4979 0.7899 0.6037 0.9260
0.6064 218 2.7832 - - - -
0.6120 220 3.0085 - - - -
0.6175 222 2.8762 - - - -
0.6231 224 3.147 - - - -
0.6287 226 3.4262 - - - -
0.6342 228 2.8271 - - - -
0.6398 230 2.4024 - - - -
0.6453 232 2.7556 - - - -
0.6509 234 3.4652 - - - -
0.6565 236 2.7235 - - - -
0.6620 238 2.6498 - - - -
0.6676 240 3.0933 - - - -
0.6732 242 3.1193 - - - -
0.6787 244 2.7249 - - - -
0.6843 246 2.8931 - - - -
0.6898 248 2.7913 - - - -
0.6954 250 2.6933 - - - -
0.7010 252 2.5632 2.4585 0.7700 0.6065 0.9298
0.7065 254 2.8347 - - - -
0.7121 256 2.3827 - - - -
0.7177 258 2.9065 - - - -
0.7232 260 2.8162 - - - -
0.7288 262 2.5485 - - - -
0.7344 264 2.5751 - - - -
0.7399 266 2.9056 - - - -
0.7455 268 3.1397 - - - -
0.7510 270 3.3107 - - - -
0.7566 272 2.9024 - - - -
0.7622 274 2.2307 - - - -
0.7677 276 3.0097 - - - -
0.7733 278 3.1406 - - - -
0.7789 280 2.6786 - - - -
0.7844 282 2.8882 - - - -
0.7900 284 2.7215 - - - -
0.7955 286 3.4188 - - - -
0.8011 288 2.9901 2.4414 0.7665 0.6023 0.9288
0.8067 290 2.5144 - - - -
0.8122 292 3.1932 - - - -
0.8178 294 2.9733 - - - -
0.8234 296 2.6895 - - - -
0.8289 298 2.678 - - - -
0.8345 300 2.5462 - - - -
0.8401 302 2.6911 - - - -
0.8456 304 2.8404 - - - -
0.8512 306 2.5358 - - - -
0.8567 308 3.1245 - - - -
0.8623 310 2.3404 - - - -
0.8679 312 3.0751 - - - -
0.8734 314 2.7005 - - - -
0.8790 316 2.7387 - - - -
0.8846 318 2.7227 - - - -
0.8901 320 2.9085 - - - -
0.8957 322 3.3239 - - - -
0.9013 324 2.4256 2.4106 0.7644 0.6087 0.9304
0.9068 326 2.5059 - - - -
0.9124 328 2.5387 - - - -
0.9179 330 2.899 - - - -
0.9235 332 2.7256 - - - -
0.9291 334 2.4862 - - - -
0.9346 336 3.0014 - - - -
0.9402 338 2.4164 - - - -
0.9458 340 2.3148 - - - -
0.9513 342 2.9414 - - - -
0.9569 344 2.4435 - - - -
0.9624 346 2.6286 - - - -
0.9680 348 2.1744 - - - -
0.9736 350 2.5866 - - - -
0.9791 352 2.8333 - - - -
0.9847 354 2.3544 - - - -
0.9903 356 2.5397 - - - -
0.9958 358 3.4058 - - - -
1.0014 360 2.2904 2.4089 0.7888 0.6104 0.9338
1.0070 362 2.7925 - - - -
1.0125 364 2.6415 - - - -
1.0181 366 2.724 - - - -
1.0236 368 2.569 - - - -
1.0292 370 2.808 - - - -
1.0348 372 2.4672 - - - -
1.0403 374 2.3964 - - - -
1.0459 376 2.3518 - - - -
1.0515 378 2.7617 - - - -
1.0570 380 2.5651 - - - -
1.0626 382 2.2623 - - - -
1.0682 384 2.2048 - - - -
1.0737 386 2.1426 - - - -
1.0793 388 1.8182 - - - -
1.0848 390 2.3166 - - - -
1.0904 392 2.4101 - - - -
1.0960 394 2.8932 - - - -
1.1015 396 3.0201 2.4217 0.7851 0.6205 0.9301
1.1071 398 2.6101 - - - -
1.1127 400 2.3627 - - - -
1.1182 402 2.5402 - - - -
1.1238 404 2.695 - - - -
1.1293 406 3.0563 - - - -
1.1349 408 2.2296 - - - -
1.1405 410 3.057 - - - -
1.1460 412 2.8023 - - - -
1.1516 414 2.6492 - - - -
1.1572 416 2.2406 - - - -
1.1627 418 1.7195 - - - -
1.1683 420 2.2773 - - - -
1.1739 422 2.3639 - - - -
1.1794 424 2.3348 - - - -
1.1850 426 2.6791 - - - -
1.1905 428 2.3621 - - - -
1.1961 430 2.5224 - - - -
1.2017 432 2.4063 2.4724 0.7628 0.6043 0.9270
1.2072 434 1.9713 - - - -
1.2128 436 2.4265 - - - -
1.2184 438 2.0827 - - - -
1.2239 440 2.0696 - - - -
1.2295 442 2.7507 - - - -
1.2350 444 2.5436 - - - -
1.2406 446 2.4039 - - - -
1.2462 448 2.4229 - - - -
1.2517 450 2.323 - - - -
1.2573 452 2.6099 - - - -
1.2629 454 2.0329 - - - -
1.2684 456 1.8797 - - - -
1.2740 458 1.4485 - - - -
1.2796 460 1.6794 - - - -
1.2851 462 2.0934 - - - -
1.2907 464 1.9579 - - - -
1.2962 466 1.9288 - - - -
1.3018 468 1.5874 2.5056 0.7833 0.5948 0.9345
1.3074 470 1.8715 - - - -
1.3129 472 1.3778 - - - -
1.3185 474 2.2242 - - - -
1.3241 476 2.4031 - - - -
1.3296 478 1.924 - - - -
1.3352 480 1.7895 - - - -
1.3408 482 2.0349 - - - -
1.3463 484 1.8116 - - - -
1.3519 486 2.353 - - - -
1.3574 488 3.4263 - - - -
1.3630 490 4.0606 - - - -
1.3686 492 2.7423 - - - -
1.3741 494 2.8461 - - - -
1.3797 496 3.0742 - - - -
1.3853 498 2.2054 - - - -
1.3908 500 2.6009 - - - -
1.3964 502 2.242 - - - -
1.4019 504 2.9416 2.5288 0.7969 0.6010 0.9323
1.4075 506 3.8179 - - - -
1.4131 508 3.0147 - - - -
1.4186 510 2.2185 - - - -
1.4242 512 3.0323 - - - -
1.4298 514 2.6922 - - - -
1.4353 516 2.6219 - - - -
1.4409 518 2.4365 - - - -
1.4465 520 3.1643 - - - -
1.4520 522 2.5548 - - - -
1.4576 524 2.3798 - - - -
1.4631 526 2.6361 - - - -
1.4687 528 2.6859 - - - -
1.4743 530 2.6071 - - - -
1.4798 532 2.2565 - - - -
1.4854 534 2.2415 - - - -
1.4910 536 2.4591 - - - -
1.4965 538 2.6729 - - - -
1.5021 540 2.3898 2.5025 0.7881 0.5978 0.9300
1.5076 542 2.4614 - - - -
1.5132 544 2.5447 - - - -
1.5188 546 2.502 - - - -
1.5243 548 2.1892 - - - -
1.5299 550 2.7081 - - - -
1.5355 552 2.5523 - - - -
1.5410 554 2.3571 - - - -
1.5466 556 2.7694 - - - -
1.5522 558 2.2 - - - -
1.5577 560 2.4179 - - - -
1.5633 562 2.3914 - - - -
1.5688 564 2.1722 - - - -
1.5744 566 2.345 - - - -
1.5800 568 3.0069 - - - -
1.5855 570 2.4231 - - - -
1.5911 572 2.3597 - - - -
1.5967 574 2.143 - - - -
1.6022 576 2.6288 2.5368 0.7943 0.6048 0.9265
1.6078 578 2.3905 - - - -
1.6134 580 2.1823 - - - -
1.6189 582 2.367 - - - -
1.6245 584 2.8189 - - - -
1.6300 586 2.6536 - - - -
1.6356 588 2.2134 - - - -
1.6412 590 1.6949 - - - -
1.6467 592 2.2029 - - - -
1.6523 594 3.0223 - - - -
1.6579 596 2.239 - - - -
1.6634 598 2.3388 - - - -
1.6690 600 2.3066 - - - -
1.6745 602 2.4762 - - - -
1.6801 604 1.9503 - - - -
1.6857 606 2.1252 - - - -
1.6912 608 1.8253 - - - -
1.6968 610 2.2938 - - - -
1.7024 612 1.9489 2.5747 0.7675 0.5964 0.9267
1.7079 614 1.9238 - - - -
1.7135 616 1.8171 - - - -
1.7191 618 2.2371 - - - -
1.7246 620 2.4901 - - - -
1.7302 622 1.8503 - - - -
1.7357 624 2.017 - - - -
1.7413 626 2.3069 - - - -
1.7469 628 2.444 - - - -
1.7524 630 1.9606 - - - -
1.7580 632 2.2364 - - - -
1.7636 634 1.8711 - - - -
1.7691 636 2.4233 - - - -
1.7747 638 2.4065 - - - -
1.7803 640 2.0725 - - - -
1.7858 642 2.0578 - - - -
1.7914 644 2.2066 - - - -
1.7969 646 1.7767 - - - -
1.8025 648 2.7388 2.5685 0.7663 0.5959 0.9292
1.8081 650 1.854 - - - -
1.8136 652 2.7337 - - - -
1.8192 654 2.4477 - - - -
1.8248 656 2.4818 - - - -
1.8303 658 1.8592 - - - -
1.8359 660 1.8396 - - - -
1.8414 662 2.3893 - - - -
1.8470 664 2.0139 - - - -
1.8526 666 2.8837 - - - -
1.8581 668 2.0342 - - - -
1.8637 670 1.8857 - - - -
1.8693 672 2.1147 - - - -
1.8748 674 1.6263 - - - -
1.8804 676 2.2987 - - - -
1.8860 678 1.9678 - - - -
1.8915 680 1.9999 - - - -
1.8971 682 2.2802 - - - -
1.9026 684 1.9666 2.5536 0.7717 0.5967 0.9289
1.9082 686 1.8156 - - - -
1.9138 688 1.9542 - - - -
1.9193 690 1.859 - - - -
1.9249 692 1.6237 - - - -
1.9305 694 2.3085 - - - -
1.9360 696 2.1461 - - - -
1.9416 698 1.7024 - - - -
1.9471 700 2.2181 - - - -
1.9527 702 2.4782 - - - -
1.9583 704 1.7378 - - - -
1.9638 706 2.0422 - - - -
1.9694 708 1.7577 - - - -
1.9750 710 2.0209 - - - -
1.9805 712 2.0372 - - - -
1.9861 714 2.0915 - - - -
1.9917 716 1.603 - - - -
1.9972 718 1.7111 - - - -

Framework Versions

  • Python: 3.10.12
  • Sentence Transformers: 3.0.1
  • Transformers: 4.42.4
  • PyTorch: 2.4.0+cu121
  • Accelerate: 0.32.1
  • Datasets: 2.21.0
  • Tokenizers: 0.19.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

AnglELoss

@misc{li2023angleoptimized,
    title={AnglE-optimized Text Embeddings}, 
    author={Xianming Li and Jing Li},
    year={2023},
    eprint={2309.12871},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}