Training in progress, step 718, checkpoint

24c4f80 verified about 1 year ago

66.8 kB

metadata

base_model: BXresearch/DeBERTa2-0.9B-ST-v2
datasets:
  - sentence-transformers/stsb
language:
  - en
library_name: sentence-transformers
metrics:
  - pearson_cosine
  - spearman_cosine
  - pearson_manhattan
  - spearman_manhattan
  - pearson_euclidean
  - spearman_euclidean
  - pearson_dot
  - spearman_dot
  - pearson_max
  - spearman_max
  - cosine_accuracy
  - cosine_accuracy_threshold
  - cosine_f1
  - cosine_f1_threshold
  - cosine_precision
  - cosine_recall
  - cosine_ap
  - dot_accuracy
  - dot_accuracy_threshold
  - dot_f1
  - dot_f1_threshold
  - dot_precision
  - dot_recall
  - dot_ap
  - manhattan_accuracy
  - manhattan_accuracy_threshold
  - manhattan_f1
  - manhattan_f1_threshold
  - manhattan_precision
  - manhattan_recall
  - manhattan_ap
  - euclidean_accuracy
  - euclidean_accuracy_threshold
  - euclidean_f1
  - euclidean_f1_threshold
  - euclidean_precision
  - euclidean_recall
  - euclidean_ap
  - max_accuracy
  - max_accuracy_threshold
  - max_f1
  - max_f1_threshold
  - max_precision
  - max_recall
  - max_ap
pipeline_tag: sentence-similarity
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  - dataset_size:5749
  - loss:AnglELoss
widget:
  - source_sentence: Left side of a silver train engine.
    sentences:
      - A close-up of a black train engine.
      - Two boys are in midair jumping into an inground pool.
      - An older Asian couple poses with a newborn baby at the dinner table.
  - source_sentence: Four girls in swimsuits are playing volleyball at the beach.
    sentences:
      - A little girl is walking down a hallway.
      - The man is erasing the chalk board.
      - Four women in bikinis are playing volleyball on the beach.
  - source_sentence: A woman is cooking meat.
    sentences:
      - The dogs are alone in the forest.
      - A man is speaking.
      - A dog jumps through a hoop.
  - source_sentence: A person is folding a square paper piece.
    sentences:
      - A woman is carrying her baby.
      - A person folds a piece of paper.
      - A dog is trying to get through his dog door.
  - source_sentence: The boy is playing the piano.
    sentences:
      - The woman is pouring oil into the pan.
      - A small black and white dog is swimming in water.
      - Two brown dogs are playing with each other in the snow.
model-index:
  - name: SentenceTransformer based on BXresearch/DeBERTa2-0.9B-ST-v2
    results:
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test
          type: sts-test
        metrics:
          - type: pearson_cosine
            value: 0.9166868414572735
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.9288517457757245
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.9280078114609798
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.9298778921764931
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.9284572778345226
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.9303868482609214
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.8999594910673557
            name: Pearson Dot
          - type: spearman_dot
            value: 0.9052810779226765
            name: Spearman Dot
          - type: pearson_max
            value: 0.9284572778345226
            name: Pearson Max
          - type: spearman_max
            value: 0.9303868482609214
            name: Spearman Max
      - task:
          type: binary-classification
          name: Binary Classification
        dataset:
          name: allNLI dev
          type: allNLI-dev
        metrics:
          - type: cosine_accuracy
            value: 0.7578125
            name: Cosine Accuracy
          - type: cosine_accuracy_threshold
            value: 0.7991553544998169
            name: Cosine Accuracy Threshold
          - type: cosine_f1
            value: 0.6228070175438596
            name: Cosine F1
          - type: cosine_f1_threshold
            value: 0.6627379059791565
            name: Cosine F1 Threshold
          - type: cosine_precision
            value: 0.48299319727891155
            name: Cosine Precision
          - type: cosine_recall
            value: 0.8765432098765432
            name: Cosine Recall
          - type: cosine_ap
            value: 0.5935429428248785
            name: Cosine Ap
          - type: dot_accuracy
            value: 0.75
            name: Dot Accuracy
          - type: dot_accuracy_threshold
            value: 843.883544921875
            name: Dot Accuracy Threshold
          - type: dot_f1
            value: 0.6079295154185022
            name: Dot F1
          - type: dot_f1_threshold
            value: 691.1240234375
            name: Dot F1 Threshold
          - type: dot_precision
            value: 0.4726027397260274
            name: Dot Precision
          - type: dot_recall
            value: 0.8518518518518519
            name: Dot Recall
          - type: dot_ap
            value: 0.5773443196804586
            name: Dot Ap
          - type: manhattan_accuracy
            value: 0.75390625
            name: Manhattan Accuracy
          - type: manhattan_accuracy_threshold
            value: 638.1295166015625
            name: Manhattan Accuracy Threshold
          - type: manhattan_f1
            value: 0.6244343891402715
            name: Manhattan F1
          - type: manhattan_f1_threshold
            value: 805.15966796875
            name: Manhattan F1 Threshold
          - type: manhattan_precision
            value: 0.4928571428571429
            name: Manhattan Precision
          - type: manhattan_recall
            value: 0.8518518518518519
            name: Manhattan Recall
          - type: manhattan_ap
            value: 0.5966657639990006
            name: Manhattan Ap
          - type: euclidean_accuracy
            value: 0.7578125
            name: Euclidean Accuracy
          - type: euclidean_accuracy_threshold
            value: 20.63375473022461
            name: Euclidean Accuracy Threshold
          - type: euclidean_f1
            value: 0.6272727272727272
            name: Euclidean F1
          - type: euclidean_f1_threshold
            value: 25.803028106689453
            name: Euclidean F1 Threshold
          - type: euclidean_precision
            value: 0.49640287769784175
            name: Euclidean Precision
          - type: euclidean_recall
            value: 0.8518518518518519
            name: Euclidean Recall
          - type: euclidean_ap
            value: 0.5937801624670859
            name: Euclidean Ap
          - type: max_accuracy
            value: 0.7578125
            name: Max Accuracy
          - type: max_accuracy_threshold
            value: 843.883544921875
            name: Max Accuracy Threshold
          - type: max_f1
            value: 0.6272727272727272
            name: Max F1
          - type: max_f1_threshold
            value: 805.15966796875
            name: Max F1 Threshold
          - type: max_precision
            value: 0.49640287769784175
            name: Max Precision
          - type: max_recall
            value: 0.8765432098765432
            name: Max Recall
          - type: max_ap
            value: 0.5966657639990006
            name: Max Ap
      - task:
          type: binary-classification
          name: Binary Classification
        dataset:
          name: Qnli dev
          type: Qnli-dev
        metrics:
          - type: cosine_accuracy
            value: 0.72265625
            name: Cosine Accuracy
          - type: cosine_accuracy_threshold
            value: 0.6946593523025513
            name: Cosine Accuracy Threshold
          - type: cosine_f1
            value: 0.7279693486590038
            name: Cosine F1
          - type: cosine_f1_threshold
            value: 0.6946593523025513
            name: Cosine F1 Threshold
          - type: cosine_precision
            value: 0.6884057971014492
            name: Cosine Precision
          - type: cosine_recall
            value: 0.7723577235772358
            name: Cosine Recall
          - type: cosine_ap
            value: 0.7556080779923782
            name: Cosine Ap
          - type: dot_accuracy
            value: 0.6953125
            name: Dot Accuracy
          - type: dot_accuracy_threshold
            value: 729.351806640625
            name: Dot Accuracy Threshold
          - type: dot_f1
            value: 0.7050847457627119
            name: Dot F1
          - type: dot_f1_threshold
            value: 687.4432983398438
            name: Dot F1 Threshold
          - type: dot_precision
            value: 0.6046511627906976
            name: Dot Precision
          - type: dot_recall
            value: 0.8455284552845529
            name: Dot Recall
          - type: dot_ap
            value: 0.6970235044462813
            name: Dot Ap
          - type: manhattan_accuracy
            value: 0.7265625
            name: Manhattan Accuracy
          - type: manhattan_accuracy_threshold
            value: 757.5811767578125
            name: Manhattan Accuracy Threshold
          - type: manhattan_f1
            value: 0.7307692307692308
            name: Manhattan F1
          - type: manhattan_f1_threshold
            value: 795.5022583007812
            name: Manhattan F1 Threshold
          - type: manhattan_precision
            value: 0.6934306569343066
            name: Manhattan Precision
          - type: manhattan_recall
            value: 0.7723577235772358
            name: Manhattan Recall
          - type: manhattan_ap
            value: 0.7716838426822531
            name: Manhattan Ap
          - type: euclidean_accuracy
            value: 0.73046875
            name: Euclidean Accuracy
          - type: euclidean_accuracy_threshold
            value: 25.439434051513672
            name: Euclidean Accuracy Threshold
          - type: euclidean_f1
            value: 0.7315175097276265
            name: Euclidean F1
          - type: euclidean_f1_threshold
            value: 25.521507263183594
            name: Euclidean F1 Threshold
          - type: euclidean_precision
            value: 0.7014925373134329
            name: Euclidean Precision
          - type: euclidean_recall
            value: 0.7642276422764228
            name: Euclidean Recall
          - type: euclidean_ap
            value: 0.7677200801958495
            name: Euclidean Ap
          - type: max_accuracy
            value: 0.73046875
            name: Max Accuracy
          - type: max_accuracy_threshold
            value: 757.5811767578125
            name: Max Accuracy Threshold
          - type: max_f1
            value: 0.7315175097276265
            name: Max F1
          - type: max_f1_threshold
            value: 795.5022583007812
            name: Max F1 Threshold
          - type: max_precision
            value: 0.7014925373134329
            name: Max Precision
          - type: max_recall
            value: 0.8455284552845529
            name: Max Recall
          - type: max_ap
            value: 0.7716838426822531
            name: Max Ap

SentenceTransformer based on BXresearch/DeBERTa2-0.9B-ST-v2

This is a sentence-transformers model finetuned from BXresearch/DeBERTa2-0.9B-ST-v2 on the sentence-transformers/stsb dataset. It maps sentences & paragraphs to a 1536-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

Model Type: Sentence Transformer
Base model: BXresearch/DeBERTa2-0.9B-ST-v2
Maximum Sequence Length: 512 tokens
Output Dimensionality: 1536 dimensions
Similarity Function: Cosine Similarity
Training Dataset:
- sentence-transformers/stsb
Language: en

Model Sources

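Documentation: [Sentence Transformers Documentation](https://sbert.net)
Repository: [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
Hugging Face: [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)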

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: DebertaV2Model 
  (1): Pooling({'word_embedding_dimension': 1536, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("bobox/DeBERTa2-0.9B-ST-stsb-checkpoints-tmp")
# Run inference
sentences = [
    'The boy is playing the piano.',
    'The woman is pouring oil into the pan.',
    'A small black and white dog is swimming in water.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 1536]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Evaluation

Metrics

Semantic Similarity

Dataset: sts-test
Evaluated with EmbeddingSimilarityEvaluator

Metric	Value
pearson_cosine	0.9167
spearman_cosine	0.9289
pearson_manhattan	0.928
spearman_manhattan	0.9299
pearson_euclidean	0.9285
spearman_euclidean	0.9304
pearson_dot	0.9
spearman_dot	0.9053
pearson_max	0.9285
spearman_max	0.9304

Binary Classification

Dataset: allNLI-dev
Evaluated with BinaryClassificationEvaluator

Metric	Value
cosine_accuracy	0.7578
cosine_accuracy_threshold	0.7992
cosine_f1	0.6228
cosine_f1_threshold	0.6627
cosine_precision	0.483
cosine_recall	0.8765
cosine_ap	0.5935
dot_accuracy	0.75
dot_accuracy_threshold	843.8835
dot_f1	0.6079
dot_f1_threshold	691.124
dot_precision	0.4726
dot_recall	0.8519
dot_ap	0.5773
manhattan_accuracy	0.7539
manhattan_accuracy_threshold	638.1295
manhattan_f1	0.6244
manhattan_f1_threshold	805.1597
manhattan_precision	0.4929
manhattan_recall	0.8519
manhattan_ap	0.5967
euclidean_accuracy	0.7578
euclidean_accuracy_threshold	20.6338
euclidean_f1	0.6273
euclidean_f1_threshold	25.803
euclidean_precision	0.4964
euclidean_recall	0.8519
euclidean_ap	0.5938
max_accuracy	0.7578
max_accuracy_threshold	843.8835
max_f1	0.6273
max_f1_threshold	805.1597
max_precision	0.4964
max_recall	0.8765
max_ap	0.5967

Binary Classification

Dataset: Qnli-dev
Evaluated with BinaryClassificationEvaluator

Metric	Value
cosine_accuracy	0.7227
cosine_accuracy_threshold	0.6947
cosine_f1	0.728
cosine_f1_threshold	0.6947
cosine_precision	0.6884
cosine_recall	0.7724
cosine_ap	0.7556
dot_accuracy	0.6953
dot_accuracy_threshold	729.3518
dot_f1	0.7051
dot_f1_threshold	687.4433
dot_precision	0.6047
dot_recall	0.8455
dot_ap	0.697
manhattan_accuracy	0.7266
manhattan_accuracy_threshold	757.5812
manhattan_f1	0.7308
manhattan_f1_threshold	795.5023
manhattan_precision	0.6934
manhattan_recall	0.7724
manhattan_ap	0.7717
euclidean_accuracy	0.7305
euclidean_accuracy_threshold	25.4394
euclidean_f1	0.7315
euclidean_f1_threshold	25.5215
euclidean_precision	0.7015
euclidean_recall	0.7642
euclidean_ap	0.7677
max_accuracy	0.7305
max_accuracy_threshold	757.5812
max_f1	0.7315
max_f1_threshold	795.5023
max_precision	0.7015
max_recall	0.8455
max_ap	0.7717

Training Details

Training Dataset

sentence-transformers/stsb

Dataset: sentence-transformers/stsb at ab7a5ac
Size: 5,749 training samples
Columns: sentence1, sentence2, and score
Approximate statistics based on the first 1000 samples:
	sentence1	sentence2	score
type	string	string	float
details	min: 6 tokens mean: 9.81 tokens max: 27 tokens	min: 5 tokens mean: 9.74 tokens max: 25 tokens	min: 0.0 mean: 0.54 max: 1.0

Samples:

sentence1	sentence2	score
`A plane is taking off.`	`An air plane is taking off.`	`1.0`
`A man is playing a large flute.`	`A man is playing a flute.`	`0.76`
`A man is spreading shreded cheese on a pizza.`	`A man is spreading shredded cheese on an uncooked pizza.`	`0.76`

Loss: AnglELoss with these parameters:

{
    "scale": 20.0,
    "similarity_fct": "pairwise_angle_sim"
}

Evaluation Dataset

sentence-transformers/stsb

Dataset: sentence-transformers/stsb at ab7a5ac
Size: 512 evaluation samples
Columns: sentence1, sentence2, and score
Approximate statistics based on the first 1000 samples (note: this split contains only 512 samples):
	sentence1	sentence2	score
type	string	string	float
details	min: 6 tokens mean: 11.16 tokens max: 26 tokens	min: 6 tokens mean: 11.17 tokens max: 23 tokens	min: 0.0 mean: 0.47 max: 1.0

Samples:

sentence1	sentence2	score
`A man with a hard hat is dancing.`	`A man wearing a hard hat is dancing.`	`1.0`
`A young child is riding a horse.`	`A child is riding a horse.`	`0.95`
`A man is feeding a mouse to a snake.`	`The man is feeding a mouse to the snake.`	`1.0`

Loss: AnglELoss with these parameters:

{
    "scale": 20.0,
    "similarity_fct": "pairwise_angle_sim"
}

Training Hyperparameters

Non-Default Hyperparameters

eval_strategy: steps
per_device_eval_batch_size: 256
gradient_accumulation_steps: 2
learning_rate: 1.5e-05
weight_decay: 5e-05
num_train_epochs: 2
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs: {'num_cycles': 0.5, 'min_lr': 2e-06}
warmup_ratio: 0.2
save_safetensors: False
fp16: True
push_to_hub: True
hub_model_id: bobox/DeBERTa2-0.9B-ST-stsb-checkpoints-tmp
hub_strategy: all_checkpoints
batch_sampler: no_duplicates

All Hyperparameters

Click to expand

overwrite_output_dir: False
do_predict: False
eval_strategy: steps
prediction_loss_only: True
per_device_train_batch_size: 8
per_device_eval_batch_size: 256
per_gpu_train_batch_size: None
per_gpu_eval_batch_size: None
gradient_accumulation_steps: 2
eval_accumulation_steps: None
learning_rate: 1.5e-05
weight_decay: 5e-05
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1e-08
max_grad_norm: 1.0
num_train_epochs: 2
max_steps: -1
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs: {'num_cycles': 0.5, 'min_lr': 2e-06}
warmup_ratio: 0.2
warmup_steps: 0
log_level: passive
log_level_replica: warning
log_on_each_node: True
logging_nan_inf_filter: True
save_safetensors: False
save_on_each_node: False
save_only_model: False
restore_callback_states_from_checkpoint: False
no_cuda: False
use_cpu: False
use_mps_device: False
seed: 42
data_seed: None
jit_mode_eval: False
use_ipex: False
bf16: False
fp16: True
fp16_opt_level: O1
half_precision_backend: auto
bf16_full_eval: False
fp16_full_eval: False
tf32: None
local_rank: 0
ddp_backend: None
tpu_num_cores: None
tpu_metrics_debug: False
debug: []
dataloader_drop_last: False
dataloader_num_workers: 0
dataloader_prefetch_factor: None
past_index: -1
disable_tqdm: False
remove_unused_columns: True
label_names: None
load_best_model_at_end: False
ignore_data_skip: False
fsdp: []
fsdp_min_num_params: 0
fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
fsdp_transformer_layer_cls_to_wrap: None
accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
deepspeed: None
label_smoothing_factor: 0.0
optim: adamw_torch
optim_args: None
adafactor: False
group_by_length: False
length_column_name: length
ddp_find_unused_parameters: None
ddp_bucket_cap_mb: None
ddp_broadcast_buffers: False
dataloader_pin_memory: True
dataloader_persistent_workers: False
skip_memory_metrics: True
use_legacy_prediction_loop: False
push_to_hub: True
resume_from_checkpoint: None
hub_model_id: bobox/DeBERTa2-0.9B-ST-stsb-checkpoints-tmp
hub_strategy: all_checkpoints
hub_private_repo: False
hub_always_push: False
gradient_checkpointing: False
gradient_checkpointing_kwargs: None
include_inputs_for_metrics: False
eval_do_concat_batches: True
fp16_backend: auto
push_to_hub_model_id: None
push_to_hub_organization: None
mp_parameters:
auto_find_batch_size: False
full_determinism: False
torchdynamo: None
ray_scope: last
ddp_timeout: 1800
torch_compile: False
torch_compile_backend: None
torch_compile_mode: None
dispatch_batches: None
split_batches: None
include_tokens_per_second: False
include_num_input_tokens_seen: False
neftune_noise_alpha: None
optim_target_modules: None
batch_eval_metrics: False
eval_on_start: False
batch_sampler: no_duplicates
multi_dataset_batch_sampler: proportional

Training Logs

Click to expand

Epoch	Step	Training Loss	loss	Qnli-dev_max_ap	allNLI-dev_max_ap	sts-test_spearman_cosine
0.0056	2	2.6549	-	-	-	-
0.0111	4	2.7355	-	-	-	-
0.0167	6	3.6211	-	-	-	-
0.0223	8	3.0686	-	-	-	-
0.0278	10	3.4113	-	-	-	-
0.0334	12	2.4857	-	-	-	-
0.0389	14	2.4288	-	-	-	-
0.0445	16	2.6203	-	-	-	-
0.0501	18	2.7441	-	-	-	-
0.0556	20	3.4263	-	-	-	-
0.0612	22	2.3565	-	-	-	-
0.0668	24	2.5596	-	-	-	-
0.0723	26	3.0866	-	-	-	-
0.0779	28	3.223	-	-	-	-
0.0834	30	2.012	-	-	-	-
0.0890	32	3.2829	-	-	-	-
0.0946	34	3.9277	-	-	-	-
0.1001	36	2.785	2.6652	0.7960	0.6275	0.9294
0.1057	38	3.4966	-	-	-	-
0.1113	40	2.5923	-	-	-	-
0.1168	42	3.4418	-	-	-	-
0.1224	44	2.6519	-	-	-	-
0.1280	46	3.7746	-	-	-	-
0.1335	48	2.6736	-	-	-	-
0.1391	50	3.6764	-	-	-	-
0.1446	52	3.5311	-	-	-	-
0.1502	54	2.5869	-	-	-	-
0.1558	56	3.183	-	-	-	-
0.1613	58	2.747	-	-	-	-
0.1669	60	1.965	-	-	-	-
0.1725	62	2.1785	-	-	-	-
0.1780	64	2.5788	-	-	-	-
0.1836	66	3.1776	-	-	-	-
0.1892	68	2.6464	-	-	-	-
0.1947	70	2.7619	-	-	-	-
0.2003	72	3.0911	2.6171	0.7923	0.6295	0.9276
0.2058	74	2.4308	-	-	-	-
0.2114	76	3.2068	-	-	-	-
0.2170	78	2.4081	-	-	-	-
0.2225	80	2.3257	-	-	-	-
0.2281	82	3.0499	-	-	-	-
0.2337	84	3.2518	-	-	-	-
0.2392	86	2.7876	-	-	-	-
0.2448	88	2.7898	-	-	-	-
0.2503	90	2.7116	-	-	-	-
0.2559	92	3.0505	-	-	-	-
0.2615	94	2.5901	-	-	-	-
0.2670	96	1.9563	-	-	-	-
0.2726	98	2.1006	-	-	-	-
0.2782	100	2.1853	-	-	-	-
0.2837	102	2.327	-	-	-	-
0.2893	104	1.9937	-	-	-	-
0.2949	106	2.543	-	-	-	-
0.3004	108	1.9826	2.4596	0.7919	0.6329	0.9341
0.3060	110	3.0746	-	-	-	-
0.3115	112	2.4145	-	-	-	-
0.3171	114	2.244	-	-	-	-
0.3227	116	2.78	-	-	-	-
0.3282	118	2.8323	-	-	-	-
0.3338	120	2.4639	-	-	-	-
0.3394	122	2.9216	-	-	-	-
0.3449	124	2.0747	-	-	-	-
0.3505	126	2.7573	-	-	-	-
0.3561	128	3.7019	-	-	-	-
0.3616	130	3.3155	-	-	-	-
0.3672	132	3.625	-	-	-	-
0.3727	134	3.2889	-	-	-	-
0.3783	136	3.5936	-	-	-	-
0.3839	138	3.5932	-	-	-	-
0.3894	140	3.0457	-	-	-	-
0.3950	142	3.093	-	-	-	-
0.4006	144	2.7189	2.4698	0.7752	0.5896	0.9346
0.4061	146	3.2578	-	-	-	-
0.4117	148	3.3581	-	-	-	-
0.4172	150	2.9734	-	-	-	-
0.4228	152	3.0514	-	-	-	-
0.4284	154	3.1983	-	-	-	-
0.4339	156	2.9033	-	-	-	-
0.4395	158	2.9279	-	-	-	-
0.4451	160	3.1336	-	-	-	-
0.4506	162	3.1467	-	-	-	-
0.4562	164	3.0381	-	-	-	-
0.4618	166	3.068	-	-	-	-
0.4673	168	3.0261	-	-	-	-
0.4729	170	3.2867	-	-	-	-
0.4784	172	2.8474	-	-	-	-
0.4840	174	2.7982	-	-	-	-
0.4896	176	2.7945	-	-	-	-
0.4951	178	3.1312	-	-	-	-
0.5007	180	2.9704	2.4640	0.7524	0.6033	0.9242
0.5063	182	2.9856	-	-	-	-
0.5118	184	3.014	-	-	-	-
0.5174	186	3.0125	-	-	-	-
0.5229	188	2.8149	-	-	-	-
0.5285	190	2.7954	-	-	-	-
0.5341	192	3.078	-	-	-	-
0.5396	194	2.955	-	-	-	-
0.5452	196	2.9468	-	-	-	-
0.5508	198	3.0791	-	-	-	-
0.5563	200	2.998	-	-	-	-
0.5619	202	2.9068	-	-	-	-
0.5675	204	2.8283	-	-	-	-
0.5730	206	2.9216	-	-	-	-
0.5786	208	3.3441	-	-	-	-
0.5841	210	3.0	-	-	-	-
0.5897	212	2.9023	-	-	-	-
0.5953	214	2.8177	-	-	-	-
0.6008	216	2.8262	2.4979	0.7899	0.6037	0.9260
0.6064	218	2.7832	-	-	-	-
0.6120	220	3.0085	-	-	-	-
0.6175	222	2.8762	-	-	-	-
0.6231	224	3.147	-	-	-	-
0.6287	226	3.4262	-	-	-	-
0.6342	228	2.8271	-	-	-	-
0.6398	230	2.4024	-	-	-	-
0.6453	232	2.7556	-	-	-	-
0.6509	234	3.4652	-	-	-	-
0.6565	236	2.7235	-	-	-	-
0.6620	238	2.6498	-	-	-	-
0.6676	240	3.0933	-	-	-	-
0.6732	242	3.1193	-	-	-	-
0.6787	244	2.7249	-	-	-	-
0.6843	246	2.8931	-	-	-	-
0.6898	248	2.7913	-	-	-	-
0.6954	250	2.6933	-	-	-	-
0.7010	252	2.5632	2.4585	0.7700	0.6065	0.9298
0.7065	254	2.8347	-	-	-	-
0.7121	256	2.3827	-	-	-	-
0.7177	258	2.9065	-	-	-	-
0.7232	260	2.8162	-	-	-	-
0.7288	262	2.5485	-	-	-	-
0.7344	264	2.5751	-	-	-	-
0.7399	266	2.9056	-	-	-	-
0.7455	268	3.1397	-	-	-	-
0.7510	270	3.3107	-	-	-	-
0.7566	272	2.9024	-	-	-	-
0.7622	274	2.2307	-	-	-	-
0.7677	276	3.0097	-	-	-	-
0.7733	278	3.1406	-	-	-	-
0.7789	280	2.6786	-	-	-	-
0.7844	282	2.8882	-	-	-	-
0.7900	284	2.7215	-	-	-	-
0.7955	286	3.4188	-	-	-	-
0.8011	288	2.9901	2.4414	0.7665	0.6023	0.9288
0.8067	290	2.5144	-	-	-	-
0.8122	292	3.1932	-	-	-	-
0.8178	294	2.9733	-	-	-	-
0.8234	296	2.6895	-	-	-	-
0.8289	298	2.678	-	-	-	-
0.8345	300	2.5462	-	-	-	-
0.8401	302	2.6911	-	-	-	-
0.8456	304	2.8404	-	-	-	-
0.8512	306	2.5358	-	-	-	-
0.8567	308	3.1245	-	-	-	-
0.8623	310	2.3404	-	-	-	-
0.8679	312	3.0751	-	-	-	-
0.8734	314	2.7005	-	-	-	-
0.8790	316	2.7387	-	-	-	-
0.8846	318	2.7227	-	-	-	-
0.8901	320	2.9085	-	-	-	-
0.8957	322	3.3239	-	-	-	-
0.9013	324	2.4256	2.4106	0.7644	0.6087	0.9304
0.9068	326	2.5059	-	-	-	-
0.9124	328	2.5387	-	-	-	-
0.9179	330	2.899	-	-	-	-
0.9235	332	2.7256	-	-	-	-
0.9291	334	2.4862	-	-	-	-
0.9346	336	3.0014	-	-	-	-
0.9402	338	2.4164	-	-	-	-
0.9458	340	2.3148	-	-	-	-
0.9513	342	2.9414	-	-	-	-
0.9569	344	2.4435	-	-	-	-
0.9624	346	2.6286	-	-	-	-
0.9680	348	2.1744	-	-	-	-
0.9736	350	2.5866	-	-	-	-
0.9791	352	2.8333	-	-	-	-
0.9847	354	2.3544	-	-	-	-
0.9903	356	2.5397	-	-	-	-
0.9958	358	3.4058	-	-	-	-
1.0014	360	2.2904	2.4089	0.7888	0.6104	0.9338
1.0070	362	2.7925	-	-	-	-
1.0125	364	2.6415	-	-	-	-
1.0181	366	2.724	-	-	-	-
1.0236	368	2.569	-	-	-	-
1.0292	370	2.808	-	-	-	-
1.0348	372	2.4672	-	-	-	-
1.0403	374	2.3964	-	-	-	-
1.0459	376	2.3518	-	-	-	-
1.0515	378	2.7617	-	-	-	-
1.0570	380	2.5651	-	-	-	-
1.0626	382	2.2623	-	-	-	-
1.0682	384	2.2048	-	-	-	-
1.0737	386	2.1426	-	-	-	-
1.0793	388	1.8182	-	-	-	-
1.0848	390	2.3166	-	-	-	-
1.0904	392	2.4101	-	-	-	-
1.0960	394	2.8932	-	-	-	-
1.1015	396	3.0201	2.4217	0.7851	0.6205	0.9301
1.1071	398	2.6101	-	-	-	-
1.1127	400	2.3627	-	-	-	-
1.1182	402	2.5402	-	-	-	-
1.1238	404	2.695	-	-	-	-
1.1293	406	3.0563	-	-	-	-
1.1349	408	2.2296	-	-	-	-
1.1405	410	3.057	-	-	-	-
1.1460	412	2.8023	-	-	-	-
1.1516	414	2.6492	-	-	-	-
1.1572	416	2.2406	-	-	-	-
1.1627	418	1.7195	-	-	-	-
1.1683	420	2.2773	-	-	-	-
1.1739	422	2.3639	-	-	-	-
1.1794	424	2.3348	-	-	-	-
1.1850	426	2.6791	-	-	-	-
1.1905	428	2.3621	-	-	-	-
1.1961	430	2.5224	-	-	-	-
1.2017	432	2.4063	2.4724	0.7628	0.6043	0.9270
1.2072	434	1.9713	-	-	-	-
1.2128	436	2.4265	-	-	-	-
1.2184	438	2.0827	-	-	-	-
1.2239	440	2.0696	-	-	-	-
1.2295	442	2.7507	-	-	-	-
1.2350	444	2.5436	-	-	-	-
1.2406	446	2.4039	-	-	-	-
1.2462	448	2.4229	-	-	-	-
1.2517	450	2.323	-	-	-	-
1.2573	452	2.6099	-	-	-	-
1.2629	454	2.0329	-	-	-	-
1.2684	456	1.8797	-	-	-	-
1.2740	458	1.4485	-	-	-	-
1.2796	460	1.6794	-	-	-	-
1.2851	462	2.0934	-	-	-	-
1.2907	464	1.9579	-	-	-	-
1.2962	466	1.9288	-	-	-	-
1.3018	468	1.5874	2.5056	0.7833	0.5948	0.9345
1.3074	470	1.8715	-	-	-	-
1.3129	472	1.3778	-	-	-	-
1.3185	474	2.2242	-	-	-	-
1.3241	476	2.4031	-	-	-	-
1.3296	478	1.924	-	-	-	-
1.3352	480	1.7895	-	-	-	-
1.3408	482	2.0349	-	-	-	-
1.3463	484	1.8116	-	-	-	-
1.3519	486	2.353	-	-	-	-
1.3574	488	3.4263	-	-	-	-
1.3630	490	4.0606	-	-	-	-
1.3686	492	2.7423	-	-	-	-
1.3741	494	2.8461	-	-	-	-
1.3797	496	3.0742	-	-	-	-
1.3853	498	2.2054	-	-	-	-
1.3908	500	2.6009	-	-	-	-
1.3964	502	2.242	-	-	-	-
1.4019	504	2.9416	2.5288	0.7969	0.6010	0.9323
1.4075	506	3.8179	-	-	-	-
1.4131	508	3.0147	-	-	-	-
1.4186	510	2.2185	-	-	-	-
1.4242	512	3.0323	-	-	-	-
1.4298	514	2.6922	-	-	-	-
1.4353	516	2.6219	-	-	-	-
1.4409	518	2.4365	-	-	-	-
1.4465	520	3.1643	-	-	-	-
1.4520	522	2.5548	-	-	-	-
1.4576	524	2.3798	-	-	-	-
1.4631	526	2.6361	-	-	-	-
1.4687	528	2.6859	-	-	-	-
1.4743	530	2.6071	-	-	-	-
1.4798	532	2.2565	-	-	-	-
1.4854	534	2.2415	-	-	-	-
1.4910	536	2.4591	-	-	-	-
1.4965	538	2.6729	-	-	-	-
1.5021	540	2.3898	2.5025	0.7881	0.5978	0.9300
1.5076	542	2.4614	-	-	-	-
1.5132	544	2.5447	-	-	-	-
1.5188	546	2.502	-	-	-	-
1.5243	548	2.1892	-	-	-	-
1.5299	550	2.7081	-	-	-	-
1.5355	552	2.5523	-	-	-	-
1.5410	554	2.3571	-	-	-	-
1.5466	556	2.7694	-	-	-	-
1.5522	558	2.2	-	-	-	-
1.5577	560	2.4179	-	-	-	-
1.5633	562	2.3914	-	-	-	-
1.5688	564	2.1722	-	-	-	-
1.5744	566	2.345	-	-	-	-
1.5800	568	3.0069	-	-	-	-
1.5855	570	2.4231	-	-	-	-
1.5911	572	2.3597	-	-	-	-
1.5967	574	2.143	-	-	-	-
1.6022	576	2.6288	2.5368	0.7943	0.6048	0.9265
1.6078	578	2.3905	-	-	-	-
1.6134	580	2.1823	-	-	-	-
1.6189	582	2.367	-	-	-	-
1.6245	584	2.8189	-	-	-	-
1.6300	586	2.6536	-	-	-	-
1.6356	588	2.2134	-	-	-	-
1.6412	590	1.6949	-	-	-	-
1.6467	592	2.2029	-	-	-	-
1.6523	594	3.0223	-	-	-	-
1.6579	596	2.239	-	-	-	-
1.6634	598	2.3388	-	-	-	-
1.6690	600	2.3066	-	-	-	-
1.6745	602	2.4762	-	-	-	-
1.6801	604	1.9503	-	-	-	-
1.6857	606	2.1252	-	-	-	-
1.6912	608	1.8253	-	-	-	-
1.6968	610	2.2938	-	-	-	-
1.7024	612	1.9489	2.5747	0.7675	0.5964	0.9267
1.7079	614	1.9238	-	-	-	-
1.7135	616	1.8171	-	-	-	-
1.7191	618	2.2371	-	-	-	-
1.7246	620	2.4901	-	-	-	-
1.7302	622	1.8503	-	-	-	-
1.7357	624	2.017	-	-	-	-
1.7413	626	2.3069	-	-	-	-
1.7469	628	2.444	-	-	-	-
1.7524	630	1.9606	-	-	-	-
1.7580	632	2.2364	-	-	-	-
1.7636	634	1.8711	-	-	-	-
1.7691	636	2.4233	-	-	-	-
1.7747	638	2.4065	-	-	-	-
1.7803	640	2.0725	-	-	-	-
1.7858	642	2.0578	-	-	-	-
1.7914	644	2.2066	-	-	-	-
1.7969	646	1.7767	-	-	-	-
1.8025	648	2.7388	2.5685	0.7663	0.5959	0.9292
1.8081	650	1.854	-	-	-	-
1.8136	652	2.7337	-	-	-	-
1.8192	654	2.4477	-	-	-	-
1.8248	656	2.4818	-	-	-	-
1.8303	658	1.8592	-	-	-	-
1.8359	660	1.8396	-	-	-	-
1.8414	662	2.3893	-	-	-	-
1.8470	664	2.0139	-	-	-	-
1.8526	666	2.8837	-	-	-	-
1.8581	668	2.0342	-	-	-	-
1.8637	670	1.8857	-	-	-	-
1.8693	672	2.1147	-	-	-	-
1.8748	674	1.6263	-	-	-	-
1.8804	676	2.2987	-	-	-	-
1.8860	678	1.9678	-	-	-	-
1.8915	680	1.9999	-	-	-	-
1.8971	682	2.2802	-	-	-	-
1.9026	684	1.9666	2.5536	0.7717	0.5967	0.9289
1.9082	686	1.8156	-	-	-	-
1.9138	688	1.9542	-	-	-	-
1.9193	690	1.859	-	-	-	-
1.9249	692	1.6237	-	-	-	-
1.9305	694	2.3085	-	-	-	-
1.9360	696	2.1461	-	-	-	-
1.9416	698	1.7024	-	-	-	-
1.9471	700	2.2181	-	-	-	-
1.9527	702	2.4782	-	-	-	-
1.9583	704	1.7378	-	-	-	-
1.9638	706	2.0422	-	-	-	-
1.9694	708	1.7577	-	-	-	-
1.9750	710	2.0209	-	-	-	-
1.9805	712	2.0372	-	-	-	-
1.9861	714	2.0915	-	-	-	-
1.9917	716	1.603	-	-	-	-
1.9972	718	1.7111	-	-	-	-

Framework Versions

Python: 3.10.12
Sentence Transformers: 3.0.1
Transformers: 4.42.4
PyTorch: 2.4.0+cu121
Accelerate: 0.32.1
Datasets: 2.21.0
Tokenizers: 0.19.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

AnglELoss

@misc{li2023angleoptimized,
    title={AnglE-optimized Text Embeddings}, 
    author={Xianming Li and Jing Li},
    year={2023},
    eprint={2309.12871},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}