yasserrmd committed on Sep 17

Commit

c165868

verified ·

1 Parent(s): 3006713

Initial commit: Fine-tuned embedding-gemma-300m on GeoGPT-QA dataset

Browse files

Files changed (17) hide show

.gitattributes +1 -0
1_Pooling/config.json +10 -0
2_Dense/config.json +6 -0
2_Dense/model.safetensors +3 -0
3_Dense/config.json +6 -0
3_Dense/model.safetensors +3 -0
README.md +453 -0
added_tokens.json +3 -0
config.json +60 -0
config_sentence_transformers.json +26 -0
model.safetensors +3 -0
modules.json +32 -0
sentence_bert_config.json +4 -0
special_tokens_map.json +33 -0
tokenizer.json +3 -0
tokenizer.model +3 -0
tokenizer_config.json +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "word_embedding_dimension": 768,
+    "pooling_mode_cls_token": false,
+    "pooling_mode_mean_tokens": true,
+    "pooling_mode_max_tokens": false,
+    "pooling_mode_mean_sqrt_len_tokens": false,
+    "pooling_mode_weightedmean_tokens": false,
+    "pooling_mode_lasttoken": false,
+    "include_prompt": true
+}

2_Dense/config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+    "in_features": 768,
+    "out_features": 3072,
+    "bias": false,
+    "activation_function": "torch.nn.modules.linear.Identity"
+}

2_Dense/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:35a3f472babbb893d61a6b242edcb6dbe3fb7b582c6d194ac2638a61818b313c
+size 9437272

3_Dense/config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+    "in_features": 3072,
+    "out_features": 768,
+    "bias": false,
+    "activation_function": "torch.nn.modules.linear.Identity"
+}

3_Dense/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17281bd8b6d9a7f2c14ebf4ecc332a739a6a8e09a43e551ff7168eb56d47d6ed
+size 9437272

README.md ADDED Viewed

	@@ -0,0 +1,453 @@

+---
+tags:
+- sentence-transformers
+- sentence-similarity
+- feature-extraction
+- dense
+- generated_from_trainer
+- dataset_size:20000
+- loss:MultipleNegativesRankingLoss
+base_model: google/embeddinggemma-300m
+widget:
+- source_sentence: 'What is the Dialysis Symptom Index (DSI) and why is it important
+    for assessing symptoms in hemodialysis patients?
+    '
+  sentences:
+  - Proteinuria in LCDD patients is measured from a 24-hour urine collection. Nephrotic
+    range proteinuria (NRP) is defined as 24-hour proteinuria equal to or greater
+    than 3 grams. This indicates significant protein loss in the urine, which is a
+    characteristic feature of LCDD.
+  - The Dialysis Symptom Index (DSI) is a self-reported index that assesses the presence
+    and severity of symptoms in patients with end-stage renal disease undergoing hemodialysis.
+    It contains 30 items targeting specific physical and emotional symptoms. The DSI
+    is important for accurately assessing symptoms in hemodialysis patients, both
+    for research and practice purposes, and to improve the care provided to these
+    patients.
+  - A meta-analysis of trials that utilized high-dose NAC found that it was associated
+    with a lower risk of CIAKI compared to controls. The analysis showed no significant
+    heterogeneity or publication bias. However, the effectiveness of NAC for the prevention
+    of CIAKI is still uncertain, and no definitive conclusions can be drawn at the
+    current time. If a beneficial effect exists, it may be related to the use of higher
+    doses of NAC. Large clinical trials are needed to better define the clinical utility
+    of this agent.
+- source_sentence: 'What is the role of Doppler ultrasound in kidney transplant recipients?
+    '
+  sentences:
+  - The most common diagnoses for emergency department visits among patients receiving
+    maintenance in-center hemodialysis are heart failure, throat and chest pain, and
+    abdominal pain. These conditions account for a significant proportion of emergency
+    department visits in this patient population.
+  - 'Complications associated with leflunomide treatment in kidney transplant patients
+    include leukopenia, thrombocytopenia, hepatotoxicity, and anemia. In this particular
+    study, 11 out of 28 patients (39%) developed complications while receiving leflunomide. '
+  - Doppler ultrasound is a non-invasive imaging method commonly used in kidney transplant
+    recipients. It helps verify the patency of vascular anastomoses and exclude thrombotic
+    complications in the early period after transplantation. Doppler ultrasound also
+    measures the spectrum of blood flow within the kidney graft's segmental arteries,
+    providing parameters of vascular resistance such as pulsatility and resistance
+    indices. These indices can help detect complications like delayed graft function
+    (DGF) and predict the severity and duration of acute tubular necrosis (ATN) in
+    kidney transplant recipients.
+- source_sentence: 'What is the natural history of analgesic-associated nephropathy
+    (AAN)?
+    '
+  sentences:
+  - In glomerulonephritis associated with the nephrotic syndrome, a progressive decline
+    in proteinuria to less than 2 g/day (or less) is associated with a favorable prognostic
+    outlook, whether the reduction occurs spontaneously or in response to treatment.
+    This means that a decrease in proteinuria to a lower level is indicative of a
+    better prognosis in patients with glomerulonephritis and the nephrotic syndrome.
+  - Infection is the second-leading cause of death in hemodialysis patients. Hemodialysis
+    patients have various risk factors that impair the immune system, putting them
+    at increased risk of infection and its related mortality. Malnutrition is a major
+    contributor to the development and fatality of infections in CKD patients. Patients
+    with malnutrition have weakened immune systems, making them more susceptible to
+    infections and increasing the risk of infection-related mortality. Severe dietary
+    protein restriction, which is often necessary to manage hyperphosphatemia in CKD
+    patients, can lead to protein-energy malnutrition. However, the use of phosphate
+    binders to manage hyperphosphatemia can improve phosphate management with less
+    risk of malnutrition compared to dietary protein restriction. Therefore, phosphate
+    binders are expected to allow patients to maintain a better nutritional state
+    while decreasing the chance of infection, thereby reducing the risk of infection-related
+    mortality.
+  - The natural history of AAN is poorly understood, especially since the withdrawal
+    of phenacetin. However, it is known that AAN can progress slowly and lead to end-stage
+    chronic renal failure (ESCRF). The incidence of AAN appears to be declining, but
+    it remains an important and preventable cause of ESRF in many areas.
+- source_sentence: What are the treatment options for membranous glomerulonephritis?
+  sentences:
+  - Chronic kidney disease (CKD) can impact the management of gout by limiting the
+    dosage or hampering the use of urate-lowering drugs (ULD), colchicine, and nonsteroidal
+    anti-inflammatory agents. The frequent prescription of diuretics in CKD patients
+    can also affect outcomes. Diuretics can increase serum urate levels and interfere
+    with the effectiveness of allopurinol, a commonly used ULD. This knowledge is
+    important for managing gout in patients with CKD who are taking diuretics.
+  - Treatment for membranous glomerulonephritis may include the use of corticosteroids,
+    such as metacorten or prednisone, to reduce inflammation and proteinuria. However,
+    in some cases, treatment may not be effective, and the disease may progress to
+    advanced or chronic stages.
+  - Some factors that may benefit twice-weekly HD treatment include a longer HD session
+    time, a higher spKt/V (a measure of dialysis adequacy), the use of high flux dialyzers,
+    and the use of ultrapure dialysate. These factors can contribute to optimal solute
+    clearance and improve outcomes for patients undergoing twice-weekly HD.
+- source_sentence: How do some participants believe that reimbursement or compensation
+    for living kidney donors can help minimize disadvantage?
+  sentences:
+  - Urinary L-PGDS excretions have been found to be superior to other markers, including
+    urinary excretions of type-IV collagen, beta-2 microglobulin, and NAG, as well
+    as serum creatinine levels, in predicting renal injury in type-2 diabetes. Studies
+    have shown that urinary L-PGDS excretions better predict ≥30 mg/gCr albuminuria
+    in type-2 diabetes. The use of urinary L-PGDS excretions as a marker for renal
+    injury in type-2 diabetes is supported by its ability to reflect a slight change
+    in glomerular permeability and its positive correlation with albuminuria.
+  - The time in therapeutic range (TTR) of INR (International Normalized Ratio) is
+    an important factor in determining the risk of hemorrhagic and ischemic events
+    in hemodialysis patients. If the INR is below 1.5, there is an increased risk
+    of hemorrhagic events, while an INR above 5 increases the risk of ischemic events.
+    Maintaining the INR within the therapeutic range is challenging but crucial in
+    minimizing these risks.
+  - Some participants believe that reimbursement or compensation can effectively help
+    donors and recipients who are socioeconomically disadvantaged by removing financial
+    barriers to donation. They advocate for government subsidies or special paid leave
+    to support potential donors who may not be able to take leave or afford donation-related
+    expenses. The goal is to ensure that financial constraints do not penalize individuals
+    who are willing to donate.
+pipeline_tag: sentence-similarity
+library_name: sentence-transformers
+---
+# SentenceTransformer based on google/embeddinggemma-300m
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [google/embeddinggemma-300m](https://huggingface.co/google/embeddinggemma-300m). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+## Model Details
+### Model Description
+- **Model Type:** Sentence Transformer
+- **Base model:** [google/embeddinggemma-300m](https://huggingface.co/google/embeddinggemma-300m) <!-- at revision c5cfa06e5e282a820e85d57f7fb053207494f41d -->
+- **Maximum Sequence Length:** 2048 tokens
+- **Output Dimensionality:** 768 dimensions
+- **Similarity Function:** Cosine Similarity
+<!-- - **Training Dataset:** Unknown -->
+<!-- - **Language:** Unknown -->
+<!-- - **License:** Unknown -->
+### Model Sources
+- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+### Full Model Architecture
+```
+SentenceTransformer(
+  (0): Transformer({'max_seq_length': 2048, 'do_lower_case': False, 'architecture': 'Gemma3TextModel'})
+  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+  (2): Dense({'in_features': 768, 'out_features': 3072, 'bias': False, 'activation_function': 'torch.nn.modules.linear.Identity'})
+  (3): Dense({'in_features': 3072, 'out_features': 768, 'bias': False, 'activation_function': 'torch.nn.modules.linear.Identity'})
+  (4): Normalize()
+)
+```
+## Usage
+### Direct Usage (Sentence Transformers)
+First install the Sentence Transformers library:
+```bash
+pip install -U sentence-transformers
+```
+Then you can load this model and run inference.
+```python
+from sentence_transformers import SentenceTransformer
+# Download from the 🤗 Hub
+model = SentenceTransformer("yasserrmd/nephrology-gemma-300m-emb")
+# Run inference
+queries = [
+    "How do some participants believe that reimbursement or compensation for living kidney donors can help minimize disadvantage?",
+]
+documents = [
+    'Some participants believe that reimbursement or compensation can effectively help donors and recipients who are socioeconomically disadvantaged by removing financial barriers to donation. They advocate for government subsidies or special paid leave to support potential donors who may not be able to take leave or afford donation-related expenses. The goal is to ensure that financial constraints do not penalize individuals who are willing to donate.',
+    'The time in therapeutic range (TTR) of INR (International Normalized Ratio) is an important factor in determining the risk of hemorrhagic and ischemic events in hemodialysis patients. If the INR is below 1.5, there is an increased risk of hemorrhagic events, while an INR above 5 increases the risk of ischemic events. Maintaining the INR within the therapeutic range is challenging but crucial in minimizing these risks.',
+    'Urinary L-PGDS excretions have been found to be superior to other markers, including urinary excretions of type-IV collagen, beta-2 microglobulin, and NAG, as well as serum creatinine levels, in predicting renal injury in type-2 diabetes. Studies have shown that urinary L-PGDS excretions better predict ≥30 mg/gCr albuminuria in type-2 diabetes. The use of urinary L-PGDS excretions as a marker for renal injury in type-2 diabetes is supported by its ability to reflect a slight change in glomerular permeability and its positive correlation with albuminuria.',
+]
+query_embeddings = model.encode_query(queries)
+document_embeddings = model.encode_document(documents)
+print(query_embeddings.shape, document_embeddings.shape)
+# (1, 768) (3, 768)
+# Get the similarity scores for the embeddings
+similarities = model.similarity(query_embeddings, document_embeddings)
+print(similarities)
+# tensor([[0.6341, 0.0019, 0.0465]])
+```
+<!--
+### Direct Usage (Transformers)
+<details><summary>Click to see the direct usage in Transformers</summary>
+</details>
+-->
+<!--
+### Downstream Usage (Sentence Transformers)
+You can finetune this model on your own dataset.
+<details><summary>Click to expand</summary>
+</details>
+-->
+<!--
+### Out-of-Scope Use
+*List how the model may foreseeably be misused and address what users ought not to do with the model.*
+-->
+<!--
+## Bias, Risks and Limitations
+*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+-->
+<!--
+### Recommendations
+*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+-->
+## Training Details
+### Training Dataset
+#### Unnamed Dataset
+* Size: 20,000 training samples
+* Columns: <code>sentence_0</code> and <code>sentence_1</code>
+* Approximate statistics based on the first 1000 samples:
+  |         | sentence_0                                                                         | sentence_1                                                                         |
+  |:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
+  | type    | string                                                                             | string                                                                             |
+  | details | <ul><li>min: 10 tokens</li><li>mean: 22.05 tokens</li><li>max: 56 tokens</li></ul> | <ul><li>min: 20 tokens</li><li>mean: 91.9 tokens</li><li>max: 281 tokens</li></ul> |
+* Samples:
+  | sentence_0                                                                                                                         | sentence_1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
+  |:-----------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+  | <code>How do the CKD-EPI and Japanese equations compare to Ccr and CGF in estimating renal function in cancer patients?<br></code> | <code>The CKD-EPI and Japanese equations provide more accurate estimates of renal function compared to 24-hour Ccr and CGF in cancer patients before and after chemotherapy with cisplatin. These new equations have lower bias and higher precision values, indicating better estimation of glomerular filtration rate (GFR). The CKD-EPI and Japanese equations were developed as better estimates of GFR than Ccr and CGF, which were mostly developed in chronic kidney disease (CKD) patients without cancer. The accuracy of the CKD-EPI and Japanese equations in estimating GFR in cancer patients is consistent with previous studies. Therefore, it is recommended to replace Ccr and CGF with these new equations for the evaluation of renal function in cancer patients undergoing cisplatin-containing chemotherapy.</code> |
+  | <code>What are the clinical phenotypes of Bartter-like syndrome?<br></code>                                                        | <code>Bartter-like syndrome can be divided into at least three different clinical phenotypes: classic Bartter syndrome, Gitelman syndrome, and antenatal (neonatal) Bartter syndrome. Classic Bartter syndrome and Gitelman syndrome have renal tubular hypokalemic alkalosis, while antenatal Bartter syndrome also has profound systemic manifestations such as polyhydramnios, premature delivery, severe water and salt wasting, hypokalemic metabolic alkalosis, severe hypercalciuria, and marked growth retardation.</code>                                                                                                                                                                                                                                                                                                        |
+  | <code>What is granulomatous interstitial nephritis (GIN), and how frequently does it occur in patients with sarcoidosis?</code>    | <code>Granulomatous interstitial nephritis (GIN) is a form of renal inflammation characterized by the presence of granulomas in the interstitial tissue of the kidneys. In patients with sarcoidosis, GIN is reportedly present in approximately one-third of patients with clinical evidence of renal disease. Post-mortem series have shown that between 7 and 27% of all patients with sarcoidosis may have GIN. It is important to note that GIN can occur in sarcoidosis patients even in the absence of obvious clinical renal disease.</code>                                                                                                                                                                                                                                                                                      |
+* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
+  ```json
+  {
+      "scale": 20.0,
+      "similarity_fct": "cos_sim",
+      "gather_across_devices": false
+  }
+  ```
+### Training Hyperparameters
+#### Non-Default Hyperparameters
+- `per_device_train_batch_size`: 6
+- `per_device_eval_batch_size`: 6
+- `num_train_epochs`: 1
+- `multi_dataset_batch_sampler`: round_robin
+#### All Hyperparameters
+<details><summary>Click to expand</summary>
+- `overwrite_output_dir`: False
+- `do_predict`: False
+- `eval_strategy`: no
+- `prediction_loss_only`: True
+- `per_device_train_batch_size`: 6
+- `per_device_eval_batch_size`: 6
+- `per_gpu_train_batch_size`: None
+- `per_gpu_eval_batch_size`: None
+- `gradient_accumulation_steps`: 1
+- `eval_accumulation_steps`: None
+- `torch_empty_cache_steps`: None
+- `learning_rate`: 5e-05
+- `weight_decay`: 0.0
+- `adam_beta1`: 0.9
+- `adam_beta2`: 0.999
+- `adam_epsilon`: 1e-08
+- `max_grad_norm`: 1
+- `num_train_epochs`: 1
+- `max_steps`: -1
+- `lr_scheduler_type`: linear
+- `lr_scheduler_kwargs`: {}
+- `warmup_ratio`: 0.0
+- `warmup_steps`: 0
+- `log_level`: passive
+- `log_level_replica`: warning
+- `log_on_each_node`: True
+- `logging_nan_inf_filter`: True
+- `save_safetensors`: True
+- `save_on_each_node`: False
+- `save_only_model`: False
+- `restore_callback_states_from_checkpoint`: False
+- `no_cuda`: False
+- `use_cpu`: False
+- `use_mps_device`: False
+- `seed`: 42
+- `data_seed`: None
+- `jit_mode_eval`: False
+- `use_ipex`: False
+- `bf16`: False
+- `fp16`: False
+- `fp16_opt_level`: O1
+- `half_precision_backend`: auto
+- `bf16_full_eval`: False
+- `fp16_full_eval`: False
+- `tf32`: None
+- `local_rank`: 0
+- `ddp_backend`: None
+- `tpu_num_cores`: None
+- `tpu_metrics_debug`: False
+- `debug`: []
+- `dataloader_drop_last`: False
+- `dataloader_num_workers`: 0
+- `dataloader_prefetch_factor`: None
+- `past_index`: -1
+- `disable_tqdm`: False
+- `remove_unused_columns`: True
+- `label_names`: None
+- `load_best_model_at_end`: False
+- `ignore_data_skip`: False
+- `fsdp`: []
+- `fsdp_min_num_params`: 0
+- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+- `fsdp_transformer_layer_cls_to_wrap`: None
+- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+- `parallelism_config`: None
+- `deepspeed`: None
+- `label_smoothing_factor`: 0.0
+- `optim`: adamw_torch_fused
+- `optim_args`: None
+- `adafactor`: False
+- `group_by_length`: False
+- `length_column_name`: length
+- `ddp_find_unused_parameters`: None
+- `ddp_bucket_cap_mb`: None
+- `ddp_broadcast_buffers`: False
+- `dataloader_pin_memory`: True
+- `dataloader_persistent_workers`: False
+- `skip_memory_metrics`: True
+- `use_legacy_prediction_loop`: False
+- `push_to_hub`: False
+- `resume_from_checkpoint`: None
+- `hub_model_id`: None
+- `hub_strategy`: every_save
+- `hub_private_repo`: None
+- `hub_always_push`: False
+- `hub_revision`: None
+- `gradient_checkpointing`: False
+- `gradient_checkpointing_kwargs`: None
+- `include_inputs_for_metrics`: False
+- `include_for_metrics`: []
+- `eval_do_concat_batches`: True
+- `fp16_backend`: auto
+- `push_to_hub_model_id`: None
+- `push_to_hub_organization`: None
+- `mp_parameters`:
+- `auto_find_batch_size`: False
+- `full_determinism`: False
+- `torchdynamo`: None
+- `ray_scope`: last
+- `ddp_timeout`: 1800
+- `torch_compile`: False
+- `torch_compile_backend`: None
+- `torch_compile_mode`: None
+- `include_tokens_per_second`: False
+- `include_num_input_tokens_seen`: False
+- `neftune_noise_alpha`: None
+- `optim_target_modules`: None
+- `batch_eval_metrics`: False
+- `eval_on_start`: False
+- `use_liger_kernel`: False
+- `liger_kernel_config`: None
+- `eval_use_gather_object`: False
+- `average_tokens_across_devices`: False
+- `prompts`: None
+- `batch_sampler`: batch_sampler
+- `multi_dataset_batch_sampler`: round_robin
+- `router_mapping`: {}
+- `learning_rate_mapping`: {}
+</details>
+### Training Logs
+| Epoch  | Step | Training Loss |
+|:------:|:----:|:-------------:|
+| 0.1500 | 500  | 0.0296        |
+| 0.2999 | 1000 | 0.0138        |
+| 0.4499 | 1500 | 0.0108        |
+| 0.5999 | 2000 | 0.0107        |
+| 0.7499 | 2500 | 0.0061        |
+| 0.8998 | 3000 | 0.0052        |
+### Framework Versions
+- Python: 3.12.11
+- Sentence Transformers: 5.1.0
+- Transformers: 4.56.1
+- PyTorch: 2.8.0+cu128
+- Accelerate: 1.10.1
+- Datasets: 4.0.0
+- Tokenizers: 0.22.0
+## Citation
+### BibTeX
+#### Sentence Transformers
+```bibtex
+@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}
+```
+#### MultipleNegativesRankingLoss
+```bibtex
+@misc{henderson2017efficient,
+    title={Efficient Natural Language Response Suggestion for Smart Reply},
+    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
+    year={2017},
+    eprint={1705.00652},
+    archivePrefix={arXiv},
+    primaryClass={cs.CL}
+}
+```
+<!--
+## Glossary
+*Clearly define terms in order to be accessible across audiences.*
+-->
+<!--
+## Model Card Authors
+*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+-->
+<!--
+## Model Card Contact
+*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+-->

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<image_soft_token>": 262144
+}

config.json ADDED Viewed

	@@ -0,0 +1,60 @@

+{
+  "_sliding_window_pattern": 6,
+  "architectures": [
+    "Gemma3TextModel"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attn_logit_softcapping": null,
+  "bos_token_id": 2,
+  "dtype": "float32",
+  "eos_token_id": 1,
+  "final_logit_softcapping": null,
+  "head_dim": 256,
+  "hidden_activation": "gelu_pytorch_tanh",
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 1152,
+  "layer_types": [
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 2048,
+  "model_type": "gemma3_text",
+  "num_attention_heads": 3,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 1,
+  "pad_token_id": 0,
+  "query_pre_attn_scalar": 256,
+  "rms_norm_eps": 1e-06,
+  "rope_local_base_freq": 10000.0,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": 512,
+  "transformers_version": "4.56.1",
+  "use_bidirectional_attention": true,
+  "use_cache": true,
+  "vocab_size": 262144
+}

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "model_type": "SentenceTransformer",
+  "__version__": {
+    "sentence_transformers": "5.1.0",
+    "transformers": "4.56.1",
+    "pytorch": "2.8.0+cu128"
+  },
+  "prompts": {
+    "query": "task: search result | query: ",
+    "document": "title: none | text: ",
+    "BitextMining": "task: search result | query: ",
+    "Clustering": "task: clustering | query: ",
+    "Classification": "task: classification | query: ",
+    "InstructionRetrieval": "task: code retrieval | query: ",
+    "MultilabelClassification": "task: classification | query: ",
+    "PairClassification": "task: sentence similarity | query: ",
+    "Reranking": "task: search result | query: ",
+    "Retrieval": "task: search result | query: ",
+    "Retrieval-query": "task: search result | query: ",
+    "Retrieval-document": "title: none | text: ",
+    "STS": "task: sentence similarity | query: ",
+    "Summarization": "task: summarization | query: "
+  },
+  "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dd003923e6e618cdda5547e7493e52f6ceb7fc31b895ee6fd9b9fed0a30b7fa8
+size 1211486072

modules.json ADDED Viewed

	@@ -0,0 +1,32 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Dense",
+    "type": "sentence_transformers.models.Dense"
+  },
+  {
+    "idx": 3,
+    "name": "3",
+    "path": "3_Dense",
+    "type": "sentence_transformers.models.Dense"
+  },
+  {
+    "idx": 4,
+    "name": "4",
+    "path": "4_Normalize",
+    "type": "sentence_transformers.models.Normalize"
+  }
+]

sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "max_seq_length": 2048,
+    "do_lower_case": false
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "boi_token": "<start_of_image>",
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eoi_token": "<end_of_image>",
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "image_token": "<image_soft_token>",
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:216e2a79606fe879c9f17c529c71cd241338407fd5646b595ffd3c4b9ea1d503
+size 33385262

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+size 4689074

tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff