Automatically add EOS via Tokenizer, integrate Sentence Transformers (#1)
Browse files- Automatically add EOS via Tokenizer, integrate Sentence Transformers (fd17b9cd89d6cc5b416d4b66ea25da0bea7f2bb0)
- Remove eod_id line from README (7bd6fbe3c54b9ec2b4b1cc3a052720a76fcf0d90)
- 1_Pooling/config.json +10 -0
- README.md +54 -11
- config_sentence_transformers.json +8 -0
- modules.json +20 -0
- tokenizer.json +2 -2
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 4096,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": true,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
CHANGED
@@ -2,7 +2,11 @@
|
|
2 |
license: apache-2.0
|
3 |
base_model:
|
4 |
- Qwen/Qwen3-8B-Base
|
5 |
-
|
|
|
|
|
|
|
|
|
6 |
---
|
7 |
# Qwen3-Embedding-8B
|
8 |
|
@@ -53,6 +57,47 @@ With Transformers versions earlier than 4.51.0, you may encounter the following
|
|
53 |
KeyError: 'qwen3'
|
54 |
```
|
55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
### Transformers Usage
|
57 |
|
58 |
```python
|
@@ -79,14 +124,6 @@ def last_token_pool(last_hidden_states: Tensor,
|
|
79 |
def get_detailed_instruct(task_description: str, query: str) -> str:
|
80 |
return f'Instruct: {task_description}\nQuery:{query}'
|
81 |
|
82 |
-
def tokenize(tokenizer, input_texts, eod_id, max_length):
|
83 |
-
batch_dict = tokenizer(input_texts, padding=False, truncation=True, max_length=max_length-2)
|
84 |
-
for seq, att in zip(batch_dict["input_ids"], batch_dict["attention_mask"]):
|
85 |
-
seq.append(eod_id)
|
86 |
-
att.append(1)
|
87 |
-
batch_dict = tokenizer.pad(batch_dict, padding=True, return_tensors="pt")
|
88 |
-
return batch_dict
|
89 |
-
|
90 |
# Each query must come with a one-sentence instruction that describes the task
|
91 |
task = 'Given a web search query, retrieve relevant passages that answer the query'
|
92 |
|
@@ -107,11 +144,16 @@ model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-8B')
|
|
107 |
# We recommend enabling flash_attention_2 for better acceleration and memory saving.
|
108 |
# model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-8B', attn_implementation="flash_attention_2", torch_dtype=torch.float16).cuda()
|
109 |
|
110 |
-
eod_id = tokenizer.convert_tokens_to_ids("<|endoftext|>")
|
111 |
max_length = 8192
|
112 |
|
113 |
# Tokenize the input texts
|
114 |
-
batch_dict = tokenize(tokenizer, input_texts, eod_id, max_length)
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
batch_dict.to(model.device)
|
116 |
outputs = model(**batch_dict)
|
117 |
embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
|
@@ -120,6 +162,7 @@ embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_ma
|
|
120 |
embeddings = F.normalize(embeddings, p=2, dim=1)
|
121 |
scores = (embeddings[:2] @ embeddings[2:].T)
|
122 |
print(scores.tolist())
|
|
|
123 |
```
|
124 |
📌 **Tip**: We recommend that developers customize the `instruct` according to their specific scenarios, tasks, and languages. Our tests have shown that in most retrieval scenarios, not using an `instruct` on the query side can lead to a drop in retrieval performance by approximately 1% to 5%.
|
125 |
|
|
|
2 |
license: apache-2.0
|
3 |
base_model:
|
4 |
- Qwen/Qwen3-8B-Base
|
5 |
+
tags:
|
6 |
+
- transformers
|
7 |
+
- sentence-transformers
|
8 |
+
- sentence-similarity
|
9 |
+
- feature-extraction
|
10 |
---
|
11 |
# Qwen3-Embedding-8B
|
12 |
|
|
|
57 |
KeyError: 'qwen3'
|
58 |
```
|
59 |
|
60 |
+
### Sentence Transformers Usage
|
61 |
+
|
62 |
+
```python
|
63 |
+
# Requires transformers>=4.51.0
|
64 |
+
|
65 |
+
from sentence_transformers import SentenceTransformer
|
66 |
+
|
67 |
+
# Load the model
|
68 |
+
model = SentenceTransformer("Qwen/Qwen3-Embedding-8B")
|
69 |
+
|
70 |
+
# We recommend enabling flash_attention_2 for better acceleration and memory saving,
|
71 |
+
# together with setting `padding_side` to "left":
|
72 |
+
# model = SentenceTransformer(
|
73 |
+
# "Qwen/Qwen3-Embedding-8B",
|
74 |
+
# model_kwargs={"attn_implementation": "flash_attention_2", "device_map": "auto"},
|
75 |
+
# tokenizer_kwargs={"padding_side": "left"},
|
76 |
+
# )
|
77 |
+
|
78 |
+
# The queries and documents to embed
|
79 |
+
queries = [
|
80 |
+
"What is the capital of China?",
|
81 |
+
"Explain gravity",
|
82 |
+
]
|
83 |
+
documents = [
|
84 |
+
"The capital of China is Beijing.",
|
85 |
+
"Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun.",
|
86 |
+
]
|
87 |
+
|
88 |
+
# Encode the queries and documents. Note that queries benefit from using a prompt
|
89 |
+
# Here we use the prompt called "query" stored under `model.prompts`, but you can
|
90 |
+
# also pass your own prompt via the `prompt` argument
|
91 |
+
query_embeddings = model.encode(queries, prompt_name="query")
|
92 |
+
document_embeddings = model.encode(documents)
|
93 |
+
|
94 |
+
# Compute the (cosine) similarity between the query and document embeddings
|
95 |
+
similarity = model.similarity(query_embeddings, document_embeddings)
|
96 |
+
print(similarity)
|
97 |
+
# tensor([[0.7493, 0.0751],
|
98 |
+
# [0.0880, 0.6318]])
|
99 |
+
```
|
100 |
+
|
101 |
### Transformers Usage
|
102 |
|
103 |
```python
|
|
|
124 |
def get_detailed_instruct(task_description: str, query: str) -> str:
|
125 |
return f'Instruct: {task_description}\nQuery:{query}'
|
126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
# Each query must come with a one-sentence instruction that describes the task
|
128 |
task = 'Given a web search query, retrieve relevant passages that answer the query'
|
129 |
|
|
|
144 |
# We recommend enabling flash_attention_2 for better acceleration and memory saving.
|
145 |
# model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-8B', attn_implementation="flash_attention_2", torch_dtype=torch.float16).cuda()
|
146 |
|
|
|
147 |
max_length = 8192
|
148 |
|
149 |
# Tokenize the input texts
|
150 |
+
batch_dict = tokenizer(
|
151 |
+
input_texts,
|
152 |
+
padding=True,
|
153 |
+
truncation=True,
|
154 |
+
max_length=max_length,
|
155 |
+
return_tensors="pt",
|
156 |
+
)
|
157 |
batch_dict.to(model.device)
|
158 |
outputs = model(**batch_dict)
|
159 |
embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
|
|
|
162 |
embeddings = F.normalize(embeddings, p=2, dim=1)
|
163 |
scores = (embeddings[:2] @ embeddings[2:].T)
|
164 |
print(scores.tolist())
|
165 |
+
# [[0.7493016123771667, 0.0750647559762001], [0.08795969933271408, 0.6318399906158447]]
|
166 |
```
|
167 |
📌 **Tip**: We recommend that developers customize the `instruct` according to their specific scenarios, tasks, and languages. Our tests have shown that in most retrieval scenarios, not using an `instruct` on the query side can lead to a drop in retrieval performance by approximately 1% to 5%.
|
168 |
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"prompts": {
|
3 |
+
"query": "Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery:",
|
4 |
+
"document": ""
|
5 |
+
},
|
6 |
+
"default_prompt_name": null,
|
7 |
+
"similarity_fn_name": "cosine"
|
8 |
+
}
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:83cdf8c3a34f68862319cb1810ee7b1e2c0a44e0864ae930194ddb76bb7feb8d
|
3 |
+
size 11422947
|