File size: 616 Bytes
112213c
f93be82
603b835
f93be82
112213c
 
 
 
 
 
 
8a84bf0
112213c
619a263
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from transformers import AutoTokenizer,TFAutoModel
import torch
from torch import nn
import tensorflow



# Checkpoint identifier handed to both `from_pretrained` calls below, so the
# tokenizer and the encoder always come from the same checkpoint.
model_ckpt = "sentence-transformers/multi-qa-mpnet-base-dot-v1"

tokenizer = AutoTokenizer.from_pretrained(model_ckpt)

# from_pt=True asks transformers to build the TensorFlow model from the
# checkpoint's PyTorch weights (see the `from_pretrained` documentation).
model = TFAutoModel.from_pretrained(model_ckpt, from_pt=True)


def cls_pool(model):
    """Return the slice at position 0 of the second axis of the hidden states.

    Args:
        model: Despite the name, this is an object exposing a
            ``last_hidden_state`` attribute (in this file it is the value
            returned by calling the encoder), not the encoder itself.
            The attribute must support 3-axis indexing; presumably it is
            laid out as (batch, sequence, hidden) -- confirm against the
            transformers model-output documentation.

    Returns:
        ``last_hidden_state[:, 0, :]`` -- the first and third axes are kept
        whole and the second axis is dropped by selecting index 0
        (presumably the CLS / start-of-sequence token, per the function name).
    """
    hidden_states = model.last_hidden_state
    return hidden_states[:, 0, :]

def sample_embedding(example):
    """Tokenize ``example``, run the encoder, and return one pooled embedding.

    Args:
        example: Text forwarded unchanged to the module-level ``tokenizer``
            (presumably a single string, or a list of strings -- confirm
            against the caller).

    Returns:
        A dict with the single key ``"embedding"`` whose value is element 0
        of the ``cls_pool`` result after ``.numpy()`` conversion.
        NOTE(review): only index 0 is kept, so if ``example`` contains
        several texts the remaining rows appear to be discarded -- confirm
        that callers always pass one text at a time.
    """
    encoded = tokenizer(
        example,
        padding=True,
        truncation=True,
        return_tensors="tf",
    )

    # Copy the tokenizer output into a plain dict so it can be expanded
    # into keyword arguments for the encoder call.
    model_inputs = dict(encoded.items())

    pooled = cls_pool(model(**model_inputs))

    return {"embedding": pooled.numpy()[0]}