# science-lab/embed.py
from transformers import AutoTokenizer, TFAutoModel
# Sentence-embedding checkpoint tuned for semantic search (dot-product similarity).
model_ckpt = "sentence-transformers/multi-qa-mpnet-base-dot-v1"

tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
# Load the PyTorch weights into a TensorFlow model (requires torch to be installed).
model = TFAutoModel.from_pretrained(model_ckpt, from_pt=True)

def cls_pool(model_output):
    # CLS pooling: use the hidden state of the first ([CLS]) token as the sentence embedding.
    return model_output.last_hidden_state[:, 0, :]

def sample_embedding(example):
    # Tokenize the input text and run it through the model as TensorFlow tensors.
    token_output = tokenizer(example, padding=True, truncation=True, return_tensors="tf")
    model_output = model(**token_output)
    # Return the pooled embedding for the first (and only) input sequence.
    return {"embedding": cls_pool(model_output).numpy()[0]}