indobert-embeddings / similarity.py
cassador's picture
First commit
c83926e verified
raw
history blame
684 Bytes
import torch
from sentence_transformers import SentenceTransformer, util
# Directory that holds the model files. SentenceTransformer loads from a model
# directory (or a Hub model id), not from a bare weights file, so this points
# at the folder containing `pytorch_model.bin` rather than at the `.bin` itself.
# NOTE(review): assumes the script is run from the folder that holds the model
# files and that this folder also contains the model's `config.json` - confirm.
model_path = "."
# Name of the tokenizer file expected inside `model_path`. It is kept for
# reference only: the tokenizer is loaded together with the model below.
tokenizer_path = "tokenizer.json"
# Load the model (this also loads the tokenizer found in `model_path`)
model = SentenceTransformer(model_path)
# Reuse the tokenizer the model already loaded; `sentence_transformers.util`
# provides no `load_tokenizer` helper.
tokenizer = model.tokenizer
# Your sentences
sentences = ["This is an example sentence", "Each sentence is converted"]
# Tokenize the sentences with the model's own batch tokenizer. This is kept
# for inspection only; `encode` below tokenizes internally.
encoded_sentences = model.tokenize(sentences)
# Get the sentence embeddings. `encode` runs tokenization, the forward pass and
# pooling, whereas calling `model(...)` directly returns the raw feature dict.
embeddings = model.encode(sentences, convert_to_tensor=True)
# Print the embeddings
print(embeddings)