In [1]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')

# Example inputs: a plain list of strings, one entry per sentence.
sentences = [
    'This framework generates embeddings for each input sentence',
    'Sentences are passed as a list of string.',
    'The quick brown fox jumps over the lazy dog.',
]

# A single encode() call handles the whole list; the result is indexed
# in parallel with `sentences` below.
embeddings = model.encode(sentences)

# Report every sentence next to the type and element count of its embedding.
for idx, text in enumerate(sentences):
    vector = embeddings[idx]
    print("Sentence:", text)
    print("Embedding:", type(vector), vector.size)
    print("")

Sentence: This framework generates embeddings for each input sentence
Embedding: <class 'numpy.ndarray'> 384

Sentence: Sentences are passed as a list of string.
Embedding: <class 'numpy.ndarray'> 384

Sentence: The quick brown fox jumps over the lazy dog.
Embedding: <class 'numpy.ndarray'> 384



In [2]:

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode the two sentences one at a time, in the order they are listed.
emb1, emb2 = (
    model.encode(text)
    for text in (
        "This is a red cat with a hat.",
        "Have you seen my red cat?",
    )
)

# Cosine similarity between the two embeddings; printed as returned by util.cos_sim.
cos_sim = util.cos_sim(emb1, emb2)
print("Cosine-Similarity:", cos_sim)

Cosine-Similarity: tensor([[0.6153]])


In [3]:
from itertools import combinations

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('all-MiniLM-L6-v2')

sentences = [
    'A man is eating food.',
    'A man is eating a piece of bread.',
    'The girl is carrying a baby.',
    'A man is riding a horse.',
    'A woman is playing violin.',
    'Two men pushed carts through the woods.',
    'A man is riding a white horse on an enclosed ground.',
    'A monkey is playing drums.',
    'Someone in a gorilla costume is playing a set of drums.',
]

# Encode all sentences in one call.
embeddings = model.encode(sentences)

# Pairwise cosine similarity: cos_sim[i][j] is the score between
# sentences[i] and sentences[j].
cos_sim = util.cos_sim(embeddings, embeddings)

# Collect every unordered pair exactly once (i < j), which skips the
# diagonal and mirrored duplicates. combinations() yields pairs in the same
# order as the equivalent nested index loops. Each score is converted to a
# plain Python float so sorting and formatting work on ordinary numbers
# instead of 0-dim tensors.
all_sentence_combinations = [
    [float(cos_sim[i][j]), i, j]
    for i, j in combinations(range(len(cos_sim)), 2)
]

# Sort pairs from most to least similar.
all_sentence_combinations = sorted(
    all_sentence_combinations, key=lambda pair: pair[0], reverse=True
)

print("Top-5 most similar pairs:")
for score, i, j in all_sentence_combinations[0:5]:
    # Use the score carried with the pair rather than re-indexing the matrix.
    print("{} \t {} \t {:.4f}".format(sentences[i], sentences[j], score))

Top-5 most similar pairs:
A man is eating food. 	 A man is eating a piece of bread. 	 0.7553
A man is riding a horse. 	 A man is riding a white horse on an enclosed ground. 	 0.7369
A monkey is playing drums. 	 Someone in a gorilla costume is playing a set of drums. 	 0.6433
A woman is playing violin. 	 Someone in a gorilla costume is playing a set of drums. 	 0.2564
A man is eating food. 	 A man is riding a horse. 	 0.2474


In [31]:
from sentence_transformers import SentenceTransformer

# Load a different pretrained checkpoint; later cells use it through `model`.
checkpoint_name = 'all-distilroberta-v1'
model = SentenceTransformer(checkpoint_name)

In [32]:
# Same three example sentences, encoded with whichever `model` is currently loaded.
sentences = [
    'This framework generates embeddings for each input sentence',
    'Sentences are passed as a list of string.',
    'The quick brown fox jumps over the lazy dog.',
]

embeddings = model.encode(sentences)

# Print each sentence followed by the type and element count of its embedding.
for position in range(len(sentences)):
    text, vector = sentences[position], embeddings[position]
    print("Sentence:", text)
    print("Embedding:", type(vector), vector.size)
    print("")

Sentence: This framework generates embeddings for each input sentence
Embedding: <class 'numpy.ndarray'> 768

Sentence: Sentences are passed as a list of string.
Embedding: <class 'numpy.ndarray'> 768

Sentence: The quick brown fox jumps over the lazy dog.
Embedding: <class 'numpy.ndarray'> 768



In [None]:
from sentence_transformers import SentenceTransformer, models

## Step 1: load the base transformer that supplies the token embeddings
word_embedding_model = models.Transformer('distilroberta-base')

## Step 2: set up a pooling layer over the token embeddings, sized to the
## transformer's word-embedding dimension
embedding_dim = word_embedding_model.get_word_embedding_dimension()
pooling_model = models.Pooling(embedding_dim)

## Chain steps 1 and 2 into a single model through the modules argument
pipeline_modules = [word_embedding_model, pooling_model]
model = SentenceTransformer(modules=pipeline_modules)