# Evaluate Embedding Similarity Metrics

In [2]:
import openai, numpy as np

In [3]:
def cosine_distance(a, b):
    """Calculate the cosine distance between two vectors.

    The distance is ``1 - cos(theta)``, where ``theta`` is the angle between
    ``a`` and ``b``: 0 for vectors pointing the same way, 1 for orthogonal
    vectors and 2 for vectors pointing in opposite directions.

    Parameters:
    a (array-like): First input vector (numpy array or sequence of numbers).
    b (array-like): Second input vector, same length as ``a``.

    Returns:
    float: Cosine distance between a and b, clamped to the range [0, 2].
        ``nan`` is returned when the product of the magnitudes is zero,
        because the angle to a zero-length vector is undefined.
    """
    # Calculate dot product and magnitudes of the input arrays
    dot = np.dot(a, b)
    a_mag = np.linalg.norm(a)
    b_mag = np.linalg.norm(b)

    # Diagnostic only: a (near-)zero magnitude makes the result numerically
    # unreliable, so tell the user which input is responsible.
    if np.isclose(a_mag, 0, rtol=1e-9, atol=1e-12):
        print(f"a_mag is very small: {a_mag}")
    if np.isclose(b_mag, 0, rtol=1e-9, atol=1e-12):
        print(f"b_mag is very small: {b_mag}")

    denominator = a_mag * b_mag
    if denominator == 0:
        # Dividing by zero would yield the same nan, but only after NumPy
        # emits a RuntimeWarning; return the undefined result explicitly.
        return np.float64(np.nan)

    # Rounding error can push the ratio marginally outside [-1, 1] (e.g. for
    # identical vectors), which would produce a slightly negative distance.
    cosine = np.clip(dot / denominator, -1.0, 1.0)

    # Calculate and return the cosine distance
    return 1.0 - cosine

In [4]:
def semantically_similar(string1, string2, threshold=0.2,
                         engine="text-similarity-davinci-001"):
    """Decide whether two strings are semantically similar.

    Both strings are embedded in a single ``openai.Embedding.create`` request
    and the cosine distance between the two embeddings is compared against
    ``threshold``. The distance is also printed.

    Parameters:
    string1 (str): First text to compare.
    string2 (str): Second text to compare.
    threshold (float): Upper bound (exclusive) on the cosine distance for the
        pair to count as similar. Defaults to 0.2.
    engine (str): Name of the embedding engine passed to the OpenAI API.
        Defaults to "text-similarity-davinci-001".

    Returns:
    bool: True when the cosine distance is strictly below ``threshold``.
    """
    response = openai.Embedding.create(
        input=[string1, string2],
        engine=engine,
    )
    # Assumes response['data'] lists the embeddings in the same order as the
    # inputs -- TODO confirm against the API reference.
    embedding_a = response['data'][0]['embedding']
    embedding_b = response['data'][1]['embedding']
    distance = cosine_distance(embedding_a, embedding_b)
    # The printed label says "similarity", but the value is a cosine
    # distance: lower means more alike. The label is kept unchanged so
    # existing output stays comparable.
    print(f"similarity: {distance}")

    return distance < threshold

In [5]:
# Compare one anchor phrase against four candidate phrases; every call prints
# its own score line.
# NOTE: the trailing comma turns each line into a separate one-element tuple
# expression. The recorded output shows a single `(True,)`, i.e. only the
# tuple from the last line; the other three return values are not displayed.
semantically_similar("fight a war", "water supply"),
semantically_similar("fight a war", "solar energy"),
semantically_similar("fight a war", "defend a country"),
semantically_similar("fight a war", "win a battle"),

similarity: 0.22501948669661986
similarity: 0.2318907843871436
similarity: 0.12933868208210475
similarity: 0.10699853725782704


(True,)

In [6]:
# Two sentences on different subjects; in the recorded run the printed score
# (~0.25) is above the 0.2 cutoff, so the result is False.
semantically_similar("the sky is blue", "I like to eat")

similarity: 0.2496415604648079


False

In [7]:
# Two phrasings about a cat; in the recorded run the printed score (~0.10) is
# below the 0.2 cutoff, so the result is True.
semantically_similar("the cat meows", "the feline animal says")

similarity: 0.10193029028713485


True

In [8]:
# A question compared with a short topic phrase; in the recorded run the
# printed score (~0.198) falls just under the 0.2 cutoff, so the result is True.
semantically_similar("what is the best way to win a war?", "strategizing a war")

similarity: 0.19759407795526762


True

In [9]:
# The same question compared with the anchor phrase used earlier; in the
# recorded run the printed score (~0.195) is under the 0.2 cutoff, so the
# result is True.
semantically_similar("what is the best way to win a war?", "fight a war")

similarity: 0.1949772795717004


True