# Hugging Face Space (status when captured: Runtime error)
# Import necessary libraries | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from sentence_transformers import SentenceTransformer | |
from sklearn.metrics.pairwise import euclidean_distances | |
class SentenceEuclideanDistanceCalculator:
    """
    Measure how far paraphrases sit from an original sentence in embedding space.

    On construction the original sentence and every paraphrase are embedded
    with a sentence-embedding model, the Euclidean distance from the original
    to each paraphrase is computed, and the distances are min-max scaled to
    the range [0, 1].

    Attributes set by ``__init__``:
        original_sentence: the sentence passed in, unchanged.
        paraphrased_sentences: the paraphrases passed in, unchanged.
        model: the embedding model used for ``encode`` calls.
        original_embedding: NumPy embedding of the original sentence.
        euclidean_distances: dict mapping ``"Sentence_<n>"`` (1-based, in
            input order) to the raw distance for the n-th paraphrase.
        normalized_euclidean: same keys, values min-max scaled to [0, 1].
    """

    def __init__(self, original_sentence, paraphrased_sentences, model=None):
        """
        Embed the sentences and compute raw and normalized distances.

        Args:
            original_sentence: reference sentence the paraphrases are compared to.
            paraphrased_sentences: sequence of paraphrases; may be empty, in
                which case both result dictionaries are empty.
            model: optional object exposing ``encode(sentences, convert_to_numpy=True)``.
                When omitted, ``sentence-transformers/all-MiniLM-L6-v2`` is
                loaded. Passing a preloaded model avoids reloading it for
                every calculator instance.
        """
        self.original_sentence = original_sentence
        self.paraphrased_sentences = paraphrased_sentences

        # Load the default model only when the caller did not supply one.
        if model is None:
            model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.model = model

        # Ask for NumPy output rather than tensors: the distance math below is
        # done with NumPy, and implicit tensor -> array conversion fails for
        # tensors that live on a GPU.
        self.original_embedding = np.asarray(
            self.model.encode(original_sentence, convert_to_numpy=True)
        )

        self.euclidean_distances = self._calculate_all_metrics()
        self.normalized_euclidean = self._normalize_dict(self.euclidean_distances)

    def _calculate_all_metrics(self):
        """
        Compute the Euclidean distance from the original to each paraphrase.

        Returns:
            dict mapping ``"Sentence_<n>"`` (1-based, input order) to the
            distance for the n-th paraphrase; empty when there are no
            paraphrases.
        """
        # Nothing to encode: skip the model call entirely.
        if len(self.paraphrased_sentences) == 0:
            return {}

        paraphrase_embeddings = np.asarray(
            self.model.encode(self.paraphrased_sentences, convert_to_numpy=True)
        )

        # One vectorized pass: subtract the original embedding from every row
        # (broadcast) and take the L2 norm of each row.
        offsets = paraphrase_embeddings - self.original_embedding
        row_norms = np.linalg.norm(offsets, axis=1)

        return {
            f"Sentence_{position}": distance
            for position, distance in enumerate(row_norms, start=1)
        }

    def _normalize_dict(self, metric_dict):
        """
        Min-max scale the values of a dictionary to the range [0, 1].

        Args:
            metric_dict: mapping of label -> numeric value.

        Returns:
            dict with the same keys in the same order. The smallest value maps
            to 0 and the largest to 1. If all values are equal (including the
            single-entry case) every value maps to 0. An empty input yields an
            empty dict.
        """
        # min()/max() of an empty array raise ValueError, so handle it up front.
        if not metric_dict:
            return {}

        values = np.array(list(metric_dict.values()), dtype=float)
        lowest, highest = values.min(), values.max()

        if highest > lowest:
            scaled = (values - lowest) / (highest - lowest)
        else:
            # Zero spread: avoid dividing by zero and report all zeros.
            scaled = np.zeros_like(values)

        return dict(zip(metric_dict, scaled))

    def plot_metrics(self):
        """
        Plot the normalized Euclidean distances as a line chart.

        The x axis is the 0-based position of each paraphrase and the y axis
        is its normalized distance. The line colour is a random RGB triple
        drawn on each call. The figure is displayed with ``plt.show()``.
        """
        labels = list(self.normalized_euclidean)
        positions = np.arange(len(labels))
        heights = [self.normalized_euclidean[label] for label in labels]

        plt.figure(figsize=(12, 6))
        plt.plot(positions, heights, marker='o', color=np.random.rand(3,))
        plt.xlabel('Sentence Index')
        plt.ylabel('Normalized Euclidean Distance (0-1)')
        plt.title('Normalized Euclidean Distance')
        plt.grid(True)
        plt.tight_layout()
        plt.show()

    # Getter methods
    def get_normalized_metrics(self):
        """
        Return the normalized Euclidean distances.

        Returns:
            dict mapping ``"Sentence_<n>"`` to a value in [0, 1].
        """
        return self.normalized_euclidean