# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import euclidean_distances


class SentenceEuclideanDistanceCalculator:
    """
    Calculate and analyze the Euclidean distance between an original
    sentence and a collection of paraphrased sentences.

    Sentences are embedded with a SentenceTransformer model; the distance
    from the original embedding to each paraphrase embedding is stored both
    raw (``euclidean_distances``) and min-max normalized
    (``normalized_euclidean``). Both dictionaries are keyed
    ``"Sentence_1"``, ``"Sentence_2"``, ... in input order.
    """

    def __init__(self, original_sentence, paraphrased_sentences):
        """
        Embed the original sentence and compute all distances up front.

        Args:
            original_sentence: The reference sentence.
            paraphrased_sentences: Sized sequence (e.g. a list) of sentences
                to compare against the reference. May be empty, in which
                case both result dictionaries are empty.
        """
        self.original_sentence = original_sentence
        self.paraphrased_sentences = paraphrased_sentences

        # Load SentenceTransformer model for embedding calculation
        self.model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

        # Precompute the original sentence embedding (kept as a tensor so
        # existing readers of this attribute see the same type as before).
        self.original_embedding = self.model.encode(
            original_sentence, convert_to_tensor=True
        )

        # Calculate Euclidean distances and normalize them
        self.euclidean_distances = self._calculate_all_metrics()
        self.normalized_euclidean = self._normalize_dict(self.euclidean_distances)

    @staticmethod
    def _to_numpy(embedding):
        """
        Return ``embedding`` as a NumPy array.

        Tensor-like objects (anything exposing ``detach``) are detached and
        moved to the CPU first; a tensor living on an accelerator cannot be
        converted to NumPy implicitly.
        """
        if hasattr(embedding, "detach"):
            embedding = embedding.detach().cpu().numpy()
        return np.asarray(embedding)

    def _calculate_all_metrics(self):
        """
        Calculate the Euclidean distance between the original sentence and
        each paraphrased sentence.

        Returns:
            dict mapping ``"Sentence_<n>"`` (1-based, input order) to the
            distance for that paraphrase. Empty when there are no
            paraphrased sentences.
        """
        sentences = self.paraphrased_sentences
        # Explicit length check (rather than truthiness) so array-like
        # inputs are handled too; nothing to encode means nothing to measure.
        if len(sentences) == 0:
            return {}

        paraphrase_embeddings = self.model.encode(sentences, convert_to_tensor=True)

        # Hand sklearn plain 2-D arrays: one reference row against all
        # paraphrase rows, computed in a single call.
        reference = self._to_numpy(self.original_embedding).reshape(1, -1)
        candidates = self._to_numpy(paraphrase_embeddings)
        distance_row = euclidean_distances(reference, candidates)[0]

        return {
            f"Sentence_{position}": distance
            for position, distance in enumerate(distance_row, start=1)
        }

    def _normalize_dict(self, metric_dict):
        """
        Min-max normalize the values of a dictionary to the range 0-1.

        Args:
            metric_dict: Mapping of key to numeric value.

        Returns:
            New dict with the same keys in the same order. All values are
            zero when every input value is equal; the result is empty when
            ``metric_dict`` is empty.
        """
        # min()/max() of an empty array raise ValueError, so bail out early.
        if not metric_dict:
            return {}

        values = np.array(list(metric_dict.values()))
        min_val, max_val = values.min(), values.max()

        # Guard the division: a zero range would otherwise divide by zero.
        if max_val > min_val:
            normalized_values = (values - min_val) / (max_val - min_val)
        else:
            normalized_values = np.zeros_like(values)

        return dict(zip(metric_dict, normalized_values))

    def plot_metrics(self):
        """
        Plot the normalized Euclidean distances as a line graph.

        The x axis is the 0-based position of each sentence; the line color
        is chosen at random on every call. Displays the figure via
        ``plt.show()``.
        """
        normalized_values = list(self.normalized_euclidean.values())
        indices = np.arange(len(normalized_values))

        plt.figure(figsize=(12, 6))
        plt.plot(
            indices,
            normalized_values,
            marker='o',
            color=np.random.rand(3,),  # random RGB triple
        )
        plt.xlabel('Sentence Index')
        plt.ylabel('Normalized Euclidean Distance (0-1)')
        plt.title('Normalized Euclidean Distance')
        plt.grid(True)
        plt.tight_layout()
        plt.show()

    # Getter methods
    def get_normalized_metrics(self):
        """
        Get the normalized Euclidean distances as a dictionary.

        Returns:
            The ``normalized_euclidean`` dict held by this instance (not a
            copy).
        """
        return self.normalized_euclidean