import streamlit as st
import torch
from sentence_transformers import SentenceTransformer


# Load SBERT model (choose a suitable model from
# https://www.sbert.net/docs/pretrained_models.html)
@st.cache_resource
def load_sbert():
    """Load and return the SentenceTransformer model used for embeddings.

    Decorated with ``st.cache_resource`` so the model is not reloaded on
    every Streamlit rerun of this script.
    """
    return SentenceTransformer('all-MiniLM-L6-v2')  # Example model


model = load_sbert()


def calculate_similarity(word1, word2):
    """Return the cosine similarity between the embeddings of two texts.

    Args:
        word1: First word, phrase or sentence.
        word2: Second word, phrase or sentence.

    Returns:
        The cosine similarity of the two embeddings as a Python float.
    """
    # model.encode returns NumPy arrays; convert them to tensors so that
    # torch's cosine_similarity can be applied.
    embeddings1 = torch.tensor(model.encode(word1))
    embeddings2 = torch.tensor(model.encode(word2))
    cos_sim = torch.nn.functional.cosine_similarity(embeddings1, embeddings2, dim=0)
    return cos_sim.item()


def _calculate_similarities(reference, candidates):
    """Score every candidate against the reference in a single pass.

    The reference is encoded once and the candidates are encoded as one
    batch, instead of re-encoding the reference for each candidate.

    Args:
        reference: The reference sentence.
        candidates: List of sentences to compare with the reference.

    Returns:
        A list of ``(candidate, similarity)`` tuples in input order.
    """
    if not candidates:
        return []
    reference_embedding = torch.tensor(model.encode(reference))
    # Encoding a list yields one embedding row per candidate.
    candidate_embeddings = torch.tensor(model.encode(candidates))
    scores = torch.nn.functional.cosine_similarity(
        reference_embedding.unsqueeze(0), candidate_embeddings, dim=1
    )
    return list(zip(candidates, scores.tolist()))


def display_top_5(similarities):
    """Render the five highest-scoring entries in the Streamlit page.

    Args:
        similarities: Iterable of ``(text, similarity)`` pairs.
    """
    # Sort by similarity (descending) and keep at most five entries.
    top_5_similarities = sorted(
        similarities, key=lambda item: item[1], reverse=True
    )[:5]
    st.subheader("Top 5 Most Similar Words:")
    for word, similarity in top_5_similarities:
        st.write(f"- '{word}': {similarity:.4f}")


# Streamlit interface
st.title("Sentence Similarity Checker")

# Instructions in the sidebar (one item per line so Markdown renders a list)
st.sidebar.title("Instructions")
st.sidebar.write("""
1. **Enter the Reference Sentence**: Input the sentence or phrase you want to compare against others.
2. **Input a List of Sentences**: Enter multiple sentences or phrases, each on a new line, in the text area.
3. **Click 'Analyze'**: The app will calculate and display the top 5 most similar sentences.
4. **Results**: The top 5 similar sentences along with their similarity scores will be displayed.
5. **Warnings**: If either the reference sentence or the list of sentences is missing, a warning will be shown.
""")

# Main input area
reference_word = st.text_input("Enter the reference Sentence:")
word_list = st.text_area("Enter a list of sentences or phrases (one word per line):")

if st.button("Analyze"):
    # Strip whitespace and drop blank lines so that whitespace-only input
    # counts as missing and empty lines are never scored.
    reference = reference_word.strip()
    candidates = [line.strip() for line in word_list.splitlines() if line.strip()]

    if reference and candidates:
        # Score all candidates against the reference, then show the best five.
        display_top_5(_calculate_similarities(reference, candidates))
    else:
        st.warning("Please enter a reference word and a list of words.")