peter2000 committed on
Commit
aae2963
1 Parent(s): 90dfdac

Update apps/similarity.py

Browse files
Files changed (1) hide show
  1. apps/similarity.py +12 -2
apps/similarity.py CHANGED
@@ -5,6 +5,15 @@ from sentence_transformers import SentenceTransformer
5
 
6
  def app():
7
  st.title("Text Similarity")
 
 
 
 
 
 
 
 
 
8
 
9
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
10
 
@@ -15,12 +24,13 @@ def app():
15
  with col2:
16
  word_to_embed2 = st.text_input("Text 2", value="COP27 opens amid compounding crises of war, warming and economic instability.",)
17
 
18
- if st.button("Embed"):
19
  with st.spinner("Embedding comparing your inputs"):
20
 
21
  document = [word_to_embed1 ,word_to_embed2]
 
22
  #Encode paragraphs
23
- document_embeddings = model.encode(document, show_progress_bar=False)
24
  #Compute cosine similarity between labels sentences and paragraphs
25
  similarity_matrix = cosine_similarity(document_embeddings)
26
 
 
5
 
6
  def app():
7
  st.title("Text Similarity")
8
+ with st.expander("ℹ️ - About this app", expanded=True):
9
+
10
+ st.write(
11
+ """
12
+ Information cartography - Get your word/phrase/sentence/paragraph embedded and visualized.
13
+ The (English) sentence-transformers model "all-MiniLM-L6-v2" maps sentences & paragraphs to a 384-dimensional dense vector space. This is normally used for tasks like clustering or semantic search, but in this case we use it to calculate the (cosine) similarity. The sentence transformer is context-sensitive and works best with whole sentences; to account for that, we extend your text with "The book is about <text>" if it is less than 15 characters long.
14
+
15
+ Simply put in your text and press COMPARE; the higher the similarity, the closer the texts are in the embedding space (max 1).
16
+ """)
17
 
18
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
19
 
 
24
  with col2:
25
  word_to_embed2 = st.text_input("Text 2", value="COP27 opens amid compounding crises of war, warming and economic instability.",)
26
 
27
+ if st.button("Compare"):
28
  with st.spinner("Embedding comparing your inputs"):
29
 
30
  document = [word_to_embed1 ,word_to_embed2]
31
+ documents_embed = ["The book is about "+ wte for wte in document if len(wte) <15]
32
  #Encode paragraphs
33
+ document_embeddings = model.encode(documents_embed , show_progress_bar=False)
34
  #Compute cosine similarity between labels sentences and paragraphs
35
  similarity_matrix = cosine_similarity(document_embeddings)
36