peter2000 committed on
Commit
90dfdac
1 Parent(s): 65131af

Update apps/intro.py

Browse files
Files changed (1) hide show
  1. apps/intro.py +2 -2
apps/intro.py CHANGED
@@ -29,7 +29,7 @@ def app():
29
  st.write(
30
  """
31
  Information cartography - Get your word/phrase/sentence/paragraph embedded and visualized.
32
- The (English) sentence-transformers model "all-MiniLM-L6-v2" maps sentences & paragraphs to a 384 dimensional dense vector space This is normally used for tasks like clustering or semantic search, but in this case, we use it to place your text to a 3D map. Before plotting, the dimension needs to be reduced to three so we can actually plot it, but preserve as much information as possible. For this, we use a technology called umap.
33
 
34
  Simply put in your text and press EMBED, your examples will add up. You can use the category for different coloring.
35
  """)
@@ -59,7 +59,7 @@ def app():
59
  cat_list .append(cat)
60
  st.session_state['cat_list '] = cat_list
61
 
62
- phrase_to_embed = ["The book is about "+ wte for wte in word_to_embed_list]
63
  examples_embeddings = model.encode(phrase_to_embed)
64
 
65
  examples_umap = umap_model.transform(examples_embeddings)
 
29
  st.write(
30
  """
31
  Information cartography - Get your word/phrase/sentence/paragraph embedded and visualized.
32
+ The (English) sentence-transformers model "all-MiniLM-L6-v2" maps sentences & paragraphs to a 384-dimensional dense vector space. This is normally used for tasks like clustering or semantic search, but in this case we use it to place your text on a 3D map. Before plotting, the dimension needs to be reduced to three so we can actually plot it while preserving as much information as possible. For this, we use a technique called UMAP. The sentence transformer is context-sensitive and works best with whole sentences; to account for that, we extend your text to "The book is about <text>" if it is shorter than 15 characters.
33
 
34
  Simply put in your text and press EMBED, your examples will add up. You can use the category for different coloring.
35
  """)
 
59
  cat_list .append(cat)
60
  st.session_state['cat_list '] = cat_list
61
 
62
+ phrase_to_embed = ["The book is about "+ wte for wte in word_to_embed_list if len(wte) <15]
63
  examples_embeddings = model.encode(phrase_to_embed)
64
 
65
  examples_umap = umap_model.transform(examples_embeddings)