Spaces:

GIZ
/

embedding_visualisation

Running

App Files Files Community

embedding_visualisation / apps /intro.py

peter2000

Update apps/intro.py

90dfdac over 2 years ago

raw

history blame

3.53 kB

	import plotly.express as px
	import streamlit as st
	from sentence_transformers import SentenceTransformer
	from huggingface_hub import hf_hub_url, cached_download
	import umap.umap_ as umap
	import pandas as pd
	import os
	import joblib

	def init_models():
	    """Load the sentence encoder and the pre-fitted 3D UMAP reducer.

	    Returns:
	        tuple: ``(model, umap_model)`` where ``model`` is the
	        ``SentenceTransformer`` for ``all-MiniLM-L6-v2`` and ``umap_model``
	        is the object loaded with joblib from the ``.sav`` file hosted in
	        the ``peter2000/umap_embed_3d_all-MiniLM-L6-v2`` Hub repository.
	    """
	    # Local import: hf_hub_download comes from the package this file already
	    # imports, and keeping it here makes the function self-contained.
	    from huggingface_hub import hf_hub_download

	    model_name = 'sentence-transformers/all-MiniLM-L6-v2'
	    model = SentenceTransformer(model_name)

	    REPO_ID = "peter2000/umap_embed_3d_all-MiniLM-L6-v2"
	    FILENAME = "umap_embed_3d_all-MiniLM-L6-v2.sav"

	    # hf_hub_download is the supported replacement for the deprecated
	    # cached_download(hf_hub_url(...)) pair; it returns the local file path
	    # of the (cached) download.
	    umap_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

	    # SECURITY NOTE: joblib.load unpickles the file, which can execute
	    # arbitrary code. Only keep this pointing at a repository you trust.
	    umap_model = joblib.load(umap_path)
	    return model, umap_model

	def app():
	    """Render the Text Embedder page.

	    Shows a title and an "about" expander, then a text input with a
	    category selector. When the "Embed" button is pressed, the entered text
	    and category are appended to the lists kept in ``st.session_state``,
	    all stored texts are encoded and reduced to three dimensions, and the
	    result is drawn as a 3D scatter plot coloured by category.
	    """
	    with st.container():
	        # The backslash continues the string literal onto the next line, so
	        # that line's leading whitespace is part of the HTML (harmless
	        # inside a style attribute).
	        st.markdown("<h1 style='text-align: center; \
	color: black;'> Text Embedder</h1>",
	                    unsafe_allow_html=True)
	        st.write(' ')
	        st.write(' ')

	    with st.expander("ℹ️ - About this app", expanded=True):

	        st.write(
	            """
	Information cartography - Get your word/phrase/sentence/paragraph embedded and visualized.
	The (English) sentence-transformers model "all-MiniLM-L6-v2" maps sentences & paragraphs to a 384 dimensional dense vector space This is normally used for tasks like clustering or semantic search, but in this case, we use it to place your text to a 3D map. Before plotting, the dimension needs to be reduced to three so we can actually plot it, but preserve as much information as possible. For this, we use a technology called umap. The sentence transformer is context sensitive and works best with whole sentences, to account for that we extend your text with "The book is about <text>" if its less than 15 characters.

	Simply put in your text and press EMBED, your examples will add up. You can use the category for different coloring.
	""")

	        st.markdown("")

	    # NOTE(review): both keys must already exist in the session state. The
	    # plot below skips index 0 of every list, so presumably the host app
	    # seeds each list with one placeholder entry - confirm against caller.
	    word_to_embed_list = st.session_state['embed_list']
	    cat_list = st.session_state['cat_list']

	    with st.container():
	        col1, col2 = st.columns(2)
	        with col1:
	            word_to_embed = st.text_input(
	                "Please enter your text here and we will embed it for you.",
	                value="",
	            )
	        with col2:
	            cat = st.selectbox('Category', ('1', '2', '3', '4', '5'))

	        if st.button("Embed"):
	            with st.spinner("👑 Embedding your input"):

	                model, umap_model = init_models()

	                word_to_embed_list.append(word_to_embed)
	                st.session_state['embed_list'] = word_to_embed_list

	                cat_list.append(cat)
	                # Fixed: the key used to be 'cat_list ' (trailing space),
	                # which wrote to a stray entry instead of the one read above.
	                st.session_state['cat_list'] = cat_list

	                # Fixed: short texts get the context prefix, longer texts
	                # are embedded unchanged. The previous filtering
	                # comprehension dropped every text of 15+ characters, so
	                # the embeddings no longer lined up with the category and
	                # hover lists used for plotting.
	                phrase_to_embed = [
	                    ("The book is about " + wte) if len(wte) < 15 else wte
	                    for wte in word_to_embed_list
	                ]
	                examples_embeddings = model.encode(phrase_to_embed)

	                examples_umap = umap_model.transform(examples_embeddings)

	            with st.spinner("👑 create visualisation"):
	                # Index 0 of each sequence is skipped (see the note on the
	                # session-state lists above).
	                fig = px.scatter_3d(
	                    examples_umap[1:], x=0, y=1, z=2,
	                    color=cat_list[1:],
	                    opacity=.7, hover_data=[word_to_embed_list[1:]])
	                fig.update_scenes(xaxis_visible=False, yaxis_visible=False,
	                                  zaxis_visible=False)
	                fig.update_traces(marker_size=4)
	                st.plotly_chart(fig)