# Text Embedder

import plotly.express as px
import streamlit as st
from sentence_transformers import SentenceTransformer
from huggingface_hub import hf_hub_url, cached_download
import umap.umap_ as umap
import pandas as pd
import os
import joblib

def init_models():
    """Load the sentence encoder and the stored UMAP model.

    Returns:
        A ``(model, umap_model)`` tuple: the ``SentenceTransformer`` built
        for 'sentence-transformers/all-MiniLM-L6-v2', and the object that
        ``joblib.load`` restores from the ``.sav`` file downloaded from the
        Hugging Face Hub.
    """
    encoder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

    # Resolve the Hub URL of the serialized UMAP model and fetch it through
    # the local download cache.
    umap_repo = "peter2000/umap_embed_3d_all-MiniLM-L6-v2"
    umap_file = "umap_embed_3d_all-MiniLM-L6-v2.sav"
    download_url = hf_hub_url(umap_repo, umap_file)
    local_path = cached_download(download_url)

    # NOTE(review): joblib.load unpickles the downloaded file, which can run
    # arbitrary code -- only keep this if the repository above is trusted.
    reducer = joblib.load(local_path)
    return encoder, reducer

def app():
    """Render the Text Embedder page and plot the embedded texts in 3D.

    Shows the page title and an "About this app" expander, then a text input
    and a category selector. When the "Embed" button is pressed, the entered
    text and category are appended to the lists kept in ``st.session_state``
    under ``'embed_list'`` and ``'cat_list'``. Every stored text is then
    prefixed with "The book is about ", encoded with the sentence
    transformer, passed through the UMAP model's ``transform`` and drawn as
    a 3D scatter plot coloured by category.

    Both session-state keys are read with plain indexing, so they must
    already exist when this function runs (a ``KeyError`` is raised
    otherwise).
    """
    with st.container():
        st.markdown("<h1 style='text-align: center;  \
                      color: black;'> Text Embedder</h1>",
                    unsafe_allow_html=True)
        st.write(' ')
        st.write(' ')

    with st.expander("ℹ️ - About this app", expanded=True):

        st.write(
            """     
            Information cartography - Get your word/phrase/sentence/paragraph embedded and visualized.
            The (English) sentence-transformers model "all-MiniLM-L6-v2" maps sentences & paragraphs to a 384-dimensional dense vector space This is normally used for tasks like clustering or semantic search, but in this case, we use it to place your text to a 3D map. Before plotting, the dimension needs to be reduced to three so we can actually plot it, but preserve as much information as possible. For this, we use a technology called umap. The sentence transformer is context-sensitive and works best with whole sentences, to account for that we extend your text with "The book is about <text>".
            
            Simply put in your text and press EMBED, your examples will add up. You can use the category for different coloring.
            """)

        st.markdown("")

    # Texts and categories accumulated so far in this session.
    word_to_embed_list = st.session_state['embed_list']
    cat_list = st.session_state['cat_list']

    with st.container():
        col1, col2 = st.columns(2)
        with col1:
            word_to_embed = st.text_input(
                "Please enter your text here and we will embed it for you.",
                value="",
            )
        with col2:
            cat = st.selectbox('Category', ('1', '2', '3', '4', '5'))

        if st.button("Embed"):
            with st.spinner("👑 Embedding your input"):

                model, umap_model = init_models()

                # Record the new entry and write both lists back under the
                # same keys they were read from. The category list was
                # previously stored under 'cat_list ' (trailing space),
                # which created a stray key.
                word_to_embed_list.append(word_to_embed)
                st.session_state['embed_list'] = word_to_embed_list
                cat_list.append(cat)
                st.session_state['cat_list'] = cat_list

                # Wrap each text in a full sentence before encoding, as
                # described in the "About" text.
                phrase_to_embed = [
                    "The book is about " + wte for wte in word_to_embed_list
                ]
                examples_embeddings = model.encode(phrase_to_embed)

                examples_umap = umap_model.transform(examples_embeddings)

                with st.spinner("👑 create visualisation"):
                    # The first stored entry is left out of the plot --
                    # presumably a placeholder put there by whatever
                    # initialises the session state; TODO confirm.
                    fig = px.scatter_3d(
                        examples_umap[1:], x=0, y=1, z=2,
                        color=cat_list[1:],
                        opacity=.7,
                        hover_data=[word_to_embed_list[1:]],
                    )
                    fig.update_scenes(
                        xaxis_visible=False,
                        yaxis_visible=False,
                        zaxis_visible=False,
                    )
                    fig.update_traces(marker_size=4)
                    st.plotly_chart(fig)