# Streamlit demo for the flax-sentence-embeddings models.
#
# The sidebar selects one of three tasks:
#   * "Sentence Similarity" - score free-form texts against a main text.
#   * "Asymmetric QA"       - score candidate answers against a query.
#   * "Search / Cluster"    - retrieve similar queries and plot a 3D clustering.
#
# NOTE: this header is deliberately a comment block rather than a module
# docstring so that nothing extra can end up rendered on the page.
import streamlit as st
import pandas as pd

from backend import inference
from backend.config import MODELS_ID, QA_MODELS_ID, SEARCH_MODELS_ID

# Closing sentence shared by the instructions of every task.
_SBERT_NOTE = (
    "For more cool information on sentence embeddings, see the "
    "[sBert project](https://www.sbert.net/examples/applications/"
    "computing-embeddings/README.html). "
)

_INTRO_MD = (
    " Hi! This is the demo for the "
    "[flax sentence embeddings](https://huggingface.co/flax-sentence-embeddings) "
    "created for the **Flax/JAX community week 🤗**. "
    "We are going to use three flax-sentence-embeddings models: "
    "a **distilroberta base**, a **mpnet base** and a **minilm-l6**. "
    "All were trained on all the dataset of the 1B+ train corpus with the v3 setup. "
)

_SIMILARITY_MD = (
    " **Instructions**: You can compare the similarity of a main text with "
    "other texts of your choice. "
    "In the background, we'll create an embedding for each text, and then "
    "we'll use the cosine similarity function to calculate a similarity "
    "metric between our main sentence and the others. "
) + _SBERT_NOTE

_QA_MD = (
    " **Instructions**: You can compare the Answer likeliness of a given "
    "Query with answer candidates of your choice. "
    "In the background, we'll create an embedding for each answers, and then "
    "we'll use the cosine similarity function to calculate a similarity "
    "metric between our query sentence and the others. "
    "`mpnet_asymmetric_qa` model works best for hard negative answers or "
    "distinguishing similar queries due to separate models applied for "
    "encoding questions and answers. "
) + _SBERT_NOTE

_SEARCH_MD = (
    ' **Instructions**: Make a query for anything related to "Python" and '
    'the model you choose will return you similar queries. '
) + _SBERT_NOTE

# Shown instead of crashing (or rendering an empty table) when the user has
# removed every entry from the "Choose models" multiselect.
_NO_MODEL_WARNING = 'Please choose at least one model.'


def _collect_texts(label, count, defaults=()):
    """Render ``count`` text inputs and return what the user typed.

    Args:
        label: Prefix of each widget label; widgets are labelled
            ``"<label> 1:"``, ``"<label> 2:"``, ...
        count: Number of text inputs to render.
        defaults: Initial values for the first ``len(defaults)`` inputs;
            the remaining inputs start empty.

    Returns:
        The current widget values, in display order.
    """
    return [
        st.text_input(
            f'{label} {position + 1}:',
            value=defaults[position] if position < len(defaults) else "",
        )
        for position in range(count)
    ]


def _show_similarity_scores(anchor, texts, model_names, model_ids):
    """Score ``texts`` against ``anchor`` with each model and render the result.

    Args:
        anchor: The main text / query every candidate is compared with.
        texts: Candidate texts to score.
        model_names: Names picked in the multiselect (may be empty).
        model_ids: The model table the names were picked from; forwarded to
            ``inference.text_similarity``.
    """
    if not model_names:
        st.warning(_NO_MODEL_WARNING)
        return

    # Row labels: 1-based position plus the first 15 characters of the text
    # (slicing already clamps to the string length).
    row_labels = [f"{idx + 1}:{text[:15]}..." for idx, text in enumerate(texts)]
    scores = pd.DataFrame(index=row_labels)
    for name in model_names:
        # Each result is indexed by 'score' and exposes `.values`
        # (presumably a DataFrame - confirm against backend.inference).
        result = inference.text_similarity(anchor, texts, name, model_ids)
        scores[name] = list(result['score'].values)

    st.write('Here are the results for selected models:')
    st.write(scores)
    st.write('Visualize the results of each model:')
    st.line_chart(scores)


st.title('Demo using Flax-Sentence-Tranformers')

st.sidebar.title('Tasks')
menu = st.sidebar.radio(
    "",
    options=["Sentence Similarity", "Asymmetric QA", "Search / Cluster"],
    index=0,
)

st.markdown(_INTRO_MD)

if menu == "Sentence Similarity":
    st.header('Sentence Similarity')
    st.markdown(_SIMILARITY_MD)
    select_models = st.multiselect(
        "Choose models", options=list(MODELS_ID), default=list(MODELS_ID)[0]
    )

    anchor = st.text_input(
        'Please enter here the main text you want to compare:'
    )

    n_texts = st.number_input(
        f'''How many texts you want to compare with: '{anchor}'?''',
        value=2,
        min_value=2,
    )

    texts = _collect_texts('Text', int(n_texts))

    if st.button('Tell me the similarity.'):
        _show_similarity_scores(anchor, texts, select_models, MODELS_ID)

elif menu == "Asymmetric QA":
    st.header('Asymmetric QA')
    st.markdown(_QA_MD)
    select_models = st.multiselect(
        "Choose models", options=list(QA_MODELS_ID), default=list(QA_MODELS_ID)[0]
    )

    anchor = st.text_input(
        'Please enter here the query you want to compare with given answers:',
        value="What is the weather in Paris?"
    )

    n_texts = st.number_input(
        f'''How many answers you want to compare with: '{anchor}'?''',
        value=10,
        min_value=2,
    )

    default_answers = [
        "It is raining in Paris right now with 70 F temperature.",
        "What is the weather in Berlin?",
        "I have 3 brothers.",
    ]
    answers = _collect_texts('Answer', int(n_texts), default_answers)

    if st.button('Tell me Answer likeliness.'):
        _show_similarity_scores(anchor, answers, select_models, QA_MODELS_ID)

elif menu == "Search / Cluster":
    st.header('Search / Cluster')
    st.markdown(_SEARCH_MD)
    select_models = st.multiselect(
        "Choose models",
        options=list(SEARCH_MODELS_ID),
        default=list(SEARCH_MODELS_ID)[0],
    )

    anchor = st.text_input(
        'Please enter here your query about "Python", we will look for similar ones:',
        value="How do I sort a dataframe by column"
    )

    n_texts = st.number_input(
        'How many similar queries you want?',
        value=3,
        min_value=2,
    )

    # The selectable names come from SEARCH_MODELS_ID, so resolve them against
    # that same table; looking them up in QA_MODELS_ID only works while both
    # tables happen to share the key.
    if st.button('Give me my search.'):
        if not select_models:
            st.warning(_NO_MODEL_WARNING)
        else:
            results = {
                model: inference.text_search(
                    anchor, int(n_texts), model, SEARCH_MODELS_ID
                )
                for model in select_models
            }
            # Only the first selected model's hits are displayed.
            st.table(pd.DataFrame(results[select_models[0]]).T)

    if st.button('3D Clustering of search result (new window)'):
        if not select_models:
            st.warning(_NO_MODEL_WARNING)
        else:
            fig = inference.text_cluster(
                anchor, 1000, select_models[0], SEARCH_MODELS_ID
            )
            fig.show()