"""Streamlit page that presents a Bunka topic map and its summary tables.

The script renders, top to bottom: a sidebar with the logo and project
links, a sample of the raw messages, a pre-rendered HTML topic map, a
per-topic summary table, and a pipeline illustration.
"""

import pandas as pd
import streamlit as st
import streamlit.components.v1 as components

# --- Sidebar: branding and external links ---------------------------------
st.sidebar.image("images/logo.png", use_column_width=True)
st.sidebar.write("Bunka Summarizes & Visualizes Information as Maps using LLMs.")

st.sidebar.title("Github Page")
st.sidebar.write(
    "Have a look at the following package on GitHub: https://github.com/charlesdedampierre/BunkaTopics"
)

st.sidebar.title("Dataset")
# NOTE(review): the text says "Wikipedia dataset" but the link (and the page
# title further down) point to OASST2 -- confirm which wording is intended.
st.sidebar.write(
    "We used a subset of Wikipedia dataset: https://huggingface.co/datasets/OpenAssistant/oasst2"
)

# --- Raw data preview ------------------------------------------------------
st.title("How to understand large textual datasets?")

df = pd.read_csv("data/data_sample.csv", index_col=[0])
df = df[["message_id", "text"]]
# Only the first 300 rows are displayed.
df = df.head(300)
st.dataframe(df, use_container_width=True)

# --- Pre-rendered HTML map -------------------------------------------------
st.title("Inside the OASST2 dataset")

# Read the HTML inside a context manager so the file handle is closed
# deterministically instead of being left open on every script rerun.
with open("images/map_prompt.html", "r", encoding="utf-8") as element:
    map_html = element.read()
components.html(map_html, height=900, width=900)

# --- Per-topic summary table -----------------------------------------------
st.title("Some insights by territory")

df_info = pd.read_csv("data/topics_info.csv", index_col=[0])
df_info = df_info[["name", "size", "percent"]]
# Display the percentage as a truncated integer followed by "%".
df_info["percent"] = df_info["percent"].apply(lambda x: f"{int(x)}%")
df_info = df_info.reset_index(drop=True)
st.dataframe(df_info, use_container_width=True)

# --- Pipeline illustration -------------------------------------------------
st.title("Bunka Exploration Engine")
st.image(
    "images/pipeline.png",
    use_column_width=True,
)