Spaces:
Runtime error
Runtime error
Edit app.py to add textual content
Browse files
app.py
CHANGED
@@ -10,19 +10,26 @@ st.sidebar.write(
|
|
10 |
)
|
11 |
st.sidebar.title("Dataset")
|
12 |
st.sidebar.write(
|
13 |
-
"We used a subset of
|
14 |
)
|
15 |
|
16 |
st.title("How to understand large textual datasets?")
|
17 |
-
|
|
|
|
|
18 |
df = pd.read_csv("data/data_sample.csv", index_col=[0])
|
19 |
df = df[["message_id", "text"]]
|
20 |
df = df.head(300)
|
21 |
st.dataframe(df, use_container_width=True)
|
22 |
st.title("Inside the OASST2 dataset")
|
23 |
-
element = open("images/map_prompt.html", "r", encoding="utf-8"
|
|
|
24 |
|
25 |
components.html(element.read(), height=900, width=900)
|
|
|
|
|
|
|
|
|
26 |
|
27 |
st.title("Some insights by territory")
|
28 |
df_info = pd.read_csv("data/topics_info.csv", index_col=[0])
|
|
|
10 |
)
|
11 |
st.sidebar.title("Dataset")
|
12 |
st.sidebar.write(
|
13 |
+
"We used a subset of the Open Assistant 2 dataset: https://huggingface.co/datasets/OpenAssistant/oasst2"
|
14 |
)
|
15 |
|
16 |
st.title("How to understand large textual datasets?")
|
17 |
+
st.info(
|
18 |
+
"We sampled every prompt from the English subset of the oasst2 dataset. Here is a sample:"
|
19 |
+
)
|
20 |
df = pd.read_csv("data/data_sample.csv", index_col=[0])
|
21 |
df = df[["message_id", "text"]]
|
22 |
df = df.head(300)
|
23 |
st.dataframe(df, use_container_width=True)
|
24 |
st.title("Inside the OASST2 dataset")
|
25 |
+
# Embed the pre-rendered HTML map of the prompts. The `with` block closes the
# file handle as soon as its content has been handed to the component.
with open("images/map_prompt.html", "r", encoding="utf-8") as element:
    components.html(element.read(), height=900, width=900)
# The built-in open() has no `caption` parameter; passing one raises TypeError
# and crashes the app at startup. Render the caption as its own Streamlit
# element under the map instead.
st.caption(
    "This mapping can be extended to include the assistant answers, and the prompts can be selected on a topic basis through the python package, allowing to filter and curate the data."
)
|
29 |
+
st.info(
|
30 |
+
"The different clusters allow to explore the main topics evoked by the prompt. For instance, in the blink of an eye, one may see which topics are dealt with and which topics are lacking content. This visualisation can therefore be used as a stepping stone to investigate bias or content, providing resources to fuel the discussion open by the OASS paper available here : https://drive.google.com/file/d/10iR5hKwFqAKhL3umx8muOWSRm7hs5FqX/view"
|
31 |
+
)
|
32 |
+
|
33 |
|
34 |
st.title("Some insights by territory")
|
35 |
df_info = pd.read_csv("data/topics_info.csv", index_col=[0])
|