Mariusz Kossakowski committed on
Commit
47923e1
1 Parent(s): 39df437

Add searching for a specific word

Browse files
Files changed (1) hide show
  1. app.py +23 -2
app.py CHANGED
@@ -83,6 +83,7 @@ DATA_DICT, DATA_DESCRIPTION = load_hf_dataset()
83
  header = st.container()
84
  description = st.container()
85
  dataframe_head = st.container()
 
86
  dataset_statistics = st.container()
87
 
88
  with header:
@@ -96,11 +97,17 @@ with dataframe_head:
96
  filtering_options = DATA_DICT["train"]["target"].unique().tolist()
97
  filtering_options.append("All classes")
98
 
99
- st.header("First 10 observations of train subset")
100
  class_to_show = st.selectbox(
101
  label="Select class to show", options=filtering_options
102
  )
103
- df_to_show = DATA_DICT["train"].copy()
 
 
 
 
 
 
104
  if class_to_show == "All classes":
105
  df_to_show = df_to_show.head(10)
106
  else:
@@ -108,6 +115,20 @@ with dataframe_head:
108
  st.dataframe(df_to_show)
109
  st.text_area(label="Latex code", value=df_to_show.style.to_latex())
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  with dataset_statistics:
112
  st.header("Dataset statistics")
113
  st.subheader("Number of samples in each data split")
 
83
  header = st.container()
84
  description = st.container()
85
  dataframe_head = st.container()
86
+ word_searching = st.container()
87
  dataset_statistics = st.container()
88
 
89
  with header:
 
97
  filtering_options = DATA_DICT["train"]["target"].unique().tolist()
98
  filtering_options.append("All classes")
99
 
100
+ st.header("First 10 observations of a chosen class")
101
  class_to_show = st.selectbox(
102
  label="Select class to show", options=filtering_options
103
  )
104
+ df_to_show = pd.concat(
105
+ [
106
+ DATA_DICT["train"].copy(),
107
+ DATA_DICT["validation"].copy(),
108
+ DATA_DICT["test"].copy(),
109
+ ]
110
+ )
111
  if class_to_show == "All classes":
112
  df_to_show = df_to_show.head(10)
113
  else:
 
115
  st.dataframe(df_to_show)
116
  st.text_area(label="Latex code", value=df_to_show.style.to_latex())
117
 
118
+ with word_searching:
119
+ st.header("Observations containing a chosen word")
120
+ searched_word = st.text_input(label="Enter the word you are looking for below")
121
+ df_to_show = pd.concat(
122
+ [
123
+ DATA_DICT["train"].copy(),
124
+ DATA_DICT["validation"].copy(),
125
+ DATA_DICT["test"].copy(),
126
+ ]
127
+ )
128
+ df_to_show = df_to_show.loc[df_to_show["text"].str.contains(searched_word)]
129
+ st.dataframe(df_to_show)
130
+ st.text_area(label="Latex code", value=df_to_show.style.to_latex())
131
+
132
  with dataset_statistics:
133
  st.header("Dataset statistics")
134
  st.subheader("Number of samples in each data split")