Mariusz Kossakowski committed on
Commit
41b0597
1 Parent(s): 8ea97b8

Add display of the selected domain and text type for the polemo2 dataset

Browse files
Files changed (1) hide show
  1. app.py +27 -0
app.py CHANGED
@@ -115,6 +115,33 @@ with dataframe_head:
115
  st.dataframe(df_to_show)
116
  st.text_area(label="Latex code", value=df_to_show.style.to_latex())
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  with word_searching:
119
  st.header("Observations containing a chosen word")
120
  searched_word = st.text_input(label="Enter the word you are looking for below")
 
115
  st.dataframe(df_to_show)
116
  st.text_area(label="Latex code", value=df_to_show.style.to_latex())
117
 
118
+ if selected_dataset == "clarin-pl/polemo2-official":
119
+ st.subheader("First 10 observations of a chosen domain and text type")
120
+ domain = st.selectbox(
121
+ label="Select domain",
122
+ options=["all", "hotels", "medicine", "products", "reviews"],
123
+ )
124
+ text_type = st.selectbox(
125
+ label="Select text type", options=["Full text", "Tokenized to sentences"]
126
+ )
127
+ text_type_mapping_dict = {
128
+ "Full text": "text",
129
+ "Tokenized to sentences": "sentence",
130
+ }
131
+
132
+ polemo_subset = load_dataset(
133
+ selected_dataset, f"{domain}_{text_type_mapping_dict[text_type]}"
134
+ )
135
+ df = pd.concat(
136
+ [
137
+ polemo_subset["train"].to_pandas(),
138
+ polemo_subset["validation"].to_pandas(),
139
+ polemo_subset["test"].to_pandas(),
140
+ ]
141
+ ).head(10)
142
+ st.dataframe(df)
143
+ st.text_area(label="Latex code", value=df.style.to_latex())
144
+
145
  with word_searching:
146
  st.header("Observations containing a chosen word")
147
  searched_word = st.text_input(label="Enter the word you are looking for below")