nazneen committed on
Commit
39f8f41
·
1 Parent(s): 19bff85
app.py CHANGED
@@ -80,7 +80,7 @@ def data_comparison(df):
80
  ).interactive()
81
 
82
  legend = alt.Chart(df).mark_point().encode(
83
- y=alt.Y('cluster:N', axis=alt.Axis(orient='right'), title=""),
84
  x=alt.X("label"),
85
  shape=alt.Shape('label', scale=alt.Scale(
86
  range=['circle', 'diamond']), legend=None),
@@ -89,7 +89,7 @@ def data_comparison(df):
89
  selection
90
  )
91
 
92
- layered = scatter | legend
93
 
94
  layered = layered.configure_axis(
95
  grid=False
@@ -99,7 +99,7 @@ def data_comparison(df):
99
 
100
  return layered
101
 
102
-
103
  def quant_panel(embedding_df):
104
  """ Quantitative Panel Layout"""
105
 
@@ -112,6 +112,7 @@ def quant_panel(embedding_df):
112
  st.markdown("* The **shape** of each point reflects the label category -- positive (diamond) or negative sentiment (circle).")
113
  st.altair_chart(data_comparison(down_samp(embedding_df)))
114
 
 
115
  def frequent_tokens(data, tokenizer, loss_quantile=0.95, top_k=200, smoothing=0.005):
116
  unique_tokens = []
117
  tokens = []
@@ -171,6 +172,7 @@ def clustering(data,num_clusters):
171
 
172
  return data, assigned_clusters
173
 
 
174
  def kmeans(df, num_clusters=3):
175
  data_hl = df.loc[df['slice'] == 'high-loss']
176
  data_kmeans,clusters = clustering(data_hl,num_clusters)
@@ -276,10 +278,11 @@ if __name__ == "__main__":
276
  st.write(dataframe,width=900, height=300)
277
 
278
  with rcol:
279
- st.markdown('<h3>Word Distribution in Error Slice</h3>', unsafe_allow_html=True)
280
- commontokens = frequent_tokens(merged, tokenizer, loss_quantile=loss_quantile)
281
- with st.expander("How to read the table:"):
282
- st.markdown("* The table displays the most frequent tokens in error slices, relative to their frequencies in the val set.")
283
- st.write(commontokens)
284
-
285
- quant_panel(merged)
 
 
80
  ).interactive()
81
 
82
  legend = alt.Chart(df).mark_point().encode(
83
+ y=alt.Y('cluster:N', axis=alt.Axis(orient='left'), title=""),
84
  x=alt.X("label"),
85
  shape=alt.Shape('label', scale=alt.Scale(
86
  range=['circle', 'diamond']), legend=None),
 
89
  selection
90
  )
91
 
92
+ layered = legend | scatter
93
 
94
  layered = layered.configure_axis(
95
  grid=False
 
99
 
100
  return layered
101
 
102
+ @st.cache(ttl=600)
103
  def quant_panel(embedding_df):
104
  """ Quantitative Panel Layout"""
105
 
 
112
  st.markdown("* The **shape** of each point reflects the label category -- positive (diamond) or negative sentiment (circle).")
113
  st.altair_chart(data_comparison(down_samp(embedding_df)))
114
 
115
+ @st.cache(ttl=600)
116
  def frequent_tokens(data, tokenizer, loss_quantile=0.95, top_k=200, smoothing=0.005):
117
  unique_tokens = []
118
  tokens = []
 
172
 
173
  return data, assigned_clusters
174
 
175
+ @st.cache(ttl=600)
176
  def kmeans(df, num_clusters=3):
177
  data_hl = df.loc[df['slice'] == 'high-loss']
178
  data_kmeans,clusters = clustering(data_hl,num_clusters)
 
278
  st.write(dataframe,width=900, height=300)
279
 
280
  with rcol:
281
+ with st.spinner(text='loading...'):
282
+ st.markdown('<h3>Word Distribution in Error Slice</h3>', unsafe_allow_html=True)
283
+ commontokens = frequent_tokens(merged, tokenizer, loss_quantile=loss_quantile)
284
+ with st.expander("How to read the table:"):
285
+ st.markdown("* The table displays the most frequent tokens in error slices, relative to their frequencies in the val set.")
286
+ st.write(commontokens)
287
+ with st.spinner(text='visualizing...'):
288
+ quant_panel(merged)
error_analysis/utils/__pycache__/style_hacks.cpython-39.pyc CHANGED
Binary files a/error_analysis/utils/__pycache__/style_hacks.cpython-39.pyc and b/error_analysis/utils/__pycache__/style_hacks.cpython-39.pyc differ