Spaces:
Runtime error
Runtime error
added offline files
Browse files
app.py
CHANGED
@@ -140,16 +140,15 @@ def frequent_tokens(data, tokenizer, loss_quantile=0.95, top_k=200, smoothing=0.
|
|
140 |
|
141 |
|
142 |
@st.cache(ttl=600)
|
143 |
-
def get_data(
|
144 |
-
preds =
|
145 |
-
losses =
|
146 |
embeddings = pd.DataFrame(emb, columns=['x', 'y'])
|
147 |
num_examples = len(losses)
|
148 |
# dataset_labels = [dataset[i]['label'] for i in range(num_examples)]
|
149 |
return pd.concat([pd.DataFrame(np.transpose(np.vstack([dataset[:num_examples]['content'],
|
150 |
dataset[:num_examples]['label'], preds, losses])), columns=['content', 'label', 'pred', 'loss']), embeddings], axis=1)
|
151 |
|
152 |
-
@st.cache(ttl=600)
|
153 |
def clustering(data,num_clusters):
|
154 |
X = np.array(data['embedding'].tolist())
|
155 |
kclusterer = KMeansClusterer(
|
@@ -158,11 +157,8 @@ def clustering(data,num_clusters):
|
|
158 |
assigned_clusters = kclusterer.cluster(X, assign_clusters=True)
|
159 |
data['cluster'] = pd.Series(assigned_clusters, index=data.index).astype('int')
|
160 |
data['centroid'] = data['cluster'].apply(lambda x: kclusterer.means()[x])
|
161 |
-
|
162 |
-
|
163 |
return data, assigned_clusters
|
164 |
|
165 |
-
@st.cache(ttl=600)
|
166 |
def kmeans(df, num_clusters=3):
|
167 |
data_hl = df.loc[df['slice'] == 'high-loss']
|
168 |
data_kmeans,clusters = clustering(data_hl,num_clusters)
|
@@ -171,7 +167,6 @@ def kmeans(df, num_clusters=3):
|
|
171 |
merged['cluster'] = merged['cluster'].fillna(num_clusters).astype('int')
|
172 |
return merged
|
173 |
|
174 |
-
@st.cache(ttl=600)
|
175 |
def distance_from_centroid(row):
|
176 |
return sdist.norm(row['embedding'] - row['centroid'].tolist())
|
177 |
|
@@ -249,7 +244,7 @@ if __name__ == "__main__":
|
|
249 |
high_loss = losses.quantile(loss_quantile)
|
250 |
data_df['slice'] = 'high-loss'
|
251 |
data_df['slice'] = data_df['slice'].where(data_df['loss'] > high_loss, 'low-loss')
|
252 |
-
|
253 |
with rcol:
|
254 |
with st.spinner(text='loading...'):
|
255 |
st.markdown('<h3>Word Distribution in Error Slice</h3>', unsafe_allow_html=True)
|
|
|
140 |
|
141 |
|
142 |
@st.cache(ttl=600)
|
143 |
+
def get_data(inference, emb):
|
144 |
+
preds = inference.outputs.numpy()
|
145 |
+
losses = inference.losses.numpy()
|
146 |
embeddings = pd.DataFrame(emb, columns=['x', 'y'])
|
147 |
num_examples = len(losses)
|
148 |
# dataset_labels = [dataset[i]['label'] for i in range(num_examples)]
|
149 |
return pd.concat([pd.DataFrame(np.transpose(np.vstack([dataset[:num_examples]['content'],
|
150 |
dataset[:num_examples]['label'], preds, losses])), columns=['content', 'label', 'pred', 'loss']), embeddings], axis=1)
|
151 |
|
|
|
152 |
def clustering(data,num_clusters):
|
153 |
X = np.array(data['embedding'].tolist())
|
154 |
kclusterer = KMeansClusterer(
|
|
|
157 |
assigned_clusters = kclusterer.cluster(X, assign_clusters=True)
|
158 |
data['cluster'] = pd.Series(assigned_clusters, index=data.index).astype('int')
|
159 |
data['centroid'] = data['cluster'].apply(lambda x: kclusterer.means()[x])
|
|
|
|
|
160 |
return data, assigned_clusters
|
161 |
|
|
|
162 |
def kmeans(df, num_clusters=3):
|
163 |
data_hl = df.loc[df['slice'] == 'high-loss']
|
164 |
data_kmeans,clusters = clustering(data_hl,num_clusters)
|
|
|
167 |
merged['cluster'] = merged['cluster'].fillna(num_clusters).astype('int')
|
168 |
return merged
|
169 |
|
|
|
170 |
def distance_from_centroid(row):
|
171 |
return sdist.norm(row['embedding'] - row['centroid'].tolist())
|
172 |
|
|
|
244 |
high_loss = losses.quantile(loss_quantile)
|
245 |
data_df['slice'] = 'high-loss'
|
246 |
data_df['slice'] = data_df['slice'].where(data_df['loss'] > high_loss, 'low-loss')
|
247 |
+
|
248 |
with rcol:
|
249 |
with st.spinner(text='loading...'):
|
250 |
st.markdown('<h3>Word Distribution in Error Slice</h3>', unsafe_allow_html=True)
|