Spaces: Runtime error
carolanderson committed on
merge changes in user sessions and AVID reporting
- .gitignore +5 -0
- audit_utils.py +378 -729
- indie_label_svelte/public/global.css +16 -5
- indie_label_svelte/src/App.svelte +4 -57
- indie_label_svelte/src/AppOld.svelte +0 -127
- indie_label_svelte/src/Auditing.svelte +28 -53
- indie_label_svelte/src/ClusterResults.svelte +7 -5
- indie_label_svelte/src/CommentTable.svelte +16 -14
- indie_label_svelte/src/Explore.svelte +0 -1
- indie_label_svelte/src/Hunch.svelte +0 -26
- indie_label_svelte/src/HypothesisPanel.svelte +282 -307
- indie_label_svelte/src/IterativeClustering.svelte +0 -164
- indie_label_svelte/src/KeywordSearch.svelte +3 -10
- indie_label_svelte/src/Labeling.svelte +9 -18
- indie_label_svelte/src/MainPanel.svelte +11 -36
- indie_label_svelte/src/ModelPerf.svelte +14 -55
- indie_label_svelte/src/OverallResults.svelte +0 -79
- indie_label_svelte/src/Results.svelte +0 -206
- indie_label_svelte/src/SelectUserDialog.svelte +0 -66
- indie_label_svelte/src/StudyLinks.svelte +0 -59
- indie_label_svelte/src/SubmitReportDialog.svelte +120 -0
- indie_label_svelte/src/TopicTraining.svelte +1 -9
- indie_label_svelte/src/stores/all_users_store.js +0 -6
- indie_label_svelte/src/stores/cur_user_store.js +0 -3
- server.py +174 -244
.gitignore
ADDED
@@ -0,0 +1,5 @@
+__pycache__/
+.DS_Store
+data/
+test_nbs/
+data_zips/
audit_utils.py
CHANGED
@@ -23,6 +23,7 @@ import time
 from sentence_transformers import SentenceTransformer, util
 import torch
 from bertopic import BERTopic
+from datetime import date
 
 ########################################
 # PRE-LOADING
@@ -37,61 +38,39 @@ alt.renderers.enable('altair_saver', fmts=['vega-lite', 'png'])
 
 # Data-loading
 module_dir = "./"
-
-
-# # TEMP reset
-# with open(os.path.join(module_dir, "./data/all_model_names.pkl"), "wb") as f:
-#     all_model_names = []
-#     pickle.dump(all_model_names, f)
-# with open(f"./data/users_to_models.pkl", "wb") as f:
-#     users_to_models = {}
-#     pickle.dump(users_to_models, f)
-
-
-with open(os.path.join(module_dir, "data/ids_to_comments.pkl"), "rb") as f:
+with open(os.path.join(module_dir, "data/input/ids_to_comments.pkl"), "rb") as f:
     ids_to_comments = pickle.load(f)
-with open(os.path.join(module_dir, "data/comments_to_ids.pkl"), "rb") as f:
+with open(os.path.join(module_dir, "data/input/comments_to_ids.pkl"), "rb") as f:
     comments_to_ids = pickle.load(f)
-
-
-
-sys_eval_df = pd.read_pickle(os.path.join(module_dir, "data/split_data/sys_eval_df.pkl"))
-train_df = pd.read_pickle(os.path.join(module_dir, "data/split_data/train_df.pkl"))
+system_preds_df = pd.read_pickle("data/input/system_preds_df.pkl")
+sys_eval_df = pd.read_pickle(os.path.join(module_dir, "data/input/split_data/sys_eval_df.pkl"))
+train_df = pd.read_pickle(os.path.join(module_dir, "data/input/split_data/train_df.pkl"))
 train_df_ids = train_df["item_id"].unique().tolist()
-model_eval_df = pd.read_pickle(os.path.join(module_dir, "data/split_data/model_eval_df.pkl"))
-ratings_df_full = pd.read_pickle(os.path.join(module_dir, "data/ratings_df_full.pkl"))
+model_eval_df = pd.read_pickle(os.path.join(module_dir, "data/input/split_data/model_eval_df.pkl"))
+ratings_df_full = pd.read_pickle(os.path.join(module_dir, "data/input/ratings_df_full.pkl"))
+worker_info_df = pd.read_pickle("./data/input/worker_info_df.pkl")
 
-
-
-with open(f"./data/users_to_models.pkl", "rb") as f:
-    users_to_models = pickle.load(f)
-
-with open("data/perf_1000_topics.pkl", "rb") as f:
-    perf_1000_topics = pickle.load(f)
-with open("data/perf_1000_tox_cat.pkl", "rb") as f:
-    perf_1000_tox_cat = pickle.load(f)
-with open("data/perf_1000_tox_severity.pkl", "rb") as f:
-    perf_1000_tox_severity = pickle.load(f)
-with open("data/user_perf_metrics.pkl", "rb") as f:
-    user_perf_metrics = pickle.load(f)
-
-topic_ids = comments_grouped_full_topic_cat.topic_id
-topics = comments_grouped_full_topic_cat.topic
+topic_ids = system_preds_df.topic_id
+topics = system_preds_df.topic
 topic_ids_to_topics = {topic_ids[i]: topics[i] for i in range(len(topic_ids))}
 topics_to_topic_ids = {topics[i]: topic_ids[i] for i in range(len(topic_ids))}
-unique_topics_ids = sorted(
+unique_topics_ids = sorted(system_preds_df.topic_id.unique())
 unique_topics = [topic_ids_to_topics[topic_id] for topic_id in range(len(topic_ids_to_topics) - 1)]
 
 def get_toxic_threshold():
     return TOXIC_THRESHOLD
 
-def
-
-
-
+def get_user_model_names(user):
+    # Fetch the user's models
+    output_dir = f"./data/output"
+    users = [name for name in os.listdir(output_dir) if os.path.isdir(os.path.join(output_dir, name))]
+    if user not in users:
+        # User does not exist
+        return []
     else:
-        # Fetch the user
-
+        # Fetch trained model names for the user
+        user_dir = f"./data/output/{user}"
+        user_models = [name for name in os.listdir(user_dir) if os.path.isdir(os.path.join(user_dir, name))]
     user_models.sort()
     return user_models
 
@@ -99,7 +78,7 @@ def get_unique_topics():
     return unique_topics
 
 def get_large_clusters(min_n):
-    counts_df =
+    counts_df = system_preds_df.groupby(by=["topic_id"]).size().reset_index(name='counts')
    counts_df = counts_df[counts_df["counts"] >= min_n]
    return [topic_ids_to_topics[t_id] for t_id in sorted(counts_df["topic_id"].tolist()[1:])]
 
@@ -137,32 +116,64 @@ readable_to_internal = {
 }
 internal_to_readable = {v: k for k, v in readable_to_internal.items()}
 
-# Embeddings for neighbor retrieval
-model_name = "paraphrase-MiniLM-L6-v2"
-model = SentenceTransformer(model_name)
-with open("./data/comments.pkl", "rb") as f:
-    comments = pickle.load(f)
-embeddings = torch.load("./data/embeddings/21_10_embeddings.pt")
-
-# Perspective API recalibration
-def recalib_v1(s):
-    # convert Perspective score to 0-4 toxicity score
-    # map 0 persp to 0 (not at all toxic); 0.5 persp to 1 (slightly toxic), 1.0 persp to 4 (extremely toxic)
-    if s < 0.5:
-        return (s * 2.)
-    else:
-        return ((s - 0.5) * 6.) + 1
-
-def recalib_v2(s):
-    # convert Perspective score to 0-4 toxicity score
-    # just 4x the perspective score
-    return (s * 4.)
-
-comments_grouped_full_topic_cat["rating_avg_orig"] = comments_grouped_full_topic_cat["rating"]
-comments_grouped_full_topic_cat["rating"] = [recalib_v2(score) for score in comments_grouped_full_topic_cat["persp_score"].tolist()]
 
-
-
+########################################
+# Data storage helper functions
+# Set up all directories for new user
+def setup_user_dirs(cur_user):
+    user_dir = f"./data/output/{cur_user}"
+    if not os.path.isdir(user_dir):
+        os.mkdir(user_dir)
+def setup_model_dirs(cur_user, cur_model):
+    model_dir = f"./data/output/{cur_user}/{cur_model}"
+    if not os.path.isdir(model_dir):
+        os.mkdir(model_dir)  # Set up model dir
+        # Set up subdirs
+        os.mkdir(os.path.join(model_dir, "labels"))
+        os.mkdir(os.path.join(model_dir, "perf"))
+def setup_user_model_dirs(cur_user, cur_model):
+    setup_user_dirs(cur_user)
+    setup_model_dirs(cur_user, cur_model)
+
+# Charts
+def get_chart_file(cur_user, cur_model):
+    chart_dir = f"./data/output/{cur_user}/{cur_model}"
+    return os.path.join(chart_dir, f"chart_overall_vis.json")
+
+# Labels
+def get_label_dir(cur_user, cur_model):
+    return f"./data/output/{cur_user}/{cur_model}/labels"
+def get_n_label_files(cur_user, cur_model):
+    label_dir = get_label_dir(cur_user, cur_model)
+    return len([name for name in os.listdir(label_dir) if os.path.isfile(os.path.join(label_dir, name))])
+def get_label_file(cur_user, cur_model, label_i=None):
+    if label_i is None:
+        # Get index to add on to end of list
+        label_i = get_n_label_files(cur_user, cur_model)
+    label_dir = get_label_dir(cur_user, cur_model)
+    return os.path.join(label_dir, f"{label_i}.pkl")
+
+# Performance
+def get_perf_dir(cur_user, cur_model):
+    return f"./data/output/{cur_user}/{cur_model}/perf"
+def get_n_perf_files(cur_user, cur_model):
+    perf_dir = get_perf_dir(cur_user, cur_model)
+    return len([name for name in os.listdir(perf_dir) if os.path.isfile(os.path.join(perf_dir, name))])
+def get_perf_file(cur_user, cur_model, perf_i=None):
+    if perf_i is None:
+        # Get index to add on to end of list
+        perf_i = get_n_perf_files(cur_user, cur_model)
+    perf_dir = get_perf_dir(cur_user, cur_model)
+    return os.path.join(perf_dir, f"{perf_i}.pkl")
+
+# Predictions dataframe
+def get_preds_file(cur_user, cur_model):
+    preds_dir = f"./data/output/{cur_user}/{cur_model}"
+    return os.path.join(preds_dir, f"preds_df.pkl")
+
+# Reports
+def get_reports_file(cur_user, cur_model):
+    return f"./data/output/{cur_user}/{cur_model}/reports.json"
 
 ########################################
 # General utils
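The new storage helpers define a per-user, per-model layout under ./data/output. A minimal sketch of how they compose (the user and model names here are invented for illustration, not from this commit):

    # Hypothetical session exercising the helpers added in this hunk
    setup_user_model_dirs("demo_user", "demo_model")    # creates ./data/output/demo_user/demo_model/{labels,perf}
    get_label_file("demo_user", "demo_model")           # -> .../labels/0.pkl (next free index)
    get_perf_file("demo_user", "demo_model", perf_i=1)  # -> .../perf/1.pkl
    get_reports_file("demo_user", "demo_model")         # -> .../reports.json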
@@ -192,22 +203,6 @@ def my_bootstrap(vals, n_boot, alpha):
 
 ########################################
 # GET_AUDIT utils
-def other_users_perf(perf_metrics, metric, user_metric, alpha=0.95, n_boot=501):
-    ind = get_metric_ind(metric)
-
-    metric_vals = [metric_vals[ind] for metric_vals in perf_metrics.values()]
-    metric_avg = np.median(metric_vals)
-
-    # Future: use provided sample to perform bootstrap sampling
-    ci_1 = mne.stats.bootstrap_confidence_interval(np.array(metric_vals), ci=alpha, n_bootstraps=n_boot, stat_fun="median")
-
-    bs_samples, ci = my_bootstrap(metric_vals, n_boot, alpha)
-
-    # Get user's percentile
-    percentile = stats.percentileofscore(bs_samples, user_metric)
-
-    return metric_avg, ci, percentile, metric_vals
-
 def plot_metric_histogram(metric, user_metric, other_metric_vals, n_bins=10):
     hist, bin_edges = np.histogram(other_metric_vals, bins=n_bins, density=False)
     data = pd.DataFrame({
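The deleted other_users_perf summarized other users' metric values with a median plus a bootstrap confidence interval, then located the current user's metric as a percentile of the bootstrap samples. A minimal standalone sketch of that pattern (the sample values are invented):

    import numpy as np
    from scipy import stats

    other_vals = np.array([0.8, 1.1, 0.9, 1.3, 1.0])  # invented MAE values for other users
    metric_avg = np.median(other_vals)
    # Bootstrap the median, then rank a user's MAE of 0.95 against the samples
    bs_samples = [np.median(np.random.choice(other_vals, size=len(other_vals))) for _ in range(501)]
    percentile = stats.percentileofscore(bs_samples, 0.95)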
@@ -239,394 +234,38 @@ def plot_metric_histogram(metric, user_metric, other_metric_vals, n_bins=10):
 
     return (bar + rule).interactive()
 
-
-
-
-
-
-
-
-
-
-
-
-
-        cur_user_df = user_df[user_df["prediction_bin"] == severity_i]
-        y_true_user = cur_user_df.pred.to_numpy()  # user's label
-        y_pred = cur_user_df.rating_avg.to_numpy()  # system's label (avg)
-
-        if len(y_true_user) > 0:
-            used_bins.append(bin_labels[severity_i])
-            metric_user = calc_metric_user(y_true_user, y_pred, perf_metric)
-            y_user.append(metric_user)
-            y_other.append(metric_other)
-            other_ci_low.append(ci_low)
-            other_ci_high.append(ci_high)
-
-    return y_user, y_other, used_bins, other_ci_low, other_ci_high
-
-def get_topic_bins(perf_metric, user_df, other_dfs, n_topics, ci=0.95, n_boot=501):
-    # Note: not using other_dfs anymore
-    y_user = []
-    y_other = []
-    used_bins = []
-    other_ci_low = []
-    other_ci_high = []
-    selected_topics = unique_topics_ids[1:(n_topics + 1)]
-
-    for topic_id in selected_topics:
-        cur_topic = topic_ids_to_topics[topic_id]
-        metric_others = [metrics[get_metric_ind(perf_metric)] for metrics in perf_1000_topics[topic_id].values() if metrics[get_metric_ind(perf_metric)]]
-        ci_low, ci_high = mne.stats.bootstrap_confidence_interval(np.array(metric_others), ci=ci, n_bootstraps=n_boot, stat_fun='median')
-        metric_other = np.median(metric_others)
-
-        cur_user_df = user_df[user_df["topic"] == cur_topic]
-        y_true_user = cur_user_df.pred.to_numpy()  # user's label
-        y_pred = cur_user_df.rating_avg.to_numpy()  # system's label (avg)
-
-        if len(y_true_user) > 0:
-            used_bins.append(cur_topic)
-            metric_user = calc_metric_user(y_true_user, y_pred, perf_metric)
-            y_user.append(metric_user)
-            y_other.append(metric_other)
-            other_ci_low.append(ci_low)
-            other_ci_high.append(ci_high)
-
-    return y_user, y_other, used_bins, other_ci_low, other_ci_high
-
-def calc_metric_user(y_true_user, y_pred, perf_metric):
-    if perf_metric == "MAE":
-        metric_user = mean_absolute_error(y_true_user, y_pred)
-
-    elif perf_metric == "MSE":
-        metric_user = mean_squared_error(y_true_user, y_pred)
-
-    elif perf_metric == "RMSE":
-        metric_user = mean_squared_error(y_true_user, y_pred, squared=False)
-
-    elif perf_metric == "avg_diff":
-        metric_user = np.mean(y_true_user - y_pred)
-
-    return metric_user
-
-def get_toxicity_category_bins(perf_metric, user_df, other_dfs, threshold=0.5, ci=0.95, n_boot=501):
-    # Note: not using other_dfs anymore; threshold from pre-calculation is 0.5
-    cat_cols = ["is_profane_frac", "is_threat_frac", "is_identity_attack_frac", "is_insult_frac", "is_sexual_harassment_frac"]
-    cat_labels = ["Profanity", "Threats", "Identity Attacks", "Insults", "Sexual Harassment"]
-    y_user = []
-    y_other = []
-    used_bins = []
-    other_ci_low = []
-    other_ci_high = []
-    for i, cur_col_name in enumerate(cat_cols):
-        metric_others = [metrics[get_metric_ind(perf_metric)] for metrics in perf_1000_tox_cat[cur_col_name].values() if metrics[get_metric_ind(perf_metric)]]
-        ci_low, ci_high = mne.stats.bootstrap_confidence_interval(np.array(metric_others), ci=ci, n_bootstraps=n_boot, stat_fun='median')
-        metric_other = np.median(metric_others)
-
-        # Filter to rows where a comment received an average label >= the provided threshold for the category
-        cur_user_df = user_df[user_df[cur_col_name] >= threshold]
-        y_true_user = cur_user_df.pred.to_numpy()  # user's label
-        y_pred = cur_user_df.rating_avg.to_numpy()  # system's label (avg)
-
-        if len(y_true_user) > 0:
-            used_bins.append(cat_labels[i])
-            metric_user = calc_metric_user(y_true_user, y_pred, perf_metric)
-            y_user.append(metric_user)
-            y_other.append(metric_other)
-            other_ci_low.append(ci_low)
-            other_ci_high.append(ci_high)
-
-    return y_user, y_other, used_bins, other_ci_low, other_ci_high
-
-def plot_class_cond_results(preds_df, breakdown_axis, perf_metric, other_ids, sort_bars, n_topics, worker_id="A"):
-    # Note: preds_df already has binned results
-    # Prepare dfs
-    user_df = preds_df[preds_df.user_id == worker_id].sort_values(by=["item_id"]).reset_index()
-    other_dfs = [preds_df[preds_df.user_id == other_id].sort_values(by=["item_id"]).reset_index() for other_id in other_ids]
-
-    if breakdown_axis == "toxicity_severity":
-        y_user, y_other, used_bins, other_ci_low, other_ci_high = get_toxicity_severity_bins(perf_metric, user_df, other_dfs)
-    elif breakdown_axis == "topic":
-        y_user, y_other, used_bins, other_ci_low, other_ci_high = get_topic_bins(perf_metric, user_df, other_dfs, n_topics)
-    elif breakdown_axis == "toxicity_category":
-        y_user, y_other, used_bins, other_ci_low, other_ci_high = get_toxicity_category_bins(perf_metric, user_df, other_dfs)
-
-    diffs = list(np.array(y_user) - np.array(y_other))
-
-    # Generate bar chart
-    data = pd.DataFrame({
-        "metric_val": y_user + y_other,
-        "Labeler": ["You" for _ in range(len(y_user))] + ["Other users" for _ in range(len(y_user))],
-        "used_bins": used_bins + used_bins,
-        "diffs": diffs + diffs,
-        "lower_cis": y_user + other_ci_low,
-        "upper_cis": y_user + other_ci_high,
-    })
-
-    color_domain = ['You', 'Other users']
-    color_range = [YOUR_COLOR, OTHER_USERS_COLOR]
-
-    base = alt.Chart()
-    chart_title=f"{internal_to_readable[breakdown_axis]} Results"
-    x_axis = alt.X("Labeler:O", sort=("You", "Other users"), title=None, axis=None)
-    y_axis = alt.Y("metric_val:Q", title=internal_to_readable[perf_metric])
-    if sort_bars:
-        col_content = alt.Column("used_bins:O", sort=alt.EncodingSortField(field="diffs", op="mean", order='descending'))
-    else:
-        col_content = alt.Column("used_bins:O")
-
-    if n_topics is not None and n_topics > 10:
-        # Change to horizontal bar chart
-        bar = base.mark_bar(lineBreak="_").encode(
-            y=x_axis,
-            x=y_axis,
-            color=alt.Color("Labeler:O", scale=alt.Scale(domain=color_domain, range=color_range)),
-            tooltip=[
-                alt.Tooltip('Labeler:O', title='Labeler'),
-                alt.Tooltip('metric_val:Q', title=perf_metric, format=".3f"),
-            ]
-        )
-        error_bars = base.mark_errorbar().encode(
-            y=x_axis,
-            x = alt.X("lower_cis:Q", title=internal_to_readable[perf_metric]),
-            x2 = alt.X2("upper_cis:Q", title=None),
-            tooltip=[
-                alt.Tooltip('lower_cis:Q', title='Lower CI', format=".3f"),
-                alt.Tooltip('upper_cis:Q', title='Upper CI', format=".3f"),
-            ]
-        )
-        combined = alt.layer(
-            bar, error_bars, data=data
-        ).facet(
-            row=col_content
-        ).properties(
-            title=chart_title,
-        ).interactive()
+# Generates the summary plot across all topics for the user
+def show_overall_perf(cur_model, error_type, cur_user, threshold=TOXIC_THRESHOLD, topic_vis_method="median", use_cache=True):
+    # Your perf (calculate using model and testset)
+    preds_file = get_preds_file(cur_user, cur_model)
+    with open(preds_file, "rb") as f:
+        preds_df = pickle.load(f)
+
+    chart_file = get_chart_file(cur_user, cur_model)
+    if use_cache and os.path.isfile(chart_file):
+        # Read from file if it exists
+        with open(chart_file, "r") as f:
+            topic_overview_plot_json = json.load(f)
     else:
-
-
-
-
-
-
-
-
-
-        error_bars = base.mark_errorbar().encode(
-            x=x_axis,
-            y = alt.Y("lower_cis:Q", title=internal_to_readable[perf_metric]),
-            y2 = alt.Y2("upper_cis:Q", title=None),
-            tooltip=[
-                alt.Tooltip('lower_cis:Q', title='Lower CI', format=".3f"),
-                alt.Tooltip('upper_cis:Q', title='Upper CI', format=".3f"),
-            ]
-        )
-        combined = alt.layer(
-            bar, error_bars, data=data
-        ).facet(
-            column=col_content
-        ).properties(
-            title=chart_title,
-        ).interactive()
-
-    return combined
-
-def show_overall_perf(variant, error_type, cur_user, threshold=TOXIC_THRESHOLD, breakdown_axis=None, topic_vis_method="median"):
-    # Your perf (calculate using model and testset)
-    breakdown_axis = readable_to_internal[breakdown_axis]
-
-    if breakdown_axis is not None:
-        with open(os.path.join(module_dir, f"data/preds_dfs/{variant}.pkl"), "rb") as f:
-            preds_df = pickle.load(f)
-
-    # Read from file
-    chart_dir = "./data/charts"
-    chart_file = os.path.join(chart_dir, f"{cur_user}_{variant}.pkl")
-    if os.path.isfile(chart_file):
-        with open(chart_file, "r") as f:
-            topic_overview_plot_json = json.load(f)
-    else:
-        preds_df_mod = preds_df.merge(comments_grouped_full_topic_cat, on="item_id", how="left", suffixes=('_', '_avg'))
-        if topic_vis_method == "median":
-            preds_df_mod_grp = preds_df_mod.groupby(["topic_", "user_id"]).median()
-        elif topic_vis_method == "mean":
-            preds_df_mod_grp = preds_df_mod.groupby(["topic_", "user_id"]).mean()
-        topic_overview_plot_json = plot_overall_vis(preds_df=preds_df_mod_grp, n_topics=200, threshold=threshold, error_type=error_type, cur_user=cur_user, cur_model=variant)
+        # Otherwise, generate chart and save to file
+        if topic_vis_method == "median":  # Default
+            preds_df_grp = preds_df.groupby(["topic", "user_id"]).median()
+        elif topic_vis_method == "mean":
+            preds_df_grp = preds_df.groupby(["topic", "user_id"]).mean()
+        topic_overview_plot_json = plot_overall_vis(preds_df=preds_df_grp, n_topics=200, threshold=threshold, error_type=error_type, cur_user=cur_user, cur_model=cur_model)
+        # Save to file
+        with open(chart_file, "w") as f:
+            json.dump(topic_overview_plot_json, f)
 
     return {
         "topic_overview_plot_json": json.loads(topic_overview_plot_json),
     }
 
-########################################
-# GET_CLUSTER_RESULTS utils
-def get_overall_perf3(preds_df, perf_metric, other_ids, worker_id="A"):
-    # Prepare dataset to calculate performance
-    # Note: true is user and pred is system
-    y_true = preds_df[preds_df["user_id"] == worker_id].pred.to_numpy()
-    y_pred_user = preds_df[preds_df["user_id"] == worker_id].rating_avg.to_numpy()
-
-    y_true_others = y_pred_others = [preds_df[preds_df["user_id"] == other_id].pred.to_numpy() for other_id in other_ids]
-    y_pred_others = [preds_df[preds_df["user_id"] == other_id].rating_avg.to_numpy() for other_id in other_ids]
-
-    # Get performance for user's model and for other users
-    if perf_metric == "MAE":
-        user_perf = mean_absolute_error(y_true, y_pred_user)
-        other_perfs = [mean_absolute_error(y_true_others[i], y_pred_others[i]) for i in range(len(y_true_others))]
-    elif perf_metric == "MSE":
-        user_perf = mean_squared_error(y_true, y_pred_user)
-        other_perfs = [mean_squared_error(y_true_others[i], y_pred_others[i]) for i in range(len(y_true_others))]
-    elif perf_metric == "RMSE":
-        user_perf = mean_squared_error(y_true, y_pred_user, squared=False)
-        other_perfs = [mean_squared_error(y_true_others[i], y_pred_others[i], squared=False) for i in range(len(y_true_others))]
-    elif perf_metric == "avg_diff":
-        user_perf = np.mean(y_true - y_pred_user)
-        other_perfs = [np.mean(y_true_others[i] - y_pred_others[i]) for i in range(len(y_true_others))]
-
-    other_perf = np.mean(other_perfs)  # average across all other users
-    return user_perf, other_perf
-
-def style_color_difference(row):
-    full_opacity_diff = 3.
-    pred_user_col = "Your predicted rating"
-    pred_other_col = "Other users' predicted rating"
-    pred_system_col = "Status-quo system rating"
-    diff_user = row[pred_user_col] - row[pred_system_col]
-    diff_other = row[pred_other_col] - row[pred_system_col]
-    red = "234, 133, 125"
-    green = "142, 205, 162"
-    bkgd_user = green if diff_user < 0 else red  # red if more toxic; green if less toxic
-    opac_user = min(abs(diff_user / full_opacity_diff), 1.)
-    bkgd_other = green if diff_other < 0 else red  # red if more toxic; green if less toxic
-    opac_other = min(abs(diff_other / full_opacity_diff), 1.)
-    return ["", f"background-color: rgba({bkgd_user}, {opac_user});", f"background-color: rgba({bkgd_other}, {opac_other});", "", ""]
-
-def display_examples_cluster(preds_df, other_ids, num_examples, sort_ascending, worker_id="A"):
-    user_df = preds_df[preds_df.user_id == worker_id].sort_values(by=["item_id"]).reset_index()
-    others_df = preds_df[preds_df.user_id == other_ids[0]]
-    for i in range(1, len(other_ids)):
-        others_df.append(preds_df[preds_df.user_id == other_ids[i]])
-    others_df.groupby(["item_id"]).mean()
-    others_df = others_df.sort_values(by=["item_id"]).reset_index()
-
-    df = pd.merge(user_df, others_df, on="item_id", how="left", suffixes=('_user', '_other'))
-    df["Comment"] = df["comment_user"]
-    df["Your predicted rating"] = df["pred_user"]
-    df["Other users' predicted rating"] = df["pred_other"]
-    df["Status-quo system rating"] = df["rating_avg_user"]
-    df["Status-quo system std dev"] = df["rating_stddev_user"]
-    df = df[["Comment", "Your predicted rating", "Other users' predicted rating", "Status-quo system rating", "Status-quo system std dev"]]
-
-    # Add styling
-    df = df.sort_values(by=['Status-quo system std dev'], ascending=sort_ascending)
-    n_to_sample = np.min([num_examples, len(df)])
-    df = df.sample(n=n_to_sample).reset_index(drop=True)
-    return df.style.apply(style_color_difference, axis=1).render()
-
-def calc_odds_ratio(df, comparison_group, toxic_threshold=1.5, worker_id="A", debug=False, smoothing_factor=1):
-    if comparison_group == "status_quo":
-        other_pred_col = "rating_avg"
-        # Get unique comments, but fetch average labeler rating
-        num_toxic_other = len(df[(df.user_id == "A") & (df[other_pred_col] >= toxic_threshold)]) + smoothing_factor
-        num_nontoxic_other = len(df[(df.user_id == "A") & (df[other_pred_col] < toxic_threshold)]) + smoothing_factor
-    elif comparison_group == "other_users":
-        other_pred_col = "pred"
-        num_toxic_other = len(df[(df.user_id != "A") & (df[other_pred_col] >= toxic_threshold)]) + smoothing_factor
-        num_nontoxic_other = len(df[(df.user_id != "A") & (df[other_pred_col] < toxic_threshold)]) + smoothing_factor
-
-    num_toxic_user = len(df[(df.user_id == "A") & (df.pred >= toxic_threshold)]) + smoothing_factor
-    num_nontoxic_user = len(df[(df.user_id == "A") & (df.pred < toxic_threshold)]) + smoothing_factor
-
-    toxic_ratio = num_toxic_user / num_toxic_other
-    nontoxic_ratio = num_nontoxic_user / num_nontoxic_other
-    odds_ratio = toxic_ratio / nontoxic_ratio
-
-    if debug:
-        print(f"Odds ratio: {odds_ratio}")
-        print(f"num_toxic_user: {num_toxic_user}, num_nontoxic_user: {num_nontoxic_user}")
-        print(f"num_toxic_other: {num_toxic_other}, num_nontoxic_other: {num_nontoxic_other}")
-
-    contingency_table = [[num_toxic_user, num_nontoxic_user], [num_toxic_other, num_nontoxic_other]]
-    odds_ratio, p_val = stats.fisher_exact(contingency_table, alternative='two-sided')
-    if debug:
-        print(f"Odds ratio: {odds_ratio}, p={p_val}")
-
-    return odds_ratio
-
-# Neighbor search
-def get_match(comment_inds, K=20, threshold=None, debug=False):
-    match_ids = []
-    rows = []
-    for i in comment_inds:
-        if debug:
-            print(f"\nComment: {comments[i]}")
-        query_embedding = model.encode(comments[i], convert_to_tensor=True)
-        hits = util.semantic_search(query_embedding, embeddings, score_function=util.cos_sim, top_k=K)
-        # print(hits[0])
-        for hit in hits[0]:
-            c_id = hit['corpus_id']
-            score = np.round(hit['score'], 3)
-            if threshold is None or score > threshold:
-                match_ids.append(c_id)
-                if debug:
-                    print(f"\t(ID={c_id}, Score={score}): {comments[c_id]}")
-                rows.append([c_id, score, comments[c_id]])
-
-    df = pd.DataFrame(rows, columns=["id", "score", "comment"])
-    return match_ids
-
-def display_examples_auto_cluster(preds_df, cluster, other_ids, perf_metric, sort_ascending=True, worker_id="A", num_examples=10):
-    # Overall performance
-    topic_df = preds_df
-    topic_df = topic_df[topic_df["topic"] == cluster]
-    user_perf, other_perf = get_overall_perf3(topic_df, perf_metric, other_ids)
-
-    user_direction = "LOWER" if user_perf < 0 else "HIGHER"
-    other_direction = "LOWER" if other_perf < 0 else "HIGHER"
-    print(f"Your ratings are on average {np.round(abs(user_perf), 3)} {user_direction} than the existing system for this cluster")
-    print(f"Others' ratings (based on {len(other_ids)} users) are on average {np.round(abs(other_perf), 3)} {other_direction} than the existing system for this cluster")
-
-    # Display example comments
-    df = display_examples_cluster(preds_df, other_ids, num_examples, sort_ascending)
-    return df
-
-
-# function to get results for a new provided cluster
-def display_examples_manual_cluster(preds_df, cluster_comments, other_ids, perf_metric, sort_ascending=True, worker_id="A"):
-    # Overall performance
-    cluster_df = preds_df[preds_df["comment"].isin(cluster_comments)]
-    user_perf, other_perf = get_overall_perf3(cluster_df, perf_metric, other_ids)
-
-    user_direction = "LOWER" if user_perf < 0 else "HIGHER"
-    other_direction = "LOWER" if other_perf < 0 else "HIGHER"
-    print(f"Your ratings are on average {np.round(abs(user_perf), 3)} {user_direction} than the existing system for this cluster")
-    print(f"Others' ratings (based on {len(other_ids)} users) are on average {np.round(abs(other_perf), 3)} {other_direction} than the existing system for this cluster")
-
-    user_df = preds_df[preds_df.user_id == worker_id].sort_values(by=["item_id"]).reset_index()
-    others_df = preds_df[preds_df.user_id == other_ids[0]]
-    for i in range(1, len(other_ids)):
-        others_df.append(preds_df[preds_df.user_id == other_ids[i]])
-    others_df.groupby(["item_id"]).mean()
-    others_df = others_df.sort_values(by=["item_id"]).reset_index()
-
-    # Get cluster_comments
-    user_df = user_df[user_df["comment"].isin(cluster_comments)]
-    others_df = others_df[others_df["comment"].isin(cluster_comments)]
-
-    df = pd.merge(user_df, others_df, on="item_id", how="left", suffixes=('_user', '_other'))
-    df["pred_system"] = df["rating_avg_user"]
-    df["pred_system_stddev"] = df["rating_stddev_user"]
-    df = df[["item_id", "comment_user", "pred_user", "pred_other", "pred_system", "pred_system_stddev"]]
-
-    # Add styling
-    df = df.sort_values(by=['pred_system_stddev'], ascending=sort_ascending)
-    df = df.style.apply(style_color_difference, axis=1).render()
-    return df
-
 ########################################
 # GET_LABELING utils
-def create_example_sets(
     # Restrict to the keyword, if provided
-    df =
     if keyword != None:
         df = df[df["comment"].str.contains(keyword)]
 
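The new show_overall_perf keys everything on (cur_user, cur_model) and caches the rendered chart on disk. A hypothetical call (the user and model names are invented):

    out = show_overall_perf(cur_model="demo_model", error_type="Both", cur_user="demo_user")
    vega_spec = out["topic_overview_plot_json"]  # Vega-Lite JSON for the topic overview chart
    # A second call reads the cached chart_overall_vis.json unless use_cache=False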
@@ -651,8 +290,8 @@ def create_example_sets(comments_df, n_label_per_bin, score_bins, keyword=None,
 
     return ex_to_label
 
-def get_grp_model_labels(
-    df =
 
     train_df_grp = train_df[train_df["user_id"].isin(grp_ids)]
     train_df_grp_avg = train_df_grp.groupby(by=["item_id"]).median().reset_index()
@@ -676,106 +315,207 @@ def get_grp_model_labels(comments_df, n_label_per_bin, score_bins, grp_ids):
 
     return ratings_grp
 
 ########################################
 # GET_PERSONALIZED_MODEL utils
-def fetch_existing_data(
     # Check if we have cached model performance
-
-
-    if os.path.isdir(os.path.join(module_dir, perf_dir)):
     # Fetch cached results
-
-        with open(
         mae, mse, rmse, avg_diff = pickle.load(f)
     else:
-
-        with open(os.path.join(module_dir, f"./data/trained_models/{model_name}.pkl"), "rb") as f:
-            cur_model = pickle.load(f)
-        mae, mse, rmse, avg_diff = users_perf(cur_model)
-        # Cache results
-        os.mkdir(os.path.join(module_dir, perf_dir))
-        with open(os.path.join(module_dir, perf_dir, "1.pkl"), "wb") as f:
-            pickle.dump((mae, mse, rmse, avg_diff), f)
 
     # Fetch previous user-provided labels
     ratings_prev = None
-
-
         ratings_prev = pickle.load(f)
     return mae, mse, rmse, avg_diff, ratings_prev
 
-
     # Check if there is previously-labeled data; if so, combine it with this data
-
-    label_dir = f"./data/labels/{model_name}"
-    labeled_df = format_labeled_data(ratings)  # Treat ratings as full batch of all ratings
     ratings_prev = None
 
     # Filter out rows with "unsure" (-1)
     labeled_df = labeled_df[labeled_df["rating"] != -1]
 
     # Filter to top N for user study
-    if topic is None:
-
-        labeled_df = labeled_df.tail(top_n)
     else:
         # For topic tuning, need to fetch old labels
-
         # Concatenate previous set of labels with this new batch of labels
-
         ratings_prev = pickle.load(f)
-        labeled_df_prev = format_labeled_data(ratings_prev)
         labeled_df_prev = labeled_df_prev[labeled_df_prev["rating"] != -1]
         ratings.update(ratings_prev)  # append old ratings to ratings
         labeled_df = pd.concat([labeled_df_prev, labeled_df])
-
-
-
-    cur_model, perf, _, _ = train_user_model(ratings_df=labeled_df)
-
-    user_perf_metrics[model_name] = users_perf(cur_model)
-
-    mae, mse, rmse, avg_diff = user_perf_metrics[model_name]
-
-    cur_preds_df = get_preds_df(cur_model, ["A"], sys_eval_df=ratings_df_full, topic=topic, model_name=model_name)  # Just get results for user
-
     # Save this batch of labels
-
     pickle.dump(ratings, f)
 
-    #
-
-    pickle.dump(cur_preds_df, f)
-
-    if model_name not in all_model_names:
-        all_model_names.append(model_name)
-        with open(os.path.join(module_dir, "./data/all_model_names.pkl"), "wb") as f:
-            pickle.dump(all_model_names, f)
-
-    # Handle user
-    if user not in users_to_models:
-        users_to_models[user] = []  # New user
-    if model_name not in users_to_models[user]:
-        users_to_models[user].append(model_name)  # New model
-    with open(f"./data/users_to_models.pkl", "wb") as f:
-        pickle.dump(users_to_models, f)
-
-    with open(os.path.join(module_dir, "./data/user_perf_metrics.pkl"), "wb") as f:
-        pickle.dump(user_perf_metrics, f)
-    with open(os.path.join(module_dir, f"./data/trained_models/{model_name}.pkl"), "wb") as f:
-        pickle.dump(cur_model, f)
 
-    #
-
-
-
-    with open(
         pickle.dump((mae, mse, rmse, avg_diff), f)
 
     ratings_prev = ratings
     return mae, mse, rmse, avg_diff, ratings_prev
 
-def format_labeled_data(ratings, worker_id
     all_rows = []
     for comment, rating in ratings.items():
         comment_id = comments_to_ids[comment]
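With the helpers added earlier in this commit, label batches for a user/model live as numbered pickles under ./data/output/<user>/<model>/labels/. A hedged sketch of reloading and merging every batch, mirroring the ratings.update / pd.concat logic above (user and model names invented):

    import os, pickle
    ratings_all = {}
    label_dir = get_label_dir("demo_user", "demo_model")
    for i in range(get_n_label_files("demo_user", "demo_model")):
        with open(os.path.join(label_dir, f"{i}.pkl"), "rb") as f:
            ratings_all.update(pickle.load(f))  # comment text -> 0-4 rating; -1 means "unsure"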
@@ -785,7 +525,7 @@ def format_labeled_data(ratings, worker_id="A", debug=False):
     df = pd.DataFrame(all_rows, columns=["user_id", "item_id", "rating"])
     return df
 
-def users_perf(model, sys_eval_df=sys_eval_df
     # Load the full empty dataset
     sys_eval_comment_ids = sys_eval_df.item_id.unique().tolist()
     empty_ratings_rows = [[worker_id, c_id, 0] for c_id in sys_eval_comment_ids]
@@ -801,17 +541,17 @@ def users_perf(model, sys_eval_df=sys_eval_df, avg_ratings_df=comments_grouped_f
     user_item_preds = get_predictions_by_user_and_item(predictions)
     df["pred"] = df.apply(lambda row: user_item_preds[(row.user_id, row.item_id)] if (row.user_id, row.item_id) in user_item_preds else np.nan, axis=1)
 
-    df = df.merge(
     df.dropna(subset = ["pred"], inplace=True)
-    df["
 
-    perf_metrics = get_overall_perf(df,
     return perf_metrics
 
 def get_overall_perf(preds_df, user_id):
     # Prepare dataset to calculate performance
-    y_pred = preds_df[preds_df["user_id"] == user_id].
-    y_true = preds_df[preds_df["user_id"] == user_id].pred.to_numpy()
 
     # Get performance for user's model
     mae = mean_absolute_error(y_true, y_pred)
@@ -827,7 +567,11 @@ def get_predictions_by_user_and_item(predictions):
         user_item_preds[(uid, iid)] = est
     return user_item_preds
 
-
     # Prep dataframe for all predictions we'd like to request
     start = time.time()
     sys_eval_comment_ids = sys_eval_df.item_id.unique().tolist()
@@ -836,7 +580,8 @@ def get_preds_df(model, user_ids, orig_df=ratings_df_full, avg_ratings_df=commen
     for user_id in user_ids:
         empty_ratings_rows.extend([[user_id, c_id, 0] for c_id in sys_eval_comment_ids])
     empty_ratings_df = pd.DataFrame(empty_ratings_rows, columns=["user_id", "item_id", "rating"])
-
 
     # Evaluate model to get predictions
     start = time.time()
@@ -844,16 +589,17 @@
     eval_set_data = Dataset.load_from_df(empty_ratings_df, reader)
     _, testset = train_test_split(eval_set_data, test_size=1.)
     predictions = model.test(testset)
-
 
     # Update dataframe with predictions
     start = time.time()
     df = empty_ratings_df.copy()  # user_id, item_id, rating
     user_item_preds = get_predictions_by_user_and_item(predictions)
     df["pred"] = df.apply(lambda row: user_item_preds[(row.user_id, row.item_id)] if (row.user_id, row.item_id) in user_item_preds else np.nan, axis=1)
-    df = df.merge(
     df.dropna(subset = ["pred"], inplace=True)
-    df["
 
     # Get binned predictions (based on user prediction)
     df["prediction_bin"], out_bins = pd.cut(df["pred"], bins, labels=False, retbins=True)
@@ -861,9 +607,14 @@
 
     return df
 
 def train_user_model(ratings_df, train_df=train_df, model_eval_df=model_eval_df, train_frac=0.75, model_type="SVD", sim_type=None, user_based=True):
     # Sample from shuffled labeled dataframe and add batch to train set; specified set size to model_eval set
-    labeled = ratings_df.sample(frac=1)
     batch_size = math.floor(len(labeled) * train_frac)
     labeled_train = labeled[:batch_size]
     labeled_model_eval = labeled[batch_size:]
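For concreteness, train_user_model shuffles the labeled ratings and splits by train_frac: with, say, 40 labeled comments and the default train_frac=0.75, batch_size = math.floor(40 * 0.75) = 30, so 30 rows go to the train set and the remaining 10 are held out for model_eval.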
@@ -876,7 +627,11 @@ def train_user_model(ratings_df, train_df=train_df, model_eval_df=model_eval_df,
 
     return model, perf, labeled_train, labeled_model_eval
 
-
     # Train model
     reader = Reader(rating_scale=(0, 4))
     train_data = Dataset.load_from_df(train_df, reader)
@@ -905,58 +660,18 @@ def train_model(train_df, model_eval_df, model_type="SVD", sim_type=None, user_b
     mae = accuracy.mae(predictions)
     mse = accuracy.mse(predictions)
 
-
     perf = [mae, mse, rmse, fcp]
 
     return algo, perf
 
-def
-
-    label_dir = f"./data/labels/{model_name}"
-    n_label_files = len([name for name in os.listdir(os.path.join(module_dir, label_dir)) if os.path.isfile(os.path.join(module_dir, label_dir, name))])
-
-    all_rows = []
-    with open(os.path.join(module_dir, label_dir, f"{n_label_files}.pkl"), "rb") as f:
-        ratings = pickle.load(f)
-
-    labeled_df = format_labeled_data(ratings)
-    labeled_df = labeled_df[labeled_df["rating"] != -1]
-
-    # Iterate through batches of 5 labels
-    n_batches = int(np.ceil(len(labeled_df) / 5.))
-    for i in range(n_batches):
-        start = time.time()
-        n_to_sample = np.min([5 * (i + 1), len(labeled_df)])
-        cur_model, _, _, _ = train_user_model(ratings_df=labeled_df.head(n_to_sample))
-        mae, mse, rmse, avg_diff = users_perf(cur_model)
-        all_rows.append([n_to_sample, mae, "MAE"])
-        print(f"iter {i}: {time.time() - start}")
-
-    print("all_rows", all_rows)
-
-    df = pd.DataFrame(all_rows, columns=["n_to_sample", "perf", "metric"])
-    chart = alt.Chart(df).mark_line(point=True).encode(
-        x=alt.X("n_to_sample:Q", title="Number of Comments Labeled"),
-        y="perf",
-        color="metric",
-        tooltip=[
-            alt.Tooltip('n_to_sample:Q', title="Number of Comments Labeled"),
-            alt.Tooltip('metric:N', title="Metric"),
-            alt.Tooltip('perf:Q', title="Metric Value", format=".3f"),
-        ],
-    ).properties(
-        title=f"Performance over number of examples: {model_name}",
-        width=500,
-    )
-    return chart
-
-def plot_train_perf_results(model_name, mae):
-    perf_dir = f"./data/perf/{model_name}"
-    n_perf_files = len([name for name in os.listdir(os.path.join(module_dir, perf_dir)) if os.path.isfile(os.path.join(module_dir, perf_dir, name))])
-
     all_rows = []
-    for i in range(
-
         mae, mse, rmse, avg_diff = pickle.load(f)
         all_rows.append([i, mae, "Your MAE"])
 
@@ -975,24 +690,24 @@ def plot_train_perf_results(model_name, mae):
         width=500,
     )
 
-
-
-
 
     plot_dim_width = 500
     domain_min = 0.0
-    domain_max =
     bkgd = alt.Chart(pd.DataFrame({
-        "start": [
-        "stop": [domain_max,
-        "bkgd": ["Needs improvement
     })).mark_rect(opacity=0.2).encode(
-        y=alt.Y("start:Q", scale=alt.Scale(domain=[0, domain_max])),
-        y2=alt.Y2("stop:Q"),
         x=alt.value(0),
         x2=alt.value(plot_dim_width),
         color=alt.Color("bkgd:O", scale=alt.Scale(
-            domain=["Needs improvement
             range=["red", "yellow", "green"]),
             title="How good is your MAE?"
         )
@@ -1000,12 +715,12 @@ def plot_train_perf_results(model_name, mae):
 
     plot = (bkgd + chart).properties(width=plot_dim_width).resolve_scale(color='independent')
     mae_status = None
-    if mae <
-        mae_status = "Your MAE is in the <b>Good</b> range
-    elif mae <
-        mae_status = "Your MAE is in the <b>Okay</b> range
     else:
-        mae_status = "Your MAE is in the <b>Needs improvement</b> range
     return plot, mae_status
 
 ########################################
@@ -1104,14 +819,14 @@ def get_decision(rating, threshold):
 
 def get_category(row, threshold=0.3):
     k_to_category = {
-        "
-        "
-        "
-        "
-        "
     }
     categories = []
-    for k in ["
         if row[k] > threshold:
             categories.append(k_to_category[k])
 
@@ -1124,19 +839,20 @@ def get_comment_url(row):
     return f"#{row['item_id']}/#comment"
 
 def get_topic_url(row):
-    return f"#{row['
 
-
     df = preds_df.copy().reset_index()
 
     if n_topics is not None:
-        df = df[df["
 
     df["vis_pred_bin"], out_bins = pd.cut(df["pred"], bins, labels=VIS_BINS_LABELS, retbins=True)
-    df = df[df["user_id"] ==
-    df["system_label"] = [("toxic" if r > threshold else "non-toxic") for r in df[
-    df["threshold"] = [threshold for r in df[
-    df["key"] = [get_key(sys, user, threshold) for sys, user in zip(df[
     df["url"] = df.apply(lambda row: get_topic_url(row), axis=1)
 
     # Plot sizing
@@ -1154,12 +870,12 @@ def plot_overall_vis(preds_df, error_type, cur_user, cur_model, n_topics=None, b
     # Main chart
     chart = alt.Chart(df).mark_square(opacity=0.8, size=mark_size, stroke="grey", strokeWidth=0.5).transform_window(
         groupby=['vis_pred_bin'],
-        sort=[{'field':
         id='row_number()',
         ignorePeers=True,
     ).encode(
         x=alt.X('vis_pred_bin:Q', title="Our prediction of your rating", scale=alt.Scale(domain=(domain_min, domain_max))),
-        y=alt.Y('id:O', title="
         color = alt.Color("key:O", scale=alt.Scale(
             domain=["System agrees: Non-toxic", "System agrees: Toxic", "System differs: Error > 1.5", "System differs: Error > 1.0", "System differs: Error > 0.5", "System differs: Error <=0.5"],
             range=["white", "#cbcbcb", "red", "#ff7a5c", "#ffa894", "#ffd1c7"]),
@@ -1167,9 +883,9 @@ def plot_overall_vis(preds_df, error_type, cur_user, cur_model, n_topics=None, b
         ),
         href="url:N",
         tooltip = [
-            alt.Tooltip("
             alt.Tooltip("system_label:N", title="System label"),
-            alt.Tooltip("
             alt.Tooltip("pred:Q", title="Your rating", format=".2f")
         ]
     )
@@ -1233,31 +949,17 @@ def plot_overall_vis(preds_df, error_type, cur_user, cur_model, n_topics=None, b
     )
 
     plot = (bkgd + annotation + chart + rule).properties(height=(plot_dim_height), width=plot_dim_width).resolve_scale(color='independent').to_json()
-
-    # Save to file
-    chart_dir = "./data/charts"
-    chart_file = os.path.join(chart_dir, f"{cur_user}_{cur_model}.pkl")
-    with open(chart_file, "w") as f:
-        json.dump(plot, f)
-
     return plot
 
-
-
-
-    if use_model:
-        return plot_overall_vis_cluster(preds_df_mod, error_type=error_type, n_comments=500, threshold=threshold)
-    else:
-        return plot_overall_vis_cluster2(preds_df_mod, error_type=error_type, n_comments=500, threshold=threshold)
-
-def plot_overall_vis_cluster2(preds_df, error_type, n_comments=None, bins=VIS_BINS, threshold=TOXIC_THRESHOLD, bin_step=0.05):
     df = preds_df.copy().reset_index()
 
-    df["vis_pred_bin"], out_bins = pd.cut(df[
-    df = df[df["user_id"] ==
-    df["system_label"] = [("toxic" if r > threshold else "non-toxic") for r in df[
-    df["key"] = [get_key_no_model(sys, threshold) for sys in df[
-    print("len(df)", len(df))  # always 0 for some reason (from keyword search)
     df["category"] = df.apply(lambda row: get_category(row), axis=1)
     df["url"] = df.apply(lambda row: get_comment_url(row), axis=1)
 
@@ -1279,7 +981,7 @@ def plot_overall_vis_cluster2(preds_df, error_type, n_comments=None, bins=VIS_BI
     # Main chart
     chart = alt.Chart(df).mark_square(opacity=0.8, size=mark_size, stroke="grey", strokeWidth=0.25).transform_window(
         groupby=['vis_pred_bin'],
-        sort=[{'field':
         id='row_number()',
         ignorePeers=True
     ).encode(
@@ -1293,8 +995,8 @@ def plot_overall_vis_cluster2(preds_df, error_type, n_comments=None, bins=VIS_BI
         ),
         href="url:N",
         tooltip = [
-            alt.Tooltip("
-            alt.Tooltip("
         ]
     )
 
@@ -1345,24 +1047,22 @@ def plot_overall_vis_cluster2(preds_df, error_type, n_comments=None, bins=VIS_BI
     final_plot = (bkgd + annotation + chart + rule).properties(height=(plot_dim_height), width=plot_dim_width).resolve_scale(color='independent').to_json()
 
     return final_plot, df
-
-
-
-
 
     df["vis_pred_bin"], out_bins = pd.cut(df["pred"], bins, labels=VIS_BINS_LABELS, retbins=True)
-    df = df[df["user_id"] ==
-    df["system_label"] = [("toxic" if r > threshold else "non-toxic") for r in df[
-    df["key"] = [get_key(sys, user, threshold) for sys, user in zip(df[
-    print("len(df)", len(df))  # always 0 for some reason (from keyword search)
-    # print("columns", df.columns)
     df["category"] = df.apply(lambda row: get_category(row), axis=1)
     df["url"] = df.apply(lambda row: get_comment_url(row), axis=1)
 
     if n_comments is not None:
         n_to_sample = np.min([n_comments, len(df)])
         df = df.sample(n=n_to_sample)
-
     # Plot sizing
     domain_min = 0
     domain_max = 4
@@ -1377,7 +1077,7 @@ def plot_overall_vis_cluster(preds_df, error_type, n_comments=None, bins=VIS_BIN
     # Main chart
     chart = alt.Chart(df).mark_square(opacity=0.8, size=mark_size, stroke="grey", strokeWidth=0.25).transform_window(
         groupby=['vis_pred_bin'],
-        sort=[{'field':
         id='row_number()',
         ignorePeers=True
     ).encode(
@@ -1390,8 +1090,8 @@ def plot_overall_vis_cluster(preds_df, error_type, n_comments=None, bins=VIS_BIN
         ),
         href="url:N",
         tooltip = [
-            alt.Tooltip("
-            alt.Tooltip("
             alt.Tooltip("pred:Q", title="Your rating", format=".2f"),
             alt.Tooltip("category:N", title="Potential toxicity categories")
         ]
@@ -1457,30 +1157,27 @@ def plot_overall_vis_cluster(preds_df, error_type, n_comments=None, bins=VIS_BIN
 
     return final_plot, df
 
-def get_cluster_comments(df, error_type, threshold=TOXIC_THRESHOLD,
     df["user_color"] = [get_user_color(user, threshold) for user in df["pred"].tolist()]  # get cell colors
-    df["system_color"] = [get_user_color(sys, threshold) for sys in df[
-    df["error_color"] = [get_system_color(sys, user, threshold) for sys, user in zip(df[
-    df["error_type"] = [get_error_type(sys, user, threshold) for sys, user in zip(df[
-    df["error_amt"] = [abs(sys - threshold) for sys in df[
     df["judgment"] = ["" for _ in range(len(df))]  # template for "agree" or "disagree" buttons
 
     if use_model:
         df = df.sort_values(by=["error_amt"], ascending=False)  # surface largest errors first
     else:
-
-
 
     df["id"] = df["item_id"]
-    # df["comment"] already exists
-    df["comment"] = df["comment_"]
     df["toxicity_category"] = df["category"]
     df["user_rating"] = df["pred"]
     df["user_decision"] = [get_decision(rating, threshold) for rating in df["pred"].tolist()]
-    df["system_rating"] = df[
-    df["system_decision"] = [get_decision(rating, threshold) for rating in df[
-    df["error_type"] = df["error_type"]
-    df = df.head(num_examples)
     df = df.round(decimals=2)
 
     # Filter to specified error type
@@ -1493,7 +1190,7 @@ def get_cluster_comments(df, error_type, threshold=TOXIC_THRESHOLD, worker_id="A
     elif error_type == "Both":
         df = df[(df["error_type"] == "System may be under-sensitive") | (df["error_type"] == "System may be over-sensitive")]
 
-    return df
 
 # PERSONALIZED CLUSTERS utils
 def get_disagreement_comments(preds_df, mode, n=10_000, threshold=TOXIC_THRESHOLD):
@@ -1512,58 +1209,10 @@ def get_disagreement_comments(preds_df, mode, n=10_000, threshold=TOXIC_THRESHOL
     df = df.sort_values(by=["diff"], ascending=asc)
     df = df.head(n)
 
-    return df["
-
-def
-
-
-
-        cluster_df = cluster_df.sort_values(by=["topic_id"])
-        topics_under = cluster_df[cluster_df["error_type"] == "System may be under-sensitive"]["topic"].unique().tolist()
-        topics_under = topics_under[1:(n + 1)]
-        topics_over = cluster_df[cluster_df["error_type"] == "System may be over-sensitive"]["topic"].unique().tolist()
-        topics_over = topics_over[1:(n + 1)]
-        return topics_under, topics_over
-    else:
-        topics_under_top = []
-        topics_over_top = []
-        preds_df_file = f"./data/preds_dfs/{model}.pkl"
-        if (os.path.isfile(preds_df_file)):
-            with open(preds_df_file, "rb") as f:
-                preds_df = pickle.load(f)
-            preds_df_mod = preds_df.merge(comments_grouped_full_topic_cat, on="item_id", how="left", suffixes=('_', '_avg')).reset_index()
-            preds_df_mod = preds_df_mod[preds_df_mod["user_id"] == "A"]
-
-            comments_under, comments_under_df = get_disagreement_comments(preds_df_mod, mode="under-sensitive", n=1000)
-            if len(comments_under) > 0:
-                topics_under = BERTopic(embedding_model="paraphrase-MiniLM-L6-v2").fit(comments_under)
-                topics_under_top = topics_under.get_topic_info().head(n)["Name"].tolist()
-                print("topics_under", topics_under_top)
-                # Get topics per comment
-                topics_assigned, _ = topics_under.transform(comments_under)
-                comments_under_df["topic_id"] = topics_assigned
-                cur_topic_ids = topics_under.get_topic_info().Topic
-                topic_short_names = topics_under.get_topic_info().Name
-                topic_ids_to_names = {cur_topic_ids[i]: topic_short_names[i] for i in range(len(cur_topic_ids))}
-                comments_under_df["topic"] = [topic_ids_to_names[topic_id] for topic_id in comments_under_df["topic_id"].tolist()]
-
-            comments_over, comments_over_df = get_disagreement_comments(preds_df_mod, mode="over-sensitive", n=1000)
-            if len(comments_over) > 0:
-                topics_over = BERTopic(embedding_model="paraphrase-MiniLM-L6-v2").fit(comments_over)
-                topics_over_top = topics_over.get_topic_info().head(n)["Name"].tolist()
-                print("topics_over", topics_over_top)
-                # Get topics per comment
-                topics_assigned, _ = topics_over.transform(comments_over)
-                comments_over_df["topic_id"] = topics_assigned
-                cur_topic_ids = topics_over.get_topic_info().Topic
-                topic_short_names = topics_over.get_topic_info().Name
-                topic_ids_to_names = {cur_topic_ids[i]: topic_short_names[i] for i in range(len(cur_topic_ids))}
-                comments_over_df["topic"] = [topic_ids_to_names[topic_id] for topic_id in comments_over_df["topic_id"].tolist()]
-
-            cluster_df = pd.concat([comments_under_df, comments_over_df])
-            with open(f"./data/personal_cluster_dfs/{model}.pkl", "wb") as f:
-                pickle.dump(cluster_df, f)
-
-            return topics_under_top, topics_over_top
-    return [], []
|
|
23 |
from sentence_transformers import SentenceTransformer, util
|
24 |
import torch
|
25 |
from bertopic import BERTopic
|
26 |
+
from datetime import date
|
27 |
|
28 |
########################################
|
29 |
# PRE-LOADING
|
|
|
38 |
|
39 |
# Data-loading
|
40 |
module_dir = "./"
|
41 |
+
with open(os.path.join(module_dir, "data/input/ids_to_comments.pkl"), "rb") as f:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
ids_to_comments = pickle.load(f)
|
43 |
+
with open(os.path.join(module_dir, "data/input/comments_to_ids.pkl"), "rb") as f:
|
44 |
comments_to_ids = pickle.load(f)
|
45 |
+
system_preds_df = pd.read_pickle("data/input/system_preds_df.pkl")
|
46 |
+
sys_eval_df = pd.read_pickle(os.path.join(module_dir, "data/input/split_data/sys_eval_df.pkl"))
|
47 |
+
train_df = pd.read_pickle(os.path.join(module_dir, "data/input/split_data/train_df.pkl"))
|
|
|
|
|
48 |
train_df_ids = train_df["item_id"].unique().tolist()
|
49 |
+
model_eval_df = pd.read_pickle(os.path.join(module_dir, "data/input/split_data/model_eval_df.pkl"))
|
50 |
+
ratings_df_full = pd.read_pickle(os.path.join(module_dir, "data/input/ratings_df_full.pkl"))
|
51 |
+
worker_info_df = pd.read_pickle("./data/input/worker_info_df.pkl")
|
52 |
|
53 |
+
topic_ids = system_preds_df.topic_id
|
54 |
+
topics = system_preds_df.topic
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
topic_ids_to_topics = {topic_ids[i]: topics[i] for i in range(len(topic_ids))}
|
56 |
topics_to_topic_ids = {topics[i]: topic_ids[i] for i in range(len(topic_ids))}
|
57 |
+
unique_topics_ids = sorted(system_preds_df.topic_id.unique())
|
58 |
unique_topics = [topic_ids_to_topics[topic_id] for topic_id in range(len(topic_ids_to_topics) - 1)]
|
59 |
|
60 |
def get_toxic_threshold():
|
61 |
return TOXIC_THRESHOLD
|
62 |
|
63 |
+
def get_user_model_names(user):
|
64 |
+
# Fetch the user's models
|
65 |
+
output_dir = f"./data/output"
|
66 |
+
users = [name for name in os.listdir(output_dir) if os.path.isdir(os.path.join(output_dir, name))]
|
67 |
+
if user not in users:
|
68 |
+
# User does not exist
|
69 |
+
return []
|
70 |
else:
|
71 |
+
# Fetch trained model names for the user
|
72 |
+
user_dir = f"./data/output/{user}"
|
73 |
+
user_models = [name for name in os.listdir(user_dir) if os.path.isdir(os.path.join(user_dir, name))]
|
74 |
user_models.sort()
|
75 |
return user_models
|
76 |
|
|
|
78 |
return unique_topics
|
79 |
|
80 |
def get_large_clusters(min_n):
|
81 |
+
counts_df = system_preds_df.groupby(by=["topic_id"]).size().reset_index(name='counts')
|
82 |
counts_df = counts_df[counts_df["counts"] >= min_n]
|
83 |
return [topic_ids_to_topics[t_id] for t_id in sorted(counts_df["topic_id"].tolist()[1:])]
|
84 |
|
|
|
116 |
}
|
117 |
internal_to_readable = {v: k for k, v in readable_to_internal.items()}
|
118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
########################################
# Data storage helper functions

# Set up all directories for new user
def setup_user_dirs(cur_user):
    user_dir = f"./data/output/{cur_user}"
    if not os.path.isdir(user_dir):
        os.mkdir(user_dir)

def setup_model_dirs(cur_user, cur_model):
    model_dir = f"./data/output/{cur_user}/{cur_model}"
    if not os.path.isdir(model_dir):
        os.mkdir(model_dir)  # Set up model dir
        # Set up subdirs
        os.mkdir(os.path.join(model_dir, "labels"))
        os.mkdir(os.path.join(model_dir, "perf"))

def setup_user_model_dirs(cur_user, cur_model):
    setup_user_dirs(cur_user)
    setup_model_dirs(cur_user, cur_model)

# Charts
def get_chart_file(cur_user, cur_model):
    chart_dir = f"./data/output/{cur_user}/{cur_model}"
    return os.path.join(chart_dir, "chart_overall_vis.json")

# Labels
def get_label_dir(cur_user, cur_model):
    return f"./data/output/{cur_user}/{cur_model}/labels"

def get_n_label_files(cur_user, cur_model):
    label_dir = get_label_dir(cur_user, cur_model)
    return len([name for name in os.listdir(label_dir) if os.path.isfile(os.path.join(label_dir, name))])

def get_label_file(cur_user, cur_model, label_i=None):
    if label_i is None:
        # Get index to add on to end of list
        label_i = get_n_label_files(cur_user, cur_model)
    label_dir = get_label_dir(cur_user, cur_model)
    return os.path.join(label_dir, f"{label_i}.pkl")

# Performance
def get_perf_dir(cur_user, cur_model):
    return f"./data/output/{cur_user}/{cur_model}/perf"

def get_n_perf_files(cur_user, cur_model):
    perf_dir = get_perf_dir(cur_user, cur_model)
    return len([name for name in os.listdir(perf_dir) if os.path.isfile(os.path.join(perf_dir, name))])

def get_perf_file(cur_user, cur_model, perf_i=None):
    if perf_i is None:
        # Get index to add on to end of list
        perf_i = get_n_perf_files(cur_user, cur_model)
    perf_dir = get_perf_dir(cur_user, cur_model)
    return os.path.join(perf_dir, f"{perf_i}.pkl")

# Predictions dataframe
def get_preds_file(cur_user, cur_model):
    preds_dir = f"./data/output/{cur_user}/{cur_model}"
    return os.path.join(preds_dir, "preds_df.pkl")

# Reports
def get_reports_file(cur_user, cur_model):
    return f"./data/output/{cur_user}/{cur_model}/reports.json"
########################################
# General utils
...

########################################
# GET_AUDIT utils
def plot_metric_histogram(metric, user_metric, other_metric_vals, n_bins=10):
    hist, bin_edges = np.histogram(other_metric_vals, bins=n_bins, density=False)
    data = pd.DataFrame({
    ...
    return (bar + rule).interactive()

# Generates the summary plot across all topics for the user
def show_overall_perf(cur_model, error_type, cur_user, threshold=TOXIC_THRESHOLD, topic_vis_method="median", use_cache=True):
    # Your perf (calculated using the model and test set)
    preds_file = get_preds_file(cur_user, cur_model)
    with open(preds_file, "rb") as f:
        preds_df = pickle.load(f)

    chart_file = get_chart_file(cur_user, cur_model)
    if use_cache and os.path.isfile(chart_file):
        # Read from file if it exists
        with open(chart_file, "r") as f:
            topic_overview_plot_json = json.load(f)
    else:
        # Otherwise, generate the chart and save it to file
        if topic_vis_method == "median":  # Default
            preds_df_grp = preds_df.groupby(["topic", "user_id"]).median()
        elif topic_vis_method == "mean":
            preds_df_grp = preds_df.groupby(["topic", "user_id"]).mean()
        topic_overview_plot_json = plot_overall_vis(preds_df=preds_df_grp, n_topics=200, threshold=threshold, error_type=error_type, cur_user=cur_user, cur_model=cur_model)
        # Save to file
        with open(chart_file, "w") as f:
            json.dump(topic_overview_plot_json, f)

    return {
        "topic_overview_plot_json": json.loads(topic_overview_plot_json),
    }
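As a usage sketch (not from the source): assuming the model's predictions have already been saved by the training path below, a server endpoint can call this and hand the parsed Vega-Lite spec straight to the frontend. The user and model names are placeholders.

    # Placeholder names for illustration
    result = show_overall_perf(cur_model="model_0", error_type="Both", cur_user="demo_user")
    vega_spec = result["topic_overview_plot_json"]  # dict: a Vega-Lite spec ready to render client-side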
########################################
# GET_LABELING utils
def create_example_sets(n_label_per_bin, score_bins, keyword=None, topic=None):
    # Restrict to the keyword, if provided
    df = system_preds_df.copy()
    if keyword is not None:
        df = df[df["comment"].str.contains(keyword)]
    ...
    return ex_to_label

def get_grp_model_labels(n_label_per_bin, score_bins, grp_ids):
    df = system_preds_df.copy()

    train_df_grp = train_df[train_df["user_id"].isin(grp_ids)]
    train_df_grp_avg = train_df_grp.groupby(by=["item_id"]).median().reset_index()
    ...
    return ratings_grp
########################################
# SAVE_REPORT utils

# Convert the SEP field selection from the UI to the SEP enum value
def get_sep_enum(sep_selection):
    if sep_selection == "Adversarial Example":
        return "S0403: Adversarial Example"
    elif sep_selection == "Accuracy":
        return "P0204: Accuracy"
    elif sep_selection == "Bias/Discrimination":
        return "E0100: Bias/ Discrimination"
    else:
        return "P0200: Model issues"

# Format the description for the report including the provided title, error type,
# and text entry field ("Summary/Suggestions" text box)
def format_description(indie_label_json):
    title = indie_label_json["title"]
    error_type = indie_label_json["error_type"]
    text_entry = indie_label_json["text_entry"]
    return f"Title: {title}\nError Type: {error_type}\nSummary/Suggestions: {text_entry}"
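The chain of comparisons in get_sep_enum could equivalently be written as a dictionary lookup with a default. A sketch of that alternative form (get_sep_enum_alt is a name introduced here for illustration, not part of the codebase):

    SEP_ENUMS = {
        "Adversarial Example": "S0403: Adversarial Example",
        "Accuracy": "P0204: Accuracy",
        "Bias/Discrimination": "E0100: Bias/ Discrimination",
    }

    def get_sep_enum_alt(sep_selection):
        # Any other selection falls back to the generic "Model issues" enum
        return SEP_ENUMS.get(sep_selection, "P0200: Model issues")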
# Convert indielabel json to AVID json format.
# See the AVID format in https://avidml.org/avidtools/reference/report
#
# Important mappings:
#   IndieLabel Attribute    AVID Attribute          Example
#   text_entry              description             "I think the Perspective API is too sensitive. Here are some examples."
#   topic                   feature                 0_shes_woman_lady_face
#   persp_score             model_score             0.94
#   comment                 ori_input               "She looks beautiful"
#   user_rating             personal_model_score    0.92
#   user_decision           user_decision           "Non-toxic"
# Note that this is at the individual report level.
def convert_indie_label_json_to_avid_json(indie_label_json, cur_user, email, sep_selection):
    # Setting up the structure with a dict to enable programmatic additions
    avid_json_dict = {
        "data_type": "AVID",
        "data_version": None,
        "metadata": None,
        "affects": {
            "developer": [],
            "deployer": [
                "Hugging Face"
            ],
            # TODO: Make artifacts malleable during modularity work
            "artifacts": [
                {
                    "type": "Model",
                    "name": "Perspective API"
                }
            ]
        },
        "problemtype": {
            "classof": "Undefined",  # I don't think any of the other ClassEnums are applicable. Link: https://avidml.org/avidtools/_modules/avidtools/datamodels/enums#ClassEnum
            "type": "Detection",
            "description": {
                "lang": "eng",  # TODO: Make language selectable
                "value": "This report contains results from an end user audit conducted on Hugging Face."
            }
        },
        "metrics": [  # Note: for the end users' use case, each comment is an example.
        ],
        "references": [],
        "description": {
            "lang": "eng",  # TODO: Make language selectable
            "value": ""  # Left empty so the report comments can be contained here.
        },
        "impact": {
            "avid": {
                "risk_domain": [
                    "Ethics"
                ],
                "sep_view": [
                    "E0101: Group fairness"
                ],
                "lifecycle_view": [
                    "L05: Evaluation"
                ],
                "taxonomy_version": "0.2"
            }
        },
        "credit": "",  # Left empty so that credit can be assigned
        "reported_date": ""  # Left empty so that it can be dynamically filled in
    }

    avid_json_dict["description"] = format_description(indie_label_json)
    avid_json_dict["reported_date"] = str(date.today())
    # Assign credit to the email if provided; otherwise default to the randomly assigned username
    if email != "":
        avid_json_dict["credit"] = email
    else:
        avid_json_dict["credit"] = cur_user

    sep_enum = get_sep_enum(sep_selection)
    avid_json_dict["impact"]["avid"]["sep_view"] = [sep_enum]

    for e in indie_label_json["evidence"]:
        curr_metric = {}
        curr_metric["name"] = "Perspective API"
        curr_metric["detection_method"] = {
            "type": "Detection",
            "name": "Individual Example from End User Audit"
        }
        res_dict = {}
        res_dict["feature"] = e["topic"]
        res_dict["model_score"] = str(e["persp_score"])  # Converted to string to avoid Float type error with DB
        res_dict["ori_input"] = e["comment"]
        res_dict["personal_model_score"] = str(e["user_rating"])  # See above
        res_dict["user_decision"] = e["user_decision"]
        curr_metric["results"] = res_dict
        avid_json_dict["metrics"].append(curr_metric)

    new_report = json.dumps(avid_json_dict)
    return new_report
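A sketch of the expected input shape and a call, built from the example values in the mapping table above (all field values are illustrative, and "demo_user" is a placeholder):

    example_report = {
        "title": "Over-sensitivity on a topic cluster",
        "error_type": "System is over-sensitive",
        "text_entry": "I think the Perspective API is too sensitive. Here are some examples.",
        "evidence": [{
            "topic": "0_shes_woman_lady_face",
            "persp_score": 0.94,
            "comment": "She looks beautiful",
            "user_rating": 0.92,
            "user_decision": "Non-toxic",
        }],
    }
    avid_report = convert_indie_label_json_to_avid_json(
        example_report, cur_user="demo_user", email="", sep_selection="Accuracy"
    )  # returns a JSON string in the AVID report format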
########################################
# GET_PERSONALIZED_MODEL utils
def fetch_existing_data(user, model_name):
    # Check if we have cached model performance
    n_perf_files = get_n_perf_files(user, model_name)
    if n_perf_files > 0:
        # Fetch cached results
        perf_file = get_perf_file(user, model_name, n_perf_files - 1)  # Get last performance file
        with open(perf_file, "rb") as f:
            mae, mse, rmse, avg_diff = pickle.load(f)
    else:
        raise Exception(f"Model {model_name} does not exist")

    # Fetch previous user-provided labels
    ratings_prev = None
    n_label_files = get_n_label_files(user, model_name)
    if n_label_files > 0:
        label_file = get_label_file(user, model_name, n_label_files - 1)  # Get last label file
        with open(label_file, "rb") as f:
            ratings_prev = pickle.load(f)
    return mae, mse, rmse, avg_diff, ratings_prev

# Main function called by the server's `get_personalized_model` endpoint.
# Trains an updated model with the specified name, user, and ratings, then
# saves the ratings, performance metrics, and pre-computed predictions to files.
# - model_name: name of the model to train
# - ratings: dictionary of comments to ratings
# - user: user name
# - top_n: number of comments to train on (used when a set was held out for the original user study)
# - topic: topic to train on (used when tuning for a specific topic)
def train_updated_model(model_name, ratings, user, top_n=None, topic=None, debug=False):
    # Check if there is previously-labeled data; if so, combine it with this data
    labeled_df = format_labeled_data(ratings, worker_id=user)  # Treat ratings as the full batch of all ratings
    ratings_prev = None

    # Filter out rows with "unsure" (-1)
    labeled_df = labeled_df[labeled_df["rating"] != -1]

    # Filter to top N for user study
    if (topic is None) and (top_n is not None):
        labeled_df = labeled_df.head(top_n)
    else:
        # For topic tuning, we need to fetch the old labels
        n_label_files = get_n_label_files(user, model_name)
        if n_label_files > 0:
            # Concatenate the previous set of labels with this new batch of labels
            label_file = get_label_file(user, model_name, n_label_files - 1)  # Get last label file
            with open(label_file, "rb") as f:
                ratings_prev = pickle.load(f)
            labeled_df_prev = format_labeled_data(ratings_prev, worker_id=user)
            labeled_df_prev = labeled_df_prev[labeled_df_prev["rating"] != -1]
            ratings.update(ratings_prev)  # Append old ratings to ratings
            labeled_df = pd.concat([labeled_df_prev, labeled_df])
    if debug:
        print("len ratings for training:", len(labeled_df))

    # Save this batch of labels
    label_file = get_label_file(user, model_name)
    with open(label_file, "wb") as f:
        pickle.dump(ratings, f)

    # Train model
    cur_model, _, _, _ = train_user_model(ratings_df=labeled_df)

    # Compute and save performance metrics
    mae, mse, rmse, avg_diff = users_perf(cur_model, worker_id=user)
    perf_file = get_perf_file(user, model_name)
    with open(perf_file, "wb") as f:
        pickle.dump((mae, mse, rmse, avg_diff), f)

    # Pre-compute predictions for the full dataset and save them
    cur_preds_df = get_preds_df(cur_model, [user], sys_eval_df=ratings_df_full)
    preds_file = get_preds_file(user, model_name)
    with open(preds_file, "wb") as f:
        pickle.dump(cur_preds_df, f)

    # Replace the cached summary plot if it exists
    show_overall_perf(cur_model=model_name, error_type="Both", cur_user=user, use_cache=False)

    ratings_prev = ratings
    return mae, mse, rmse, avg_diff, ratings_prev
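An end-to-end usage sketch (not from the source): the user and model names are placeholders, the directories must exist first, and the rating keys must be comment strings present in the dataset's comments_to_ids mapping.

    # Illustrative call; ratings map raw comment text to a 0-4 toxicity score
    setup_user_model_dirs("demo_user", "model_0")
    ratings = {"example comment a": 0, "example comment b": 4}
    mae, mse, rmse, avg_diff, _ = train_updated_model("model_0", ratings, user="demo_user")
    print(f"held-out MAE after retraining: {mae:.2f}")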
def format_labeled_data(ratings, worker_id):
    all_rows = []
    for comment, rating in ratings.items():
        comment_id = comments_to_ids[comment]
        ...
    df = pd.DataFrame(all_rows, columns=["user_id", "item_id", "rating"])
    return df

def users_perf(model, worker_id, sys_eval_df=sys_eval_df):
    # Load the full empty dataset
    sys_eval_comment_ids = sys_eval_df.item_id.unique().tolist()
    empty_ratings_rows = [[worker_id, c_id, 0] for c_id in sys_eval_comment_ids]
    ...
    user_item_preds = get_predictions_by_user_and_item(predictions)
    df["pred"] = df.apply(lambda row: user_item_preds[(row.user_id, row.item_id)] if (row.user_id, row.item_id) in user_item_preds else np.nan, axis=1)

    df = df.merge(system_preds_df, on="item_id", how="left", suffixes=('', '_sys'))
    df.dropna(subset=["pred"], inplace=True)
    df["rating"] = df.rating.astype("int32")

    perf_metrics = get_overall_perf(df, worker_id)  # mae, mse, rmse, avg_diff
    return perf_metrics

def get_overall_perf(preds_df, user_id):
    # Prepare dataset to calculate performance
    y_pred = preds_df[preds_df["user_id"] == user_id].rating_sys.to_numpy()  # System's prediction
    y_true = preds_df[preds_df["user_id"] == user_id].pred.to_numpy()  # User's (predicted) ground truth

    # Get performance for the user's model
    mae = mean_absolute_error(y_true, y_pred)
    ...

def get_predictions_by_user_and_item(predictions):
    ...
        user_item_preds[(uid, iid)] = est
    return user_item_preds

# Pre-computes predictions for the provided model and specified users on the system-eval dataset
# - model: trained model
# - user_ids: list of user IDs to compute predictions for
# - sys_eval_df: dataframe of system eval labels (pre-computed)
def get_preds_df(model, user_ids, sys_eval_df=sys_eval_df, bins=BINS, debug=False):
    # Prep dataframe for all predictions we'd like to request
    start = time.time()
    sys_eval_comment_ids = sys_eval_df.item_id.unique().tolist()
    ...
    for user_id in user_ids:
        empty_ratings_rows.extend([[user_id, c_id, 0] for c_id in sys_eval_comment_ids])
    empty_ratings_df = pd.DataFrame(empty_ratings_rows, columns=["user_id", "item_id", "rating"])
    if debug:
        print("setup", time.time() - start)

    # Evaluate model to get predictions
    start = time.time()
    ...
    eval_set_data = Dataset.load_from_df(empty_ratings_df, reader)
    _, testset = train_test_split(eval_set_data, test_size=1.)
    predictions = model.test(testset)
    if debug:
        print("train_test_split", time.time() - start)

    # Update dataframe with predictions
    start = time.time()
    df = empty_ratings_df.copy()  # user_id, item_id, rating
    user_item_preds = get_predictions_by_user_and_item(predictions)
    df["pred"] = df.apply(lambda row: user_item_preds[(row.user_id, row.item_id)] if (row.user_id, row.item_id) in user_item_preds else np.nan, axis=1)
    df = df.merge(system_preds_df, on="item_id", how="left", suffixes=('', '_sys'))
    df.dropna(subset=["pred"], inplace=True)
    df["rating"] = df.rating.astype("int32")

    # Get binned predictions (based on user prediction)
    df["prediction_bin"], out_bins = pd.cut(df["pred"], bins, labels=False, retbins=True)
    ...
    return df

# Given the full set of ratings, trains the specified model type and evaluates on the model eval set
# - ratings_df: dataframe of all ratings
# - train_df: dataframe of training labels
# - model_eval_df: dataframe of model eval labels (validation set)
# - train_frac: fraction of ratings to use for training
def train_user_model(ratings_df, train_df=train_df, model_eval_df=model_eval_df, train_frac=0.75, model_type="SVD", sim_type=None, user_based=True):
    # Shuffle the labeled dataframe; the first train_frac of it goes to the train set, the remainder to the model_eval set
    labeled = ratings_df.sample(frac=1)  # Shuffle the data
    batch_size = math.floor(len(labeled) * train_frac)
    labeled_train = labeled[:batch_size]
    labeled_model_eval = labeled[batch_size:]
    ...
    return model, perf, labeled_train, labeled_model_eval

# Given a set of labels split into training and validation (model_eval), trains the specified
# model type on the training labels and evaluates on the model_eval labels
# - train_df: dataframe of training labels
# - model_eval_df: dataframe of model eval labels (validation set)
# - model_type: type of model to train
def train_model(train_df, model_eval_df, model_type="SVD", sim_type=None, user_based=True, debug=False):
    # Train model
    reader = Reader(rating_scale=(0, 4))
    train_data = Dataset.load_from_df(train_df, reader)
    ...
    mae = accuracy.mae(predictions)
    mse = accuracy.mse(predictions)

    if debug:
        print(f"MAE: {mae}, MSE: {mse}, RMSE: {rmse}, FCP: {fcp}")
    perf = [mae, mse, rmse, fcp]

    return algo, perf
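The collaborative-filtering calls here (Reader, Dataset.load_from_df, train_test_split, accuracy) match the Surprise library's API. Assuming that dependency, a self-contained sketch of the same train/test pattern (the toy data is made up):

    import pandas as pd
    from surprise import SVD, Dataset, Reader, accuracy
    from surprise.model_selection import train_test_split

    # Toy ratings: (user_id, item_id, rating) on the same 0-4 scale used above
    toy = pd.DataFrame(
        [["u1", 1, 0], ["u1", 2, 4], ["u1", 3, 1], ["u1", 4, 3]],
        columns=["user_id", "item_id", "rating"],
    )
    data = Dataset.load_from_df(toy, Reader(rating_scale=(0, 4)))
    trainset, testset = train_test_split(data, test_size=0.25)
    algo = SVD()
    algo.fit(trainset)
    accuracy.mae(algo.test(testset))  # prints and returns the MAE on the held-out split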
def plot_train_perf_results(user, model_name, mae):
    n_perf_files = get_n_perf_files(user, model_name)
    all_rows = []
    for i in range(n_perf_files):
        perf_file = get_perf_file(user, model_name, i)
        with open(perf_file, "rb") as f:
            mae, mse, rmse, avg_diff = pickle.load(f)
        all_rows.append([i, mae, "Your MAE"])
    ...
        width=500,
    )

    # Manually set for now
    mae_good = 1.0
    mae_okay = 1.2

    plot_dim_width = 500
    domain_min = 0.0
    domain_max = 2.0
    bkgd = alt.Chart(pd.DataFrame({
        "start": [mae_okay, mae_good, domain_min],
        "stop": [domain_max, mae_okay, mae_good],
        "bkgd": ["Needs improvement", "Okay", "Good"],
    })).mark_rect(opacity=0.2).encode(
        y=alt.Y("start:Q", scale=alt.Scale(domain=[0, domain_max]), title=""),
        y2=alt.Y2("stop:Q", title="Performance (MAE)"),
        x=alt.value(0),
        x2=alt.value(plot_dim_width),
        color=alt.Color("bkgd:O", scale=alt.Scale(
            domain=["Needs improvement", "Okay", "Good"],
            range=["red", "yellow", "green"]),
            title="How good is your MAE?"
        )
    )
    ...
    plot = (bkgd + chart).properties(width=plot_dim_width).resolve_scale(color='independent')
    mae_status = None
    if mae < mae_good:
        mae_status = "Your MAE is in the <b>Good</b> range. Your model looks ready to go."
    elif mae < mae_okay:
        mae_status = "Your MAE is in the <b>Okay</b> range. Your model can be used, but you can provide additional labels to improve it."
    else:
        mae_status = "Your MAE is in the <b>Needs improvement</b> range. Your model may need additional labels to improve."
    return plot, mae_status

########################################
...
def get_category(row, threshold=0.3):
    k_to_category = {
        "is_profane_frac": "Profanity",
        "is_threat_frac": "Threat",
        "is_identity_attack_frac": "Identity Attack",
        "is_insult_frac": "Insult",
        "is_sexual_harassment_frac": "Sexual Harassment",
    }
    categories = []
    for k in ["is_profane_frac", "is_threat_frac", "is_identity_attack_frac", "is_insult_frac", "is_sexual_harassment_frac"]:
        if row[k] > threshold:
            categories.append(k_to_category[k])
    ...

def get_comment_url(row):
    return f"#{row['item_id']}/#comment"

def get_topic_url(row):
    return f"#{row['topic']}/#topic"

# Plots the overall results histogram (each block is a topic)
def plot_overall_vis(preds_df, error_type, cur_user, cur_model, n_topics=None, bins=VIS_BINS, threshold=TOXIC_THRESHOLD, sys_col="rating_sys"):
    df = preds_df.copy().reset_index()

    if n_topics is not None:
        df = df[df["topic_id"] < n_topics]

    df["vis_pred_bin"], out_bins = pd.cut(df["pred"], bins, labels=VIS_BINS_LABELS, retbins=True)
    df = df[df["user_id"] == cur_user].sort_values(by=["item_id"]).reset_index()
    df["system_label"] = [("toxic" if r > threshold else "non-toxic") for r in df[sys_col].tolist()]
    df["threshold"] = [threshold for r in df[sys_col].tolist()]
    df["key"] = [get_key(sys, user, threshold) for sys, user in zip(df[sys_col].tolist(), df["pred"].tolist())]
    df["url"] = df.apply(lambda row: get_topic_url(row), axis=1)

    # Plot sizing
    ...
    # Main chart
    chart = alt.Chart(df).mark_square(opacity=0.8, size=mark_size, stroke="grey", strokeWidth=0.5).transform_window(
        groupby=['vis_pred_bin'],
        sort=[{'field': sys_col}],
        id='row_number()',
        ignorePeers=True,
    ).encode(
        x=alt.X('vis_pred_bin:Q', title="Our prediction of your rating", scale=alt.Scale(domain=(domain_min, domain_max))),
        y=alt.Y('id:O', title="Topics (ordered by System toxicity rating)", axis=alt.Axis(values=list(range(0, max_items, 5))), sort='descending'),
        color=alt.Color("key:O", scale=alt.Scale(
            domain=["System agrees: Non-toxic", "System agrees: Toxic", "System differs: Error > 1.5", "System differs: Error > 1.0", "System differs: Error > 0.5", "System differs: Error <=0.5"],
            range=["white", "#cbcbcb", "red", "#ff7a5c", "#ffa894", "#ffd1c7"]),
            ...
        ),
        href="url:N",
        tooltip=[
            alt.Tooltip("topic:N", title="Topic"),
            alt.Tooltip("system_label:N", title="System label"),
            alt.Tooltip(f"{sys_col}:Q", title="System rating", format=".2f"),
            alt.Tooltip("pred:Q", title="Your rating", format=".2f")
        ]
    )
    ...
    plot = (bkgd + annotation + chart + rule).properties(height=(plot_dim_height), width=plot_dim_width).resolve_scale(color='independent').to_json()
    return plot
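The key column drives the color scale above. get_key itself is defined elsewhere in this file and is not shown in this diff; purely as an inferred sketch from the color-scale domain (a reconstruction, not the actual definition), its logic would be roughly:

    def get_key_sketch(sys, user, threshold):
        # Agreement: both ratings fall on the same side of the toxicity threshold
        if (sys > threshold) == (user > threshold):
            return "System agrees: Toxic" if sys > threshold else "System agrees: Non-toxic"
        # Disagreement: bucket by the size of the gap between the two ratings
        err = abs(sys - user)
        if err > 1.5:
            return "System differs: Error > 1.5"
        elif err > 1.0:
            return "System differs: Error > 1.0"
        elif err > 0.5:
            return "System differs: Error > 0.5"
        return "System differs: Error <=0.5"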
# Plots the cluster results histogram (each block is a comment), but *without* a model
# as a point of reference (in contrast to plot_overall_vis_cluster)
def plot_overall_vis_cluster_no_model(cur_user, preds_df, n_comments=None, bins=VIS_BINS, threshold=TOXIC_THRESHOLD, sys_col="rating_sys"):
    df = preds_df.copy().reset_index()

    df["vis_pred_bin"], out_bins = pd.cut(df[sys_col], bins, labels=VIS_BINS_LABELS, retbins=True)
    df = df[df["user_id"] == cur_user].sort_values(by=[sys_col]).reset_index()
    df["system_label"] = [("toxic" if r > threshold else "non-toxic") for r in df[sys_col].tolist()]
    df["key"] = [get_key_no_model(sys, threshold) for sys in df[sys_col].tolist()]
    df["category"] = df.apply(lambda row: get_category(row), axis=1)
    df["url"] = df.apply(lambda row: get_comment_url(row), axis=1)
    ...
    # Main chart
    chart = alt.Chart(df).mark_square(opacity=0.8, size=mark_size, stroke="grey", strokeWidth=0.25).transform_window(
        groupby=['vis_pred_bin'],
        sort=[{'field': sys_col}],
        id='row_number()',
        ignorePeers=True
    ).encode(
        ...
        href="url:N",
        tooltip=[
            alt.Tooltip("comment:N", title="comment"),
            alt.Tooltip(f"{sys_col}:Q", title="System rating", format=".2f"),
        ]
    )
    ...
    final_plot = (bkgd + annotation + chart + rule).properties(height=(plot_dim_height), width=plot_dim_width).resolve_scale(color='independent').to_json()

    return final_plot, df
# Plots the cluster results histogram (each block is a comment) *with* a model as a point of reference
def plot_overall_vis_cluster(cur_user, preds_df, error_type, n_comments=None, bins=VIS_BINS, threshold=TOXIC_THRESHOLD, sys_col="rating_sys"):
    df = preds_df.copy().reset_index()

    df["vis_pred_bin"], out_bins = pd.cut(df["pred"], bins, labels=VIS_BINS_LABELS, retbins=True)
    df = df[df["user_id"] == cur_user].sort_values(by=[sys_col]).reset_index(drop=True)
    df["system_label"] = [("toxic" if r > threshold else "non-toxic") for r in df[sys_col].tolist()]
    df["key"] = [get_key(sys, user, threshold) for sys, user in zip(df[sys_col].tolist(), df["pred"].tolist())]
    df["category"] = df.apply(lambda row: get_category(row), axis=1)
    df["url"] = df.apply(lambda row: get_comment_url(row), axis=1)

    if n_comments is not None:
        n_to_sample = np.min([n_comments, len(df)])
        df = df.sample(n=n_to_sample)

    # Plot sizing
    domain_min = 0
    domain_max = 4
    ...
    # Main chart
    chart = alt.Chart(df).mark_square(opacity=0.8, size=mark_size, stroke="grey", strokeWidth=0.25).transform_window(
        groupby=['vis_pred_bin'],
        sort=[{'field': sys_col}],
        id='row_number()',
        ignorePeers=True
    ).encode(
        ...
        href="url:N",
        tooltip=[
            alt.Tooltip("comment:N", title="comment"),
            alt.Tooltip(f"{sys_col}:Q", title="System rating", format=".2f"),
            alt.Tooltip("pred:Q", title="Your rating", format=".2f"),
            alt.Tooltip("category:N", title="Potential toxicity categories")
        ]
    ...
    return final_plot, df
def get_cluster_comments(df, error_type, threshold=TOXIC_THRESHOLD, sys_col="rating_sys", use_model=True, debug=False):
    df["user_color"] = [get_user_color(user, threshold) for user in df["pred"].tolist()]  # Cell colors
    df["system_color"] = [get_user_color(sys, threshold) for sys in df[sys_col].tolist()]  # Cell colors
    df["error_color"] = [get_system_color(sys, user, threshold) for sys, user in zip(df[sys_col].tolist(), df["pred"].tolist())]  # Cell colors
    df["error_type"] = [get_error_type(sys, user, threshold) for sys, user in zip(df[sys_col].tolist(), df["pred"].tolist())]  # Error type in words
    df["error_amt"] = [abs(sys - threshold) for sys in df[sys_col].tolist()]  # Raw error
    df["judgment"] = ["" for _ in range(len(df))]  # Template for "agree" or "disagree" buttons

    if use_model:
        df = df.sort_values(by=["error_amt"], ascending=False)  # Surface largest errors first
    else:
        if debug:
            print("get_cluster_comments; not using model")
        df = df.sort_values(by=[sys_col], ascending=True)

    df["id"] = df["item_id"]
    df["toxicity_category"] = df["category"]
    df["user_rating"] = df["pred"]
    df["user_decision"] = [get_decision(rating, threshold) for rating in df["pred"].tolist()]
    df["system_rating"] = df[sys_col]
    df["system_decision"] = [get_decision(rating, threshold) for rating in df[sys_col].tolist()]
    df = df.round(decimals=2)

    # Filter to the specified error type
    ...
    elif error_type == "Both":
        df = df[(df["error_type"] == "System may be under-sensitive") | (df["error_type"] == "System may be over-sensitive")]

    return df
# PERSONALIZED CLUSTERS utils
def get_disagreement_comments(preds_df, mode, n=10_000, threshold=TOXIC_THRESHOLD):
    ...
    df = df.sort_values(by=["diff"], ascending=asc)
    df = df.head(n)

    return df["comment"].tolist(), df

def get_explore_df(n_examples, threshold):
    df = system_preds_df.sample(n=n_examples)
    df["system_decision"] = [get_decision(rating, threshold) for rating in df["rating"].tolist()]
    df["system_color"] = [get_user_color(sys, threshold) for sys in df["rating"].tolist()]  # Cell colors
    return df
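A brief usage sketch for the explore view (not from the source; the sample size is illustrative):

    # Sample 20 comments annotated with the system's decision at the default threshold
    explore_df = get_explore_df(n_examples=20, threshold=TOXIC_THRESHOLD)
    explore_df[["comment", "rating", "system_decision"]].head()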
indie_label_svelte/public/global.css
CHANGED
@@ -2,6 +2,7 @@ html, body {
     position: relative;
     width: 100%;
     height: 100%;
+    font-size: 12px;
 }

 * {
@@ -88,13 +89,13 @@ h3 {
 }
 h5 {
     color: rgb(80, 80, 80);
-    font-size:
+    font-size: 20px;
 }
 h6 {
     margin-top: 50px;
     margin-bottom: 20px;
     text-transform: uppercase;
-    font-size:
+    font-size: 16px;
 }

 .head_3 {
@@ -107,7 +108,7 @@ h6 {

 .head_5 {
     color: rgb(80, 80, 80);
-    font-size:
+    font-size: 20px;
     font-weight: bold;
     margin-top: 25px;
     margin-bottom: 25px;
@@ -117,7 +118,7 @@ h6 {
     color: rgb(80, 80, 80);
     text-transform: uppercase;
     font-weight: bold;
-    font-size:
+    font-size: 16px;
     margin-top: 25px;
     margin-bottom: 25px;
 }
@@ -125,7 +126,7 @@ h6 {
 .head_6_non_cap {
     color: rgb(80, 80, 80);
     font-weight: bold;
-    font-size:
+    font-size: 16px;
     margin-top: 25px;
     margin-bottom: 25px;
 }
@@ -234,6 +235,10 @@ table {
     overflow-y: scroll;
 }

+.page_header {
+    height: 64px !important;
+}
+
 .tab_header {
     position: fixed;
     background-color: #e3d6fd;
@@ -242,6 +247,11 @@ table {
     /* border-bottom: 1px solid grey; */
 }

+.tab_header span {
+    font-size: 14px;
+    line-height: normal !important;
+}
+
 .label_table {
     height: 750px;
     overflow-y: scroll;
@@ -279,6 +289,7 @@ table {

 .audit_section {
     padding-top: 40px;
+    width: 100%;
 }

 .comment_table_small {
indie_label_svelte/src/App.svelte
CHANGED
@@ -4,18 +4,10 @@

     import HypothesisPanel from "./HypothesisPanel.svelte";
     import MainPanel from "./MainPanel.svelte";
-    import SelectUserDialog from "./SelectUserDialog.svelte";
     import Explore from "./Explore.svelte";
-    import Results from "./Results.svelte";
-    import StudyLinks from "./StudyLinks.svelte";
-    import { user } from './stores/cur_user_store.js';
-    import { users } from "./stores/all_users_store.js";

     let personalized_model;
     let personalized_models = [];
-
-    // let topic = "";
-
     let error_type_options = ['Both', 'System is under-sensitive', 'System is over-sensitive', 'Show errors and non-errors'];
     let error_type = error_type_options[0];

@@ -24,27 +16,6 @@
     let mode = searchParams.get("mode");
     let cur_user = searchParams.get("user");

-    // Set cur_user if it's provided in URL params
-    if (cur_user !== null) {
-        user.update((value) => cur_user);
-    }
-
-    // Handle user dialog
-    let user_dialog_open = false;
-    user.subscribe(value => {
-        cur_user = value;
-    });
-
-    // Handle all users
-    let all_users = [];
-    async function getUsers() {
-        const response = await fetch("./get_users");
-        const text = await response.text();
-        const data = JSON.parse(text);
-        all_users = data["users"];
-        users.update((value) => all_users);
-    }
-
     function getAuditSettings() {
         let req_params = {
             user: cur_user,
@@ -55,27 +26,12 @@
         .then(function (r_orig) {
             let r = JSON.parse(r_orig);
             personalized_models = r["personalized_models"];
             personalized_model = personalized_models[0];
+            cur_user = r["user"];
-
-            // personalized_model = "model_1632886687_iterA";
-            // let clusters = r["clusters"];
-            // topic = clusters[0]; // TEMP
         });
-
-        // fetch("./audit_settings")
-        //     .then((r) => r.text())
-        //     .then(function (r_orig) {
-        //         let r = JSON.parse(r_orig);
-        //         personalized_models = r["personalized_models"];
-        //         personalized_model = personalized_models[0]; // TEMP
-        //         // personalized_model = "model_1632886687_iterA";
-        //         let clusters = r["clusters"];
-        //         topic = clusters[0]; // TEMP
-        //     });
     }
     onMount(async () => {
         getAuditSettings();
-        getUsers();
     });
 </script>

@@ -88,22 +44,13 @@
     <div>
         <Explore />
     </div>
-    {:else if mode == "results"}
-    <div>
-        <Results />
-    </div>
-    {:else if mode == "study_links"}
-    <div>
-        <StudyLinks />
-    </div>
     {:else }
-    <SelectUserDialog bind:open={user_dialog_open} cur_user={cur_user} />
     <div>
         {#key personalized_model }
-            <HypothesisPanel model={personalized_model}
+            <HypothesisPanel model={personalized_model} cur_user={cur_user}/>
         {/key}

-        <MainPanel bind:model={personalized_model} bind:error_type={error_type} on:change />
+        <MainPanel bind:model={personalized_model} bind:error_type={error_type} cur_user={cur_user} on:change />
     </div>
     {/if}
 </main>
indie_label_svelte/src/AppOld.svelte
DELETED
@@ -1,127 +0,0 @@
<svelte:head>
    <title>IndieLabel</title>
</svelte:head>

<script lang="ts">
    import { onMount } from "svelte";
    import Section from "./Section.svelte";
    import IterativeClustering from "./IterativeClustering.svelte";
    import OverallResults from "./OverallResults.svelte";
    import Labeling from "./Labeling.svelte";
    import HypothesisPanel from "./HypothesisPanel.svelte"

    let personalized_model;
    let personalized_models = [];
    let breakdown_category;
    let breakdown_categories = [];
    let systems = ["Perspective comment toxicity classifier"]; // Only one system for now
    let clusters = [];
    let promise = Promise.resolve(null);

    function getAuditSettings() {
        fetch("./audit_settings")
            .then((r) => r.text())
            .then(function (r_orig) {
                let r = JSON.parse(r_orig);
                breakdown_categories = r["breakdown_categories"];
                breakdown_category = breakdown_categories[0];
                personalized_models = r["personalized_models"];
                personalized_model = personalized_models[0];
                clusters = r["clusters"];
            });
    }
    onMount(async () => {
        getAuditSettings();
    });

    function handleAuditButton() {
        promise = getAudit();
    }

    async function getAudit() {
        let req_params = {
            pers_model: personalized_model,
            breakdown_axis: breakdown_category,
            perf_metric: "avg_diff",
            breakdown_sort: "difference",
            n_topics: 10,
        };
        let params = new URLSearchParams(req_params).toString();
        const response = await fetch("./get_audit?" + params);
        const text = await response.text();
        const data = JSON.parse(text);
        return data;
    }

</script>

<main>
    <HypothesisPanel model={personalized_model} />

    <Labeling />

    <IterativeClustering clusters={clusters} ind={1} personalized_model={personalized_model} />

    <div id="audit-settings" class="section">
        <h5>Audit settings</h5>
        <Section
            section_id="systems"
            section_title="What status-quo system would you like to audit?"
            section_opts={systems}
            bind:value={systems[0]}
        />
        <Section
            section_id="personalized_model"
            section_title="What model would you like to use to represent your views?"
            section_opts={personalized_models}
            bind:value={personalized_model}
        />
        <Section
            section_id="breakdown_category"
            section_title="How would you like to explore the performance of the system?"
            section_opts={breakdown_categories}
            bind:value={breakdown_category}
        />
        <button on:click={handleAuditButton}> Generate results </button>
        <div>
            Personalized model: {personalized_model}, Breakdown category: {breakdown_category}
        </div>
    </div>

    {#await promise}
        <p>...waiting</p>
    {:then audit_results}
        {#if audit_results}
            <OverallResults data={audit_results} clusters={clusters} personalized_model={personalized_model} />
        {/if}
    {:catch error}
        <p style="color: red">{error.message}</p>
    {/await}
</main>

<style>
    main {
        text-align: left;
        padding: 1em;
        max-width: 240px;
        margin: 0 0;
    }
    h3 {
        color: rgb(80, 80, 80);
        font-size: 30px;
    }
    h5 {
        color: rgb(80, 80, 80);
        font-size: 25px;
    }
    h6 {
        margin-top: 50px;
        text-transform: uppercase;
        font-size: 14px;
    }
    @media (min-width: 640px) {
        main {
            max-width: none;
        }
    }
</style>
indie_label_svelte/src/Auditing.svelte
CHANGED
@@ -7,7 +7,6 @@
|
|
7 |
import HelpTooltip from "./HelpTooltip.svelte";
|
8 |
import TopicTraining from "./TopicTraining.svelte";
|
9 |
|
10 |
-
import { user } from './stores/cur_user_store.js';
|
11 |
import { error_type } from './stores/error_type_store.js';
|
12 |
import { topic_chosen } from './stores/cur_topic_store.js';
|
13 |
import { model_chosen } from './stores/cur_model_store.js';
|
@@ -17,15 +16,13 @@
|
|
17 |
import LayoutGrid, { Cell } from "@smui/layout-grid";
|
18 |
import Radio from '@smui/radio';
|
19 |
import FormField from '@smui/form-field';
|
20 |
-
import Card, { Content } from '@smui/card';
|
21 |
import{ Wrapper } from '@smui/tooltip';
|
22 |
import IconButton from '@smui/icon-button';
|
23 |
-
import Select, { Option } from "@smui/select";
|
24 |
import Svelecte from '../node_modules/svelecte/src/Svelecte.svelte';
|
25 |
|
26 |
export let personalized_model;
|
27 |
-
// export let topic;
|
28 |
export let cur_error_type = "Both";
|
|
|
29 |
|
30 |
let evidence = [];
|
31 |
let show_audit_settings = false;
|
@@ -54,8 +51,6 @@
|
|
54 |
]
|
55 |
|
56 |
let personalized_models = [];
|
57 |
-
let breakdown_category;
|
58 |
-
let breakdown_categories = [];
|
59 |
let systems = ["YouSocial comment toxicity classifier"]; // Only one system for now
|
60 |
let clusters = [];
|
61 |
let clusters_for_tuning = []
|
@@ -75,7 +70,6 @@
|
|
75 |
let audit_type;
|
76 |
if (scaffold_method == "fixed" || scaffold_method == "personal" || scaffold_method == "personal_group" || scaffold_method == "personal_test" || scaffold_method == "personal_cluster" || scaffold_method == "topic_train" || scaffold_method == "prompts") {
|
77 |
audit_type = audit_types[1];
|
78 |
-
// audit_type = audit_types[0];
|
79 |
} else {
|
80 |
// No scaffolding mode or tutorial
|
81 |
audit_type = audit_types[0];
|
@@ -99,19 +93,8 @@
|
|
99 |
use_group_model = true;
|
100 |
}
|
101 |
|
102 |
-
// TEMP
|
103 |
let promise_cluster = Promise.resolve(null);
|
104 |
|
105 |
-
// Get current user from store
|
106 |
-
let cur_user;
|
107 |
-
user.subscribe(value => {
|
108 |
-
if (value != cur_user) {
|
109 |
-
cur_user = value;
|
110 |
-
personalized_model = "";
|
111 |
-
getAuditSettings();
|
112 |
-
}
|
113 |
-
});
|
114 |
-
|
115 |
// Get current topic from store
|
116 |
let topic;
|
117 |
topic_chosen.subscribe(value => {
|
@@ -126,8 +109,7 @@
|
|
126 |
if (!personalized_models.includes(personalized_model)) {
|
127 |
personalized_models.push(personalized_model);
|
128 |
}
|
129 |
-
|
130 |
-
handleClusterButton(); // re-render cluster results
|
131 |
});
|
132 |
|
133 |
// Save current error type
|
@@ -137,17 +119,13 @@
|
|
137 |
handleClusterButton();
|
138 |
}
|
139 |
|
140 |
-
// Handle topic-specific training
|
141 |
-
// let topic_training = null;
|
142 |
-
|
143 |
async function updateTopicChosen() {
|
144 |
if (topic != null) {
|
145 |
-
console.log("updateTopicChosen", topic)
|
146 |
topic_chosen.update((value) => topic);
|
147 |
}
|
148 |
}
|
149 |
|
150 |
-
function
|
151 |
let req_params = {
|
152 |
user: cur_user,
|
153 |
scaffold_method: scaffold_method,
|
@@ -157,8 +135,6 @@
|
|
157 |
.then((r) => r.text())
|
158 |
.then(function (r_orig) {
|
159 |
let r = JSON.parse(r_orig);
|
160 |
-
breakdown_categories = r["breakdown_categories"];
|
161 |
-
breakdown_category = breakdown_categories[0];
|
162 |
personalized_models = r["personalized_models"];
|
163 |
if (use_group_model) {
|
164 |
let personalized_model_grp = r["personalized_model_grp"];
|
@@ -170,26 +146,27 @@
|
|
170 |
model_chosen.update((value) => personalized_model);
|
171 |
clusters = r["clusters"];
|
172 |
clusters_for_tuning = r["clusters_for_tuning"];
|
173 |
-
console.log("clusters", clusters); // TEMP
|
174 |
topic = clusters[0]["options"][0]["text"];
|
175 |
topic_chosen.update((value) => topic);
|
176 |
-
handleAuditButton();
|
177 |
-
handleClusterButton();
|
178 |
});
|
179 |
}
|
180 |
onMount(async () => {
|
181 |
-
|
182 |
});
|
183 |
|
184 |
function handleAuditButton() {
|
185 |
model_chosen.update((value) => personalized_model);
|
186 |
-
|
|
|
|
|
|
|
187 |
}
|
188 |
|
189 |
-
async function getAudit() {
|
190 |
let req_params = {
|
191 |
-
pers_model:
|
192 |
-
breakdown_axis: breakdown_category,
|
193 |
perf_metric: "avg_diff",
|
194 |
breakdown_sort: "difference",
|
195 |
n_topics: 10,
|
@@ -205,23 +182,22 @@
|
|
205 |
}
|
206 |
|
207 |
function handleClusterButton() {
|
208 |
-
promise_cluster = getCluster();
|
209 |
}
|
210 |
|
211 |
-
async function getCluster() {
|
212 |
-
if (
|
213 |
return null;
|
214 |
}
|
215 |
let req_params = {
|
216 |
cluster: topic,
|
217 |
topic_df_ids: [],
|
218 |
-
|
219 |
-
pers_model:
|
220 |
example_sort: "descending", // TEMP
|
221 |
comparison_group: "status_quo", // TEMP
|
222 |
search_type: "cluster",
|
223 |
keyword: "",
|
224 |
-
n_neighbors: 0,
|
225 |
error_type: cur_error_type,
|
226 |
use_model: use_model,
|
227 |
scaffold_method: scaffold_method,
|
@@ -230,7 +206,6 @@
|
|
230 |
const response = await fetch("./get_cluster_results?" + params);
|
231 |
const text = await response.text();
|
232 |
const data = JSON.parse(text);
|
233 |
-
console.log(topic);
|
234 |
return data;
|
235 |
}
|
236 |
</script>
|
@@ -240,16 +215,13 @@
|
|
240 |
<div>
|
241 |
<div style="margin-top: 30px">
|
242 |
<span class="head_3">Auditing</span>
|
243 |
-
<IconButton
|
244 |
-
class="material-icons grey_button"
|
245 |
-
size="normal"
|
246 |
-
on:click={() => (show_audit_settings = !show_audit_settings)}
|
247 |
-
>
|
248 |
-
help_outline
|
249 |
-
</IconButton>
|
250 |
</div>
|
251 |
<div style="width: 80%">
|
|
|
252 |
<p>In this section, we'll be auditing the content moderation system. Here, you’ll be aided by a personalized model that will help direct your attention towards potential problem areas in the model’s performance. This model isn’t meant to be perfect, but is designed to help you better focus on areas that need human review.</p>
|
|
|
|
|
|
|
253 |
</div>
|
254 |
|
255 |
{#if show_audit_settings}
|
@@ -299,11 +271,14 @@
|
|
299 |
</LayoutGrid>
|
300 |
</div>
|
301 |
</div>
|
|
|
|
|
302 |
<p>Current model: {personalized_model}</p>
|
303 |
{/if}
|
304 |
</div>
|
305 |
|
306 |
<!-- 1: All topics overview -->
|
|
|
307 |
{#if audit_type == audit_types[0]}
|
308 |
<div class="audit_section">
|
309 |
<div class="head_5">Overview of all topics</div>
|
@@ -364,7 +339,7 @@
|
|
364 |
</li>
|
365 |
</ul>
|
366 |
{#key topic}
|
367 |
-
<TopicTraining topic={topic} />
|
368 |
{/key}
|
369 |
</div>
|
370 |
|
@@ -425,7 +400,7 @@
|
|
425 |
clusters={clusters}
|
426 |
model={personalized_model}
|
427 |
data={cluster_results}
|
428 |
-
table_width_pct={
|
429 |
table_id={"main"}
|
430 |
use_model={use_model}
|
431 |
bind:evidence={evidence}
|
@@ -447,7 +422,7 @@
|
|
447 |
<p>Next, you can optionally search for more comments to serve as evidence through manual keyword search (for individual words or phrases).</p>
|
448 |
<div class="section_indent">
|
449 |
{#key error_type}
|
450 |
-
<KeywordSearch clusters={clusters} personalized_model={personalized_model} bind:evidence={evidence} use_model={use_model} on:change/>
|
451 |
{/key}
|
452 |
</div>
|
453 |
</div>
|
@@ -457,7 +432,7 @@
|
|
457 |
<div class="head_5">Finalize your current report</div>
|
458 |
<p>Finally, review the report you've generated on the side panel and provide a brief summary of the problem you see. You may also list suggestions or insights into addressing this problem if you have ideas. This report will be directly used by the model developers to address the issue you've raised</p>
|
459 |
</div>
|
460 |
-
|
461 |
</div>
|
462 |
|
463 |
<style>
|
|
|
7 |
import HelpTooltip from "./HelpTooltip.svelte";
|
8 |
import TopicTraining from "./TopicTraining.svelte";
|
9 |
|
|
|
10 |
import { error_type } from './stores/error_type_store.js';
|
11 |
import { topic_chosen } from './stores/cur_topic_store.js';
|
12 |
import { model_chosen } from './stores/cur_model_store.js';
|
|
|
16 |
import LayoutGrid, { Cell } from "@smui/layout-grid";
|
17 |
import Radio from '@smui/radio';
|
18 |
import FormField from '@smui/form-field';
|
|
|
19 |
import{ Wrapper } from '@smui/tooltip';
|
20 |
import IconButton from '@smui/icon-button';
|
|
|
21 |
import Svelecte from '../node_modules/svelecte/src/Svelecte.svelte';
|
22 |
|
23 |
export let personalized_model;
|
|
|
24 |
export let cur_error_type = "Both";
|
25 |
+
export let cur_user;
|
26 |
|
27 |
let evidence = [];
|
28 |
let show_audit_settings = false;
|
|
|
51 |
]
|
52 |
|
53 |
let personalized_models = [];
|
|
|
|
|
54 |
let systems = ["YouSocial comment toxicity classifier"]; // Only one system for now
|
55 |
let clusters = [];
|
56 |
let clusters_for_tuning = []
|
|
|
70 |
let audit_type;
|
71 |
if (scaffold_method == "fixed" || scaffold_method == "personal" || scaffold_method == "personal_group" || scaffold_method == "personal_test" || scaffold_method == "personal_cluster" || scaffold_method == "topic_train" || scaffold_method == "prompts") {
|
72 |
audit_type = audit_types[1];
|
|
|
73 |
} else {
|
74 |
// No scaffolding mode or tutorial
|
75 |
audit_type = audit_types[0];
|
|
|
93 |
use_group_model = true;
|
94 |
}
|
95 |
|
|
|
96 |
let promise_cluster = Promise.resolve(null);
|
97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
// Get current topic from store
|
99 |
let topic;
|
100 |
topic_chosen.subscribe(value => {
|
|
|
109 |
if (!personalized_models.includes(personalized_model)) {
|
110 |
personalized_models.push(personalized_model);
|
111 |
}
|
112 |
+
getAuditResults();
|
|
|
113 |
});
|
114 |
|
115 |
// Save current error type
|
|
|
119 |
handleClusterButton();
|
120 |
}
|
121 |
|
|
|
|
|
|
|
122 |
async function updateTopicChosen() {
|
123 |
if (topic != null) {
|
|
|
124 |
topic_chosen.update((value) => topic);
|
125 |
}
|
126 |
}
|
127 |
|
128 |
+
function getAuditResults() {
|
129 |
let req_params = {
|
130 |
user: cur_user,
|
131 |
scaffold_method: scaffold_method,
|
|
|
135 |
.then((r) => r.text())
|
136 |
.then(function (r_orig) {
|
137 |
let r = JSON.parse(r_orig);
|
|
|
|
|
138 |
personalized_models = r["personalized_models"];
|
139 |
if (use_group_model) {
|
140 |
let personalized_model_grp = r["personalized_model_grp"];
|
|
|
146 |
model_chosen.update((value) => personalized_model);
|
147 |
clusters = r["clusters"];
|
148 |
clusters_for_tuning = r["clusters_for_tuning"];
|
|
|
149 |
topic = clusters[0]["options"][0]["text"];
|
150 |
topic_chosen.update((value) => topic);
|
151 |
+
handleAuditButton();
|
152 |
+
handleClusterButton();
|
153 |
});
|
154 |
}
|
155 |
onMount(async () => {
|
156 |
+
getAuditResults();
|
157 |
});
|
158 |
|
159 |
function handleAuditButton() {
|
160 |
model_chosen.update((value) => personalized_model);
|
161 |
+
if (personalized_model == "" || personalized_model == undefined) {
|
@@ ... @@
+            return;
+        }
+        promise = getAudit(personalized_model);
     }
 
+    async function getAudit(pers_model) {
         let req_params = {
+            pers_model: pers_model,
             perf_metric: "avg_diff",
             breakdown_sort: "difference",
             n_topics: 10,
@@ ... @@
     }
 
     function handleClusterButton() {
+        promise_cluster = getCluster(personalized_model);
     }
 
+    async function getCluster(pers_model) {
+        if (pers_model == "" || pers_model == undefined) {
             return null;
         }
         let req_params = {
             cluster: topic,
             topic_df_ids: [],
+            cur_user: cur_user,
+            pers_model: pers_model,
             example_sort: "descending", // TEMP
             comparison_group: "status_quo", // TEMP
             search_type: "cluster",
             keyword: "",
             error_type: cur_error_type,
             use_model: use_model,
             scaffold_method: scaffold_method,
@@ ... @@
         const response = await fetch("./get_cluster_results?" + params);
         const text = await response.text();
         const data = JSON.parse(text);
         return data;
     }
 </script>
@@ ... @@
 <div>
     <div style="margin-top: 30px">
         <span class="head_3">Auditing</span>
     </div>
     <div style="width: 80%">
+        {#if personalized_model}
         <p>In this section, we'll be auditing the content moderation system. Here, you’ll be aided by a personalized model that will help direct your attention towards potential problem areas in the model’s performance. This model isn’t meant to be perfect, but is designed to help you better focus on areas that need human review.</p>
+        {:else}
+        <p>Please first train your personalized model by following the steps in the "Labeling" tab (click the top left tab above).</p>
+        {/if}
     </div>
 
     {#if show_audit_settings}
@@ ... @@
         </LayoutGrid>
         </div>
     </div>
+    {/if}
+    {#if personalized_model}
     <p>Current model: {personalized_model}</p>
     {/if}
 </div>
 
 <!-- 1: All topics overview -->
+{#if personalized_model}
 {#if audit_type == audit_types[0]}
 <div class="audit_section">
     <div class="head_5">Overview of all topics</div>
@@ ... @@
         </li>
     </ul>
     {#key topic}
+        <TopicTraining topic={topic} cur_user={cur_user}/>
     {/key}
 </div>
 
@@ ... @@
             clusters={clusters}
             model={personalized_model}
             data={cluster_results}
+            table_width_pct={100}
             table_id={"main"}
             use_model={use_model}
             bind:evidence={evidence}
@@ ... @@
     <p>Next, you can optionally search for more comments to serve as evidence through manual keyword search (for individual words or phrases).</p>
     <div class="section_indent">
         {#key error_type}
+            <KeywordSearch clusters={clusters} personalized_model={personalized_model} cur_user={cur_user} bind:evidence={evidence} use_model={use_model} on:change/>
         {/key}
     </div>
 </div>
@@ ... @@
     <div class="head_5">Finalize your current report</div>
     <p>Finally, review the report you've generated on the side panel and provide a brief summary of the problem you see. You may also list suggestions or insights into addressing this problem if you have ideas. This report will be directly used by the model developers to address the issue you've raised</p>
 </div>
+{/if}
 </div>
 
 <style>
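The getAudit/getCluster functions above, and several components later in this commit, share one request pattern: serialize parameters with URLSearchParams, GET a Flask route, read the body as text, and JSON.parse it. A minimal TypeScript sketch of that pattern; the getJson helper name is hypothetical and does not appear in the repo:

    // Hypothetical helper summarizing the fetch pattern used throughout these components.
    async function getJson<T>(route: string, reqParams: Record<string, string>): Promise<T> {
        // Serialize params into a query string, e.g. "pers_model=...&n_topics=10"
        const params = new URLSearchParams(reqParams).toString();
        const response = await fetch(route + "?" + params);
        // The server returns JSON as text, so parse it explicitly.
        const text = await response.text();
        return JSON.parse(text) as T;
    }

    // Usage sketch mirroring getCluster() above (argument values illustrative):
    // const data = await getJson("./get_cluster_results", { cluster: topic, pers_model: pers_model });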
indie_label_svelte/src/ClusterResults.svelte
CHANGED
@@ -35,6 +35,7 @@
     export let evidence;
     export let table_id;
     export let use_model = true;
+    export let show_agree_disagree = false;
 
     let N_COMMENTS = 500;
     let show_num_ratings = false;
@@ -54,12 +55,10 @@
         //your code goes here on location change
         let cur_url = window.location.href;
         let cur_url_elems = cur_url.split("#");
-        // console.log(cur_url_elems)
         if (cur_url_elems.length > 0) {
             let path = cur_url_elems[2];
             if (path == "comment") {
                 let comment_id = cur_url_elems[1].split("/")[0];
-                console.log("comment_id", comment_id)
                 selected_comment_id = parseInt(comment_id);
                 let table_ind = null;
                 for (let i = 0; i < items.length; i++) {
@@ -129,7 +128,6 @@
         items = data["cluster_comments"];
         set_length = items.length;
     }
-    // console.log(set_length);
 
     let cur_open_evidence;
     open_evidence.subscribe(value => {
@@ -323,8 +321,10 @@
         <Cell>Potential toxicity<br>categories</Cell>
         {/if}
     {/if}
-
+
+    {#if show_agree_disagree}
     <Cell>Do you agree<br>with the system?</Cell>
+    {/if}
 
     {#if !show_checkboxes}
     <Cell>Remove</Cell>
@@ -396,7 +396,8 @@
         </Cell>
     {/if}
     {/if}
-
+
+    {#if show_agree_disagree}
     <Cell>
         <div>
             <FormField>
@@ -417,6 +418,7 @@
             </FormField>
         </div>
     </Cell>
+    {/if}
 
     {#if !show_checkboxes}
     <Cell>
indie_label_svelte/src/CommentTable.svelte
CHANGED
@@ -4,22 +4,18 @@
     import Button, { Label } from "@smui/button";
     import DataTable, { Head, Body, Row, Cell } from "@smui/data-table";
     import LinearProgress from '@smui/linear-progress';
-
-    import { user } from './stores/cur_user_store.js';
+
+    import { model_chosen } from './stores/cur_model_store.js';
 
     export let mode;
     export let model_name;
+    export let cur_user;
 
     let to_label = {};
     let promise = Promise.resolve(null);
     let n_complete_ratings;
     let n_unsure_ratings;
-
-    // Get current user
-    let cur_user;
-    user.subscribe(value => {
-        cur_user = value;
-    });
+    let show_comments_labeled_count = false;
 
     function getCommentsToLabel(cur_mode, n) {
         if (cur_mode == "train") {
@@ -48,6 +44,7 @@
     }
 
     function handleTrainModelButton() {
+        getCompleteRatings();
         promise = getModel("train");
     }
 
@@ -91,11 +88,14 @@
         user: cur_user,
     };
     let params = new URLSearchParams(req_params).toString();
-    const [remainder of the old fetch block (old lines 94-98) not captured in this view]
+    const data = await fetch("./get_personalized_model?" + params)
+        .then((r) => r.text())
+        .then(function (text) {
+            let data = JSON.parse(text);
+            to_label = data["ratings_prev"];
+            model_chosen.update((value) => model_name);
+            return data;
+        });
     return data;
 }
 </script>
@@ -221,12 +221,14 @@
     {/key}
 
     <div class="spacing_vert_40">
-        <Button on:click={handleTrainModelButton} variant="outlined"
+        <Button on:click={handleTrainModelButton} variant="outlined">
            <Label>Train Model</Label>
        </Button>
+       {#if show_comments_labeled_count}
        <Button on:click={getCompleteRatings} variant="outlined">
            <Label>Get Number of Comments Labeled</Label>
        </Button>
+       {/if}
        <Button on:click={() => handleLoadCommentsButton(5)} variant="outlined">
            <Label>Fetch More Comments To Label</Label>
        </Button>
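The user.subscribe block removed above follows Svelte's writable-store contract; after this commit, cur_user arrives as a component prop instead. For context, a minimal sketch of what such a store module typically looks like — the initial value is an assumption, since cur_user_store.js itself is not shown in this diff:

    // Hypothetical contents of the removed cur_user_store.js (sketch, not from the commit).
    import { writable } from "svelte/store";

    // "DemoUser" is an assumed placeholder initial value.
    export const user = writable<string>("DemoUser");

    // Components previously read it via a subscription:
    //     user.subscribe((value) => { cur_user = value; });
    // After this commit, cur_user is threaded down as an explicit prop instead,
    // which makes each component's dependence on the user visible in its API.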
indie_label_svelte/src/Explore.svelte
CHANGED
@@ -48,7 +48,6 @@
     const text = await response.text();
     const data = JSON.parse(text);
     cur_examples = JSON.parse(data["examples"]);
-    console.log(cur_examples); // TEMP
     return true;
 }
 </script>
indie_label_svelte/src/Hunch.svelte
CHANGED
@@ -1,9 +1,7 @@
 <script lang="ts">
     import { onMount } from "svelte";
-    import IterativeClustering from "./IterativeClustering.svelte";
     import Button, { Label } from "@smui/button";
     import Textfield from '@smui/textfield';
-    import LinearProgress from "@smui/linear-progress";
 
     export let ind;
     export let hunch;
@@ -32,7 +30,6 @@
 
 <div>
     <div>
-        <!-- <h6>Hunch {ind + 1}</h6> -->
         <h6>Topic:</h6>
         {topic}
     </div>
@@ -46,13 +43,6 @@
             label="My current hunch is that..."
         >
         </Textfield>
-        <!-- <Button
-            on:click={handleTestOnExamples}
-            class="button_float_right spacing_vert"
-            variant="outlined"
-        >
-            <Label>Test on examples</Label>
-        </Button> -->
     </div>
 
     <div class="spacing_vert">
@@ -63,23 +53,7 @@
         <Label>Submit</Label>
     </Button>
 </div>
-
-<!-- {#await example_block}
-    <div class="app_loading">
-        <LinearProgress indeterminate />
-    </div>
-{:then} -->
-<!-- {#if example_block}
-    <IterativeClustering clusters={clusters} ind={ind + 1} personalized_model={model} />
-{/if} -->
-<!-- {:catch error}
-    <p style="color: red">{error.message}</p>
-{/await} -->
 </div>
 
 <style>
-    /* * {
-        z-index: 11;
-        overflow-x: hidden;
-    } */
 </style>
indie_label_svelte/src/HypothesisPanel.svelte
CHANGED
@@ -1,12 +1,10 @@
 <script lang="ts">
     import { onMount } from "svelte";
     import ClusterResults from "./ClusterResults.svelte";
-    import [old import not fully captured in this view]
+    import SubmitReportDialog from "./SubmitReportDialog.svelte";
 
     import Button, { Label } from "@smui/button";
     import Textfield from '@smui/textfield';
-    import { user } from './stores/cur_user_store.js';
-    import { error_type } from './stores/error_type_store.js';
     import { new_evidence } from './stores/new_evidence_store.js';
     import { open_evidence } from './stores/open_evidence_store.js';
     import { topic_chosen } from './stores/cur_topic_store.js';
@@ -24,37 +22,27 @@
     import Checkbox from '@smui/checkbox';
     import FormField from '@smui/form-field';
     import IconButton from "@smui/icon-button";
-    import{ Wrapper } from '@smui/tooltip';
     import Radio from '@smui/radio';
-    import Switch from '@smui/switch';
 
     export let model;
-
-    export let user_dialog_open;
+    export let cur_user;
 
     let all_reports = [];
-
-    let cur_user;
-    user.subscribe(value => {
-        cur_user = value;
-    });
-
     let cur_topic;
     topic_chosen.subscribe(value => {
         cur_topic = value;
     });
+    // Handle submit report dialog
+    let submit_dialog_open = false;
 
     // Handle routing
     let searchParams = new URLSearchParams(window.location.search);
     let scaffold_method = searchParams.get("scaffold");
+    if (scaffold_method == null) {
+        scaffold_method = "personal"; // Default to personalized model scaffold
+    }
     let topic_vis_method = searchParams.get("topic_vis_method");
 
-    // TODO: connect to selected["error_type"] so changes on main panel affect report panel
-    // let cur_error_type;
-    // error_type.subscribe(value => {
-    //     cur_error_type = value;
-    // });
-
     // Handle drawer
     let open = false;
     let selected = null;
@@ -69,8 +57,6 @@
     cur_open_evidence = selected["evidence"];
     open_evidence.update((value) => cur_open_evidence);
     let isolated_topic = selected["title"].replace(/^(Topic: )/,'');
-    console.log("selected title", selected["title"]);
-    console.log(selected);
 
     // Close panel
     open = false;
@@ -151,6 +137,7 @@
         cur_user: cur_user,
         reports: JSON.stringify(all_reports),
         scaffold_method: scaffold_method,
+        model: model,
     };
     let params = new URLSearchParams(req_params).toString();
     const response = await fetch("./save_reports?" + params);
@@ -218,328 +205,309 @@
     // Save current error type
     async function updateErrorType() {
         // Update error type on main page to be the selected error type
-        // error_type.update((value) => cur_error_type);
-        // selected["error_type"] = cur_error_type;
         editErrorType = false;
     }
 
+    let promise_submit = Promise.resolve(null);
+    function handleSubmitReport() {
+        promise_submit = submitReport();
+    }
+    async function submitReport() {
+        submit_dialog_open = true;
+        return true;
+    }
+
 </script>
 
-[old report-panel markup (old lines ~228-505) only partially captured in this view; replaced by the block below]
+<div>
+    {#await promise_submit}
+        <CircularProgress style="height: 32px; width: 32px;" indeterminate />
+    {:then}
+        <SubmitReportDialog bind:open={submit_dialog_open} cur_user={cur_user} all_reports={all_reports}/>
+    {:catch error}
+        <p style="color: red">{error.message}</p>
+    {/await}
+    <div class="hypothesis_panel">
+        <div class="panel_header">
+            <div class="panel_header_content">
+                <div class="page_header">
+                    <img src="/logo.png" style="height: 50px; padding: 0px 20px;" alt="IndieLabel" />
+                    <Button class="user_button" color="secondary" style="margin: 12px 10px;" >
+                        <Label>User: {cur_user}</Label>
+                    </Button>
+                </div>
+                <div class="hypotheses_header">
+                    <h5 style="float: left; margin: 0; padding: 5px 20px;">Your Audit Reports</h5>
+                    <Button
+                        on:click={() => (open = !open)}
+                        color="primary"
+                        disabled={model == null}
+                        style="float: right; padding: 10px; margin-right: 10px;"
+                    >
+                        {#if open}
+                            <Label>Close</Label>
+                        {:else}
+                            {#key unfinished_count}
+                                <Label>Unfinished reports ({unfinished_count})</Label>
+                            {/key}
+                        {/if}
+                    </Button>
+                </div>
             </div>
         </div>
 
+        {#if model == null}
+            <div class="panel_contents">
+                <p>You can start to author audit reports in this panel after you've trained your personalized model in the "Labeling" tab.</p>
+            </div>
+        {:else}
+            <div class="panel_contents">
+                <!-- Drawer -->
+                {#await promise}
+                    <div class="app_loading_fullwidth">
+                        <LinearProgress indeterminate />
+                    </div>
+                {:then reports}
+                    {#if reports}
+                        <div class="drawer-container">
+                            {#key open}
+                                <Drawer variant="dismissible" bind:open>
+                                    <Header>
+                                        <Title>Your Reports</Title>
+                                        <Subtitle>Select a report to view.</Subtitle>
+                                    </Header>
+                                    <Content>
+                                        <List twoLine>
+                                            {#each reports as report}
+                                                <Item
+                                                    href="javascript:void(0)"
+                                                    on:click={() => setActive(report)}
+                                                    activated={selected === report}
+                                                >
+                                                    {#if report["complete_status"]}
+                                                        <Graphic class="material-icons" aria-hidden="true">task_alt</Graphic>
                                                    {:else}
+                                                        <Graphic class="material-icons" aria-hidden="true">radio_button_unchecked</Graphic>
                                                    {/if}
+                                                    <Text>
+                                                        <PrimaryText>
+                                                            {report["title"]}
+                                                        </PrimaryText>
+                                                        <SecondaryText>
+                                                            {report["error_type"]}
+                                                        </SecondaryText>
+                                                    </Text>
+                                                </Item>
+                                            {/each}
+                                        </List>
+                                    </Content>
+                                </Drawer>
+                            {/key}
+                            <AppContent class="app-content">
+                                <main class="main-content">
+                                    {#if selected}
+                                        <div class="head_6_highlight">
+                                            Current Report
                                        </div>
+                                        <div class="panel_contents2">
+                                            <!-- Title -->
+                                            <div class="spacing_vert">
+                                                <div class="edit_button_row">
+                                                    {#if editTitle}
+                                                        <div class="edit_button_row_input">
+                                                            <Textfield
+                                                                bind:value={selected["title"]}
+                                                                label="Your report title"
+                                                                input$rows={4}
+                                                                textarea
+                                                                variant="outlined"
+                                                                style="width: 100%;"
+                                                                helperLine$style="width: 100%;"
+                                                            />
+                                                        </div>
+                                                        <div>
+                                                            <IconButton class="material-icons grey_button" size="button" on:click={() => (editTitle = false)}>
+                                                                check
+                                                            </IconButton>
+                                                        </div>
+                                                    {:else}
+                                                        {#if selected["title"] != ""}
+                                                            <div class="head_5">
+                                                                {selected["title"]}
+                                                            </div>
+                                                        {:else}
+                                                            <div class="grey_text">Enter a report title</div>
+                                                        {/if}
 
+                                                        <div>
+                                                            <IconButton class="material-icons grey_button" size="button" on:click={() => (editTitle = true)}>
+                                                                create
+                                                            </IconButton>
+                                                        </div>
+                                                    {/if}
+                                                </div>
                                            </div>
+
+                                            <!-- Error type -->
+                                            <div class="spacing_vert_40">
+                                                <div class="head_6">
+                                                    <b>Error Type</b>
+                                                </div>
+                                                <div class="edit_button_row">
+                                                    {#if editErrorType}
+                                                        <div>
+                                                            {#each error_type_options as e}
+                                                                <div style="display: flex; align-items: center;">
                                                                    <FormField>
                                                                        <Radio bind:group={selected["error_type"]} value={e.opt} on:change={updateErrorType} color="secondary" />
                                                                        <span slot="label">
+                                                                            <b>{e.opt}</b> {e.descr}
                                                                        </span>
                                                                    </FormField>
+                                                                </div>
+                                                            {/each}
+                                                        </div>
+                                                    {:else}
+                                                        {#if selected["error_type"] != ""}
+                                                            <div>
+                                                                <p>{selected["error_type"]}</p>
                                                            </div>
+                                                        {:else}
+                                                            <div class="grey_text">Select an error type</div>
+                                                        {/if}
+
                                                        <div>
+                                                            <IconButton class="material-icons grey_button" size="button" on:click={() => (editErrorType = true)}>
+                                                                create
+                                                            </IconButton>
                                                        </div>
                                                    {/if}
+                                                </div>
                                            </div>
+
+                                            <!-- Evidence -->
+                                            <div class="spacing_vert_40">
+                                                <div class="head_6">
+                                                    <b>Evidence</b>
+                                                </div>
+                                                {#key cur_open_evidence}
+                                                    <div>
+                                                        {#if cur_open_evidence.length > 0}
+                                                            <ClusterResults
+                                                                cluster={cur_topic}
+                                                                model={model}
+                                                                data={{"cluster_comments": cur_open_evidence}}
+                                                                show_vis={false}
+                                                                show_checkboxes={false}
+                                                                table_width_pct={100}
+                                                                rowsPerPage={25}
+                                                                table_id={"panel"}
+                                                            />
+                                                        {:else}
+                                                            <p class="grey_text">
+                                                                Add examples from the main panel to see them here!
+                                                            </p>
+                                                        {/if}
+                                                    </div>
+                                                {/key}
                                            </div>
 
+                                            <div class="spacing_vert_60">
+                                                <div class="head_6">
+                                                    <b>Summary/Suggestions</b>
+                                                </div>
+                                                <div class="spacing_vert">
+                                                    <Textfield
+                                                        style="width: 100%;"
+                                                        helperLine$style="width: 100%;"
+                                                        input$rows={8}
+                                                        textarea
+                                                        bind:value={selected["text_entry"]}
+                                                        label="My current hunch is that..."
+                                                    >
+                                                    </Textfield>
+                                                </div>
+
                                            </div>
 
+                                            <div class="spacing_vert_40">
+                                                <div class="head_6">
+                                                    <b>Mark report as complete?</b>
+                                                    <FormField>
+                                                        <Checkbox checked={selected["complete_status"]} on:change={handleMarkComplete} />
+                                                    </FormField>
+                                                </div>
+
                                            </div>
                                        </div>
+                                    {/if}
+                                </main>
+                            </AppContent>
+                        </div>
+                    {/if}
+                {:catch error}
+                    <p style="color: red">{error.message}</p>
+                {/await}
+            </div>
+
+            <div class="panel_footer">
+                <div class="panel_footer_contents">
+                    <Button
+                        on:click={handleNewReport}
+                        variant="outlined"
+                        color="secondary"
+                        style=""
+                    >
+                        <Label>New</Label>
+                    </Button>
 
+                    <!-- <Button
+                        on:click={handleDeleteReport}
+                        variant="outlined"
+                        color="secondary"
+                        style=""
+                    >
+                        <Label>Delete</Label>
+                    </Button> -->
+
+                    <Button
+                        on:click={handleSaveReport}
+                        variant="outlined"
+                        color="secondary"
+                    >
+                        <Label>Save</Label>
+                    </Button>
+
+                    <Button
+                        on:click={handleSubmitReport}
+                        variant="outlined"
+                        color="secondary"
+                    >
+                        <Label>Send Reports</Label>
+                    </Button>
+
+                    <div>
+                        <span style="color: grey"><i>Last saved:
+                            {#await promise_save}
+                                <CircularProgress style="height: 32px; width: 32px;" indeterminate />
+                            {:then result}
+                                {#if result}
+                                    {new Date().toLocaleTimeString()}
+                                {:else}
+                                    —
+                                {/if}
+                            {:catch error}
+                                <p style="color: red">{error.message}</p>
+                            {/await}
+                        </i></span>
+                    </div>
                </div>
            </div>
+        {/if}
     </div>
-
-    <!-- TEMP -->
-    <!-- {#key model}
-        <div>Model: {model}</div>
-    {/key} -->
 </div>
 
 <style>
-    /* Drawer */
-    /* .drawer-container {
-        position: relative;
-        display: flex;
-        height: 350px;
-        max-width: 600px;
-        border: 1px solid
-            var(--mdc-theme-text-hint-on-background, rgba(0, 0, 0, 0.1));
-        overflow: hidden;
-        z-index: 0;
-    }
-
-    * :global(.app-content) {
-        flex: auto;
-        overflow: auto;
-        position: relative;
-        flex-grow: 1;
-    }
-
-    .main-content {
-        overflow: auto;
-        padding: 16px;
-        height: 100%;
-        box-sizing: border-box;
-    } */
-
     .panel_contents {
         padding: 0 20px;
         overflow-y: auto;
@@ -578,6 +546,13 @@
     :global(.mdc-button.user_button) {
         float: right;
         margin-right: 20px;
+        max-width: 200px;
+    }
+
+    :global(.mdc-button.user_button span) {
+        text-overflow: ellipsis;
+        white-space: nowrap;
+        overflow: hidden;
     }
 
     .page_header {
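The save_reports call above packs the whole report list into a single JSON-stringified query parameter, now with the model name alongside it. A small TypeScript sketch of the request this assembles; the field names come from the component, while the user and model values are hypothetical:

    // Illustrative report shape, mirroring the fields used in HypothesisPanel.svelte.
    const all_reports = [
        { title: "Topic: gaming", error_type: "", evidence: [], text_entry: "", complete_status: false },
    ];
    const req_params = {
        cur_user: "DemoUser",                  // hypothetical user name
        reports: JSON.stringify(all_reports),  // entire list serialized into one param
        scaffold_method: "personal",
        model: "model_DemoUser",               // hypothetical model name (new field in this commit)
    };
    const params = new URLSearchParams(req_params).toString();
    // fetch("./save_reports?" + params) would then hit the corresponding server route.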
indie_label_svelte/src/IterativeClustering.svelte
DELETED
@@ -1,164 +0,0 @@
-<script>
-    import Section from "./Section.svelte";
-    import ClusterResults from "./ClusterResults.svelte";
-    import Button, { Label } from "@smui/button";
-    import Textfield from "@smui/textfield";
-    import LayoutGrid, { Cell } from "@smui/layout-grid";
-    import LinearProgress from "@smui/linear-progress";
-    import Chip, { Set, Text } from '@smui/chips';
-
-    export let clusters;
-    export let personalized_model;
-    export let evidence;
-    export let width_pct = 80;
-
-    let topic_df_ids = [];
-    let promise_iter_cluster = Promise.resolve(null);
-    let keyword = null;
-    let n_neighbors = null;
-    let cur_iter_cluster = null;
-    let history = [];
-
-    async function getIterCluster(search_type) {
-        let req_params = {
-            cluster: cur_iter_cluster,
-            topic_df_ids: topic_df_ids,
-            n_examples: 500, // TEMP
-            pers_model: personalized_model,
-            example_sort: "descending", // TEMP
-            comparison_group: "status_quo", // TEMP
-            search_type: search_type,
-            keyword: keyword,
-            n_neighbors: n_neighbors,
-        };
-        console.log("topic_df_ids", topic_df_ids);
-        let params = new URLSearchParams(req_params).toString();
-        const response = await fetch("./get_cluster_results?" + params);
-        const text = await response.text();
-        const data = JSON.parse(text);
-        // if (data["cluster_comments"] == null) {
-        //     return false
-        // }
-        topic_df_ids = data["topic_df_ids"];
-        return data;
-    }
-
-    function findCluster() {
-        promise_iter_cluster = getIterCluster("cluster");
-        history = history.concat("bulk-add cluster: " + cur_iter_cluster);
-    }
-
-    function findNeighbors() {
-        promise_iter_cluster = getIterCluster("neighbors");
-        history = history.concat("find " + n_neighbors + " neighbors");
-    }
-
-    function findKeywords() {
-        promise_iter_cluster = getIterCluster("keyword");
-        history = history.concat("keyword search: " + keyword);
-    }
-</script>
-
-<div>
-    <div>
-        <!-- <h6>Hunch {ind} examples</h6> -->
-        <div>
-            <h6>Search Settings</h6>
-            <!-- Start with cluster -->
-            <!-- <div class="">
-                <Section
-                    section_id="iter_cluster"
-                    section_title="Bulk-add cluster"
-                    section_opts={clusters}
-                    bind:value={cur_iter_cluster}
-                    width_pct={100}
-                />
-                <Button
-                    on:click={findCluster}
-                    variant="outlined"
-                    class="button_float_right"
-                    disabled={cur_iter_cluster == null}
-                >
-                    <Label>Search</Label>
-                </Button>
-            </div> -->
-
-            <!-- Manual keyword -->
-            <div class="spacing_vert">
-                <Textfield
-                    bind:value={keyword}
-                    label="Keyword search"
-                    variant="outlined"
-                    style="width: {width_pct}%"
-                />
-                <Button
-                    on:click={findKeywords}
-                    variant="outlined"
-                    class="button_float_right spacing_vert"
-                    disabled={keyword == null}
-                >
-                    <Label>Search</Label>
-                </Button>
-            </div>
-
-            <!-- Find neighbors of current set -->
-            <div class="spacing_vert">
-                <Textfield
-                    bind:value={n_neighbors}
-                    label="Number of neighbors to retrieve"
-                    type="number"
-                    min="1"
-                    max="50"
-                    variant="outlined"
-                    style="width: {width_pct}%"
-                />
-                <Button
-                    on:click={findNeighbors}
-                    variant="outlined"
-                    class="button_float_right spacing_vert"
-                    disabled={n_neighbors == null}
-                >
-                    <Label>Search</Label>
-                </Button>
-            </div>
-        </div>
-    </div>
-
-    {#await promise_iter_cluster}
-        <div class="app_loading" style="width: {width_pct}%">
-            <LinearProgress indeterminate />
-        </div>
-    {:then iter_cluster_results}
-        {#if iter_cluster_results}
-            {#if history.length > 0}
-                <div class="bold" style="padding-top:40px;">Search History</div>
-                <Set chips={history} let:chip choice>
-                    <Chip {chip}>
-                        <Text>{chip}</Text>
-                    </Chip>
-                </Set>
-            {/if}
-            {#if iter_cluster_results.cluster_comments != null}
-                <ClusterResults
-                    cluster={""}
-                    clusters={clusters}
-                    model={personalized_model}
-                    data={iter_cluster_results}
-                    show_vis={false}
-                    table_width_pct={80}
-                    bind:evidence={evidence}
-                    on:change
-                />
-            {:else}
-                <div class="bold" style="padding-top:40px;">
-                    No results found
-                </div>
-            {/if}
-        {/if}
-    {:catch error}
-        <p style="color: red">{error.message}</p>
-    {/await}
-</div>
-
-<style>
-</style>
indie_label_svelte/src/KeywordSearch.svelte
CHANGED
@@ -4,12 +4,11 @@
 
     import Button, { Label } from "@smui/button";
     import Textfield from "@smui/textfield";
-    import LinearProgress from "@smui/linear-progress";
-    import Chip, { Set, Text } from '@smui/chips';
-
+    import LinearProgress from "@smui/linear-progress";
 
     export let clusters;
     export let personalized_model;
+    export let cur_user;
     export let evidence;
     export let width_pct = 80;
     export let use_model = true;
@@ -17,7 +16,6 @@
     let topic_df_ids = [];
     let promise_iter_cluster = Promise.resolve(null);
     let keyword = null;
-    let n_neighbors = null;
     let cur_iter_cluster = null;
     let history = [];
 
@@ -30,23 +28,18 @@
         let req_params = {
             cluster: cur_iter_cluster,
             topic_df_ids: topic_df_ids,
-
+            cur_user: cur_user,
             pers_model: personalized_model,
             example_sort: "descending", // TEMP
             comparison_group: "status_quo", // TEMP
             search_type: search_type,
             keyword: keyword,
-            n_neighbors: n_neighbors,
             error_type: cur_error_type,
         };
-        console.log("topic_df_ids", topic_df_ids);
         let params = new URLSearchParams(req_params).toString();
         const response = await fetch("./get_cluster_results?" + params);
         const text = await response.text();
         const data = JSON.parse(text);
-        // if (data["cluster_comments"] == null) {
-        //     return false
-        // }
         topic_df_ids = data["topic_df_ids"];
         return data;
     }
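With cur_user now threaded into req_params, every get_cluster_results request identifies the requesting user — presumably so the server can scope results and models to that user's session. A sketch of the query string this produces, with illustrative values:

    // All values below are illustrative, not from the repo.
    const req_params = {
        cluster: "0_gay_homosexual",   // hypothetical topic id
        cur_user: "DemoUser",          // hypothetical user name
        pers_model: "model_DemoUser",  // hypothetical model name
        search_type: "keyword",
        keyword: "example",
    };
    console.log(new URLSearchParams(req_params).toString());
    // cluster=0_gay_homosexual&cur_user=DemoUser&pers_model=model_DemoUser&search_type=keyword&keyword=example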
indie_label_svelte/src/Labeling.svelte
CHANGED
@@ -8,7 +8,8 @@
     import Button, { Label } from "@smui/button";
     import LinearProgress from '@smui/linear-progress';
     import Svelecte from '../node_modules/svelecte/src/Svelecte.svelte';
-
+
+    export let cur_user;
 
     let model_name = "";
     let personalized_models = [];
@@ -16,24 +17,13 @@
     let label_modes = [
         "Create a new model",
         "Edit an existing model",
-        "Tune your model for a topic area",
-        "Set up a group-based model",
+        // "Tune your model for a topic area",
+        // "Set up a group-based model",
     ];
 
     let clusters_for_tuning = [];
     let topic;
 
-    // Get current user
-    let cur_user;
-    user.subscribe(value => {
-        if (value != cur_user) {
-            cur_user = value;
-            personalized_models = [];
-            getLabeling();
-        }
-        cur_user = value;
-    });
-
     // Handle routing
     let label_mode = label_modes[0];
     let searchParams = new URLSearchParams(window.location.search);
@@ -43,8 +33,10 @@
     } else if (req_label_mode == 1) {
         label_mode = label_modes[1];
     } else if (req_label_mode == 2) {
+        // Unused; previous topic-based mode
         label_mode = label_modes[2];
     } else if (req_label_mode == 3) {
+        // Unused; previous group-based mode
         label_mode = label_modes[3];
     }
 
@@ -101,7 +93,6 @@
     const response = await fetch("./get_group_model?" + params);
     const text = await response.text();
     const data = JSON.parse(text);
-    console.log("getGroupModel", data);
     return data
 }
 
@@ -172,7 +163,7 @@
         </li>
     </ul>
 
-    <CommentTable mode={"train"} model_name={model_name}/>
+    <CommentTable mode={"train"} model_name={model_name} cur_user={cur_user}/>
 </div>
 {:else if label_mode == label_modes[1]}
 <!-- EXISTING MODEL -->
@@ -202,7 +193,7 @@
         </li>
     </ul>
     {#key existing_model_name}
-        <CommentTable mode={"view"} model_name={existing_model_name}/>
+        <CommentTable mode={"view"} model_name={existing_model_name} cur_user={cur_user}/>
     {/key}
 </div>
 {:else if label_mode == label_modes[2]}
@@ -239,7 +230,7 @@
         </li>
     </ul>
     {#key topic}
-        <TopicTraining topic={topic} model_name={model_name} />
+        <TopicTraining topic={topic} model_name={model_name} cur_user={cur_user}/>
     {/key}
 </div>
 
indie_label_svelte/src/MainPanel.svelte
CHANGED
@@ -1,24 +1,20 @@
 <script lang="ts">
     import Labeling from "./Labeling.svelte";
     import Auditing from "./Auditing.svelte";
-    import AppOld from "./AppOld.svelte";
 
     import Tab, { Label } from "@smui/tab";
     import TabBar from "@smui/tab-bar";
 
     export let model;
-    // export let topic;
     export let error_type;
-
-    let app_versions = ["old", "new"];
-    let app_version = "new";
+    export let cur_user;
 
     // Handle routing
-    let active = "
+    let active = "labeling";
     let searchParams = new URLSearchParams(window.location.search);
     let tab = searchParams.get("tab");
-    if (tab == "
-        active = "
+    if (tab == "auditing") {
+        active = "auditing";
     }
 
 </script>
@@ -37,37 +33,16 @@
 </div>
 
 <div class="panel_contents">
-    [commented-out app-version Section opener (old lines 40-43) not captured in this view]
-        section_title="What app version do you want to use?"
-        section_opts={app_versions}
-        width_pct={40}
-        bind:value={app_version}
-    />
-    </div> -->
-
-    {#if app_version == app_versions[0]}
-    <!-- OLD VERSION -->
-    <AppOld />
-    {:else if app_version == app_versions[1]}
-    <!-- NEW VERSION -->
-    <div>
-        <div id="labeling" hidden={active == "auditing"} >
-            <Labeling/>
-        </div>
+    <div>
+        <div id="labeling" hidden={active == "auditing"} >
+            <Labeling cur_user={cur_user}/>
+        </div>
 
-
-
-    </div>
+        <div id="auditing" hidden={active == "labeling"} >
+            <Auditing bind:personalized_model={model} bind:cur_error_type={error_type} cur_user={cur_user} on:change/>
         </div>
-
+    </div>
 
-    <!-- TEMP -->
-    <!-- {#key model}
-        <div>Model: {model}</div>
-    {/key} -->
 </div>
 </div>
 
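The routing above keeps both tab panels mounted and only toggles visibility with the hidden attribute, so switching tabs preserves each panel's state. A condensed TypeScript sketch of the same logic:

    // Read the requested tab from the URL; default to "labeling".
    const searchParams = new URLSearchParams(window.location.search);
    const active = searchParams.get("tab") === "auditing" ? "auditing" : "labeling";
    // In the markup, each panel stays mounted but hides when inactive:
    //   <div id="labeling" hidden={active == "auditing"}> ... </div>
    //   <div id="auditing" hidden={active == "labeling"}> ... </div>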
indie_label_svelte/src/ModelPerf.svelte
CHANGED
@@ -1,9 +1,7 @@
 <script lang="ts">
     import { VegaLite } from "svelte-vega";
     import type { View } from "svelte-vega";
-
     import LayoutGrid, { Cell } from "@smui/layout-grid";
-    import Card, { Content } from '@smui/card';
 
     export let data;
 
@@ -13,64 +11,25 @@
     ];
     let perf_plot_view: View;
 
-    // let perf_plot2_spec = data["perf_plot2_json"];
-    // let perf_plot2_data = perf_plot2_spec["datasets"][perf_plot2_spec["data"]["name"]];
-    // let perf_plot2_view: View;
 </script>
 
 <div>
     <h6>Your Model Performance</h6>
-    [old MAE summary card markup (old lines 23-36) not captured in this view]
-        <ul>
-            <li>{@html data["mae_status"]}</li>
-            <!-- <li>
-                This is <b>better</b> (lower) than the average MAE for other users, so your model appears to <b>better capture</b> your views than the typical user model.
-            </li> -->
-        </ul>
-        </li>
-    </ul>
-    </Card>
-    </div>
-    </Cell>
-    </LayoutGrid>
+    <ul>
+        <li>
+            The <b>Mean Absolute Error (MAE)</b> metric indicates the average absolute difference <br>between your model's rating and your actual rating on a held-out set of comments.
+        </li>
+        <li>
+            You want your model to have a <b>lower</b> MAE (indicating <b>less error</b>).
+        </li>
+        <li>
+            <b>Your current MAE: {data["mae"]}</b>
+            <ul>
+                <li>{@html data["mae_status"]}</li>
+            </ul>
+        </li>
+    </ul>
     <div>
-        <!-- Overall -->
-        <!-- <table>
-            <tbody>
-                <tr>
-                    <td>
-                        <span class="bold">Mean Absolute Error (MAE)</span><br>
-
-                    </td>
-                    <td>
-                        <span class="bold-large">{data["mae"]}</span>
-                    </td>
-                </tr>
-                <tr>
-                    <td>
-                        <span class="bold">Average rating difference</span><br>
-                        This metric indicates the average difference between your model's rating and your actual rating on a held-out set of comments.
-                    </td>
-                    <td>
-                        <span class="bold-large">{data["avg_diff"]}</span>
-                    </td>
-                </tr>
-            </tbody>
-        </table> -->
-
        <!-- Performance visualization -->
        <div>
            <VegaLite {perf_plot_data} spec={perf_plot_spec} bind:view={perf_plot_view}/>
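The copy added above defines MAE as the average absolute difference between the model's predicted rating and the user's actual rating on held-out comments. A small worked sketch in TypeScript, with illustrative numbers (not from the app):

    // MAE = mean of |predicted - actual| over a held-out set.
    function meanAbsoluteError(pred: number[], actual: number[]): number {
        const n = Math.min(pred.length, actual.length);
        let total = 0;
        for (let i = 0; i < n; i++) {
            total += Math.abs(pred[i] - actual[i]);
        }
        return total / n;
    }

    // Illustrative values: |2-3| + |4-4| + |1-0| = 2, so MAE = 2/3 ≈ 0.667.
    console.log(meanAbsoluteError([2, 4, 1], [3, 4, 0]));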
indie_label_svelte/src/OverallResults.svelte
CHANGED
@@ -19,18 +19,6 @@
     let topic_overview_spec = topic_overview_json;
     let topic_overview_view: View;
 
-    // // Overall Histogram
-    // let overall_hist_json = data["overall_perf"]["overall_hist_json"];
-    // let overall_hist_data = overall_hist_json["datasets"][overall_hist_json["data"]["name"]];
-    // let overall_hist_spec = overall_hist_json;
-    // let overall_hist_view: View;
-
-    // // Class-conditional Histogram
-    // let class_cond_plot_json = data["overall_perf"]["class_cond_plot_json"];
-    // let class_cond_plot_data = class_cond_plot_json["datasets"][class_cond_plot_json["data"]["name"]];
-    // let class_cond_plot_spec = class_cond_plot_json;
-    // let class_cond_plot_view: View;
-
 </script>
 
 <div>
@@ -84,73 +72,6 @@
     </div>
 </div>
 
-<!-- Old visualizations -->
-<!-- <div style="margin-top: 500px">
-    <h6>Overall Performance</h6>
-    <div class="row">
-        <div class="col s12">
-            <div id="overall_perf">
-                <table>
-                    <tbody>
-                        <tr class="custom-blue">
-                            <td class="bold"
-                                >System {data[
-                                    "overall_perf"
-                                ]["metric"]} with YOUR labels</td
-                            >
-                            <td>
-                                <span class="bold-large"
-                                    >{data[
-                                        "overall_perf"
-                                    ]["user_metric"]}</span
-                                >
-                                (Percentile: {data[
-                                    "overall_perf"
-                                ]["user_percentile"]})
-                            </td>
-                        </tr>
-                        <tr>
-                            <td class="bold"
-                                >System {data[
-                                    "overall_perf"
-                                ]["metric"]} with OTHER USERS' labels</td
-                            >
-                            <td>
-                                <span class="bold-large"
-                                    >{data[
-                                        "overall_perf"
-                                    ]["other_metric"]}</span
-                                >
-                                (95% CI: [{data[
-                                    "overall_perf"
-                                ]["other_ci_low"]}, {data[
-                                    "overall_perf"
-                                ]["other_ci_high"]}])
-                            </td>
-                        </tr>
-                    </tbody>
-                </table>
-            </div>
-        </div>
-    </div>
-    <div class="row">
-        <div class="col s8">
-            <VegaLite {overall_hist_data} spec={overall_hist_spec} bind:view={overall_hist_view}/>
-        </div>
-    </div>
-
-    <h6>Performance Breakdown</h6>
-    <div class="row">
-        <div class="col s12">
-            <div class="row">
-                <div class="col s12">
-                    <VegaLite {class_cond_plot_data} spec={class_cond_plot_spec} bind:view={class_cond_plot_view} />
-                </div>
-            </div>
-        </div>
-    </div>
-</div> -->
-
 </div>
 <style>
 </style>
indie_label_svelte/src/Results.svelte
DELETED
@@ -1,206 +0,0 @@
-<script lang="ts">
-    import { onMount } from "svelte";
-    import ClusterResults from "./ClusterResults.svelte";
-
-    import Button, { Label } from "@smui/button";
-    import LinearProgress from "@smui/linear-progress";
-    import Checkbox from '@smui/checkbox';
-    import DataTable, {
-        Head,
-        Body,
-        Row,
-        Cell,
-        Label,
-        SortValue,
-    } from "@smui/data-table";
-    import FormField from "@smui/form-field";
-
-    let cur_examples = [];
-    let promise = Promise.resolve(null);
-
-    let scaffold_methods = ["personal", "personal_group", "prompts"];
-
-    let all_users = [];
-    async function getUsers() {
-        const response = await fetch("./get_users");
-        const text = await response.text();
-        const data = JSON.parse(text);
-        all_users = data["users"];
-        promise = getResults();
-    }
-
-    onMount(async () => {
-        getUsers()
-    });
-
-    async function getResults() {
-        let req_params = {
-            users: all_users
-        };
-        let params = new URLSearchParams(req_params).toString();
-        const response = await fetch("./get_results?" + params);
-        const text = await response.text();
-        const data = JSON.parse(text);
-
-        let results = data["results"];
-        return results;
-    }
-
-    function get_complete_ratio(reports) {
-        let total = reports.length;
-        let complete = reports.filter(item => item.complete_status).length;
-        return "" + complete + "/" + total + " complete";
-    }
-
-    function get_complete_count(reports) {
-        return reports.filter(item => item.complete_status).length;
-    }
-
-    function get_summary(reports) {
-        let summary = "";
-        let total_audits = 0
-        for (const scaffold_method of scaffold_methods) {
-            if (reports[scaffold_method]) {
-                let cur_reports = reports[scaffold_method];
-                let cur_ratio = get_complete_ratio(cur_reports);
-                let cur_result = "<li><b>" + scaffold_method + "</b>: " + cur_ratio + "</li>";
-                summary += cur_result;
-                let cur_complete = get_complete_count(cur_reports);
-                total_audits += cur_complete;
-            }
-        }
-
-        let top_summary = "<li><b>Total audits</b>: " + total_audits + "</li>";
-        summary = "<ul>" + top_summary + summary + "</ul>";
-        return summary;
-    }
-
-    function get_url(user, scaffold_method) {
-        return "http://localhost:5001/?user=" + user + "&scaffold=" + scaffold_method;
-    }
-</script>
-
-<svelte:head>
-    <title>Results</title>
-</svelte:head>
-
-<div class="panel">
-    <div class="panel_contents">
-        <div>
-            <h3>Results</h3>
-        </div>
-
-        <div style="padding-top:50px">
-            {#await promise}
-                <div class="app_loading">
-                    <LinearProgress indeterminate />
-                </div>
-            {:then results}
-                {#if results}
-                    {#each results as user_report}
-                        <div class="head_3">{user_report["user"]}</div>
-                        <div class="section_indent">
-                            <div class="head_5">Summary</div>
-                            <div>{@html get_summary(user_report)}</div>
-                            <ul>
-                                <li>Labeling pages
-                                    <ul>
-                                        <li>
-                                            <a href="http://localhost:5001/?user={user_report["user"]}&tab=labeling&label_mode=3" target="_blank">Group-based model</a>
-                                        </li>
-                                        <li>
-                                            <a href="http://localhost:5001/?user={user_report["user"]}&tab=labeling&label_mode=0" target="_blank">Personalized model</a>
-                                        </li>
-                                    </ul>
-                                </li>
-                                <li>Auditing pages
-                                    <ul>
-                                        <li>
-                                            <a href="http://localhost:5001/?user={user_report["user"]}&scaffold=personal_group" target="_blank">Group-based audit - personal scaffold</a>
-                                        </li>
-                                        <li>
-                                            <a href="http://localhost:5001/?user={user_report["user"]}&scaffold=personal" target="_blank">Individual audit - personal scaffold</a>
-                                        </li>
-                                        <li>
-                                            <a href="http://localhost:5001/?user={user_report["user"]}&scaffold=prompts" target="_blank">Individual audit - prompt scaffold</a>
-                                        </li>
-                                    </ul>
-                                </li>
-                            </ul>
-                        </div>
-                        {#each scaffold_methods as scaffold_method}
-                            {#if user_report[scaffold_method]}
-                                <div class="spacing_vert_60 section_indent">
-                                    <div class="head_5">
-                                        {scaffold_method} ({get_complete_ratio(user_report[scaffold_method])})
-                                        [<a href={get_url(user_report["user"], scaffold_method)} target="_blank">link</a>]
-                                    </div>
-                                    {#each user_report[scaffold_method] as report}
-                                        <div class="spacing_vert_40 section_indent">
-                                            <div class="head_6_non_cap">
-                                                {report["title"]}
-                                            </div>
-
-                                            <div class="spacing_vert_20">
-                                                <div class="">
-                                                    <b>Error type</b>
-                                                </div>
-                                                {report["error_type"]}
-                                            </div>
-
-                                            <div class="spacing_vert_20">
-                                                <div class="">
-                                                    <b>Evidence</b>
-                                                </div>
-                                                {#if report["evidence"].length > 0}
-                                                    <ClusterResults
-                                                        cluster={null}
-                                                        model={null}
-                                                        data={{"cluster_comments": report["evidence"]}}
-                                                        show_vis={false}
-                                                        show_checkboxes={false}
-                                                        table_width_pct={100}
-                                                        rowsPerPage={10}
-                                                        table_id={"panel"}
-                                                    />
-                                                {:else}
-                                                    <p class="grey_text">
-                                                        No examples added
-                                                    </p>
-                                                {/if}
-                                            </div>
-
-                                            <div class="spacing_vert_20">
-                                                <div class="">
-                                                    <b>Summary/Suggestions</b>
-                                                </div>
-                                                {report["text_entry"]}
-                                            </div>
-
-                                            <div class="spacing_vert_20">
-                                                <b>Completed</b>
-                                                <FormField>
-                                                    <Checkbox checked={report["complete_status"]} disabled/>
-                                                </FormField>
-                                            </div>
-
-                                        </div>
-                                    {/each}
-                                </div>
-                            {/if}
-                        {/each}
-                    {/each}
-                {/if}
-            {:catch error}
-                <p style="color: red">{error.message}</p>
-            {/await}
-        </div>
-    </div>
-</div>
-
-<style>
-    .panel {
-        width: 80%;
-        padding: 50px;
-    }
-</style>
indie_label_svelte/src/SelectUserDialog.svelte
DELETED
@@ -1,66 +0,0 @@
-<script lang="ts">
-    import Dialog, { Title, Content, Actions } from "@smui/dialog";
-    import Button, { Label } from "@smui/button";
-    import Textfield from "@smui/textfield";
-    import Select, { Option } from "@smui/select";
-    import { user } from "./stores/cur_user_store.js";
-    import { users } from "./stores/all_users_store.js";
-
-    export let open;
-    export let cur_user;
-    let cur_user_tf = cur_user;
-    let cur_user_sel = cur_user;
-
-    let all_users;
-    users.subscribe((value) => {
-        all_users = value;
-    });
-
-    function updateUserTextField() {
-        user.update((value) => cur_user_tf);
-        if (!all_users.includes(user)) {
-            all_users = all_users.concat(cur_user_tf);
-            users.update(all_users);
-        }
-        open = false;
-    }
-
-    function updateUserSel() {
-        user.update((value) => cur_user_sel);
-        open = false;
-    }
-</script>
-
-<div>
-    <Dialog
-        bind:open
-        aria-labelledby="simple-title"
-        aria-describedby="simple-content"
-    >
-        <!-- Title cannot contain leading whitespace due to mdc-typography-baseline-top() -->
-        <Title id="simple-title">Select Current User</Title>
-        <Content id="simple-content">
-            <Textfield bind:value={cur_user_tf} label="Enter user's name" />
-
-            <Select bind:value={cur_user_sel} label="Select Menu">
-                {#each all_users as u}
-                    <Option value={u}>{u}</Option>
-                {/each}
-            </Select>
-        </Content>
-        <Actions>
-            <Button on:click={updateUserTextField}>
-                <Label>Update from TextField</Label>
-            </Button>
-            <Button on:click={updateUserSel}>
-                <Label>Update from Select</Label>
-            </Button>
-        </Actions>
-    </Dialog>
-</div>
-
-<style>
-    :global(.mdc-dialog__surface) {
-        height: 300px;
-    }
-</style>
indie_label_svelte/src/StudyLinks.svelte
DELETED
@@ -1,59 +0,0 @@
-<script lang="ts">
-    import { user } from "./stores/cur_user_store.js";
-
-    let cur_user;
-    user.subscribe((value) => {
-        cur_user = value;
-    });
-
-</script>
-
-<svelte:head>
-    <title>Study Links</title>
-</svelte:head>
-
-<div class="panel">
-    <div class="panel_contents">
-        <div>
-            <h3>Study Links</h3>
-        </div>
-
-        <div>
-            <!-- <div class="head_5">{cur_user}</div> -->
-            <div class="section_indent">
-                <ul>
-                    <li>Labeling pages
-                        <ul>
-                            <li>
-                                <a href="http://localhost:5001/?user={cur_user}&tab=labeling&label_mode=3" target="_blank">Group-based model</a>
-                            </li>
-                            <li>
-                                <a href="http://localhost:5001/?user={cur_user}&tab=labeling&label_mode=0" target="_blank">Personalized model</a>
-                            </li>
-                        </ul>
-                    </li>
-                    <li>Auditing pages
-                        <ul>
-                            <li>
-                                <a href="http://localhost:5001/?user={cur_user}&scaffold=personal_group" target="_blank">Group-based audit - personal scaffold</a>
-                            </li>
-                            <li>
-                                <a href="http://localhost:5001/?user={cur_user}&scaffold=personal" target="_blank">Individual audit - personal scaffold</a>
-                            </li>
-                            <li>
-                                <a href="http://localhost:5001/?user={cur_user}&scaffold=prompts" target="_blank">Individual audit - prompt scaffold</a>
-                            </li>
-                        </ul>
-                    </li>
-                </ul>
-            </div>
-        </div>
-    </div>
-</div>
-
-<style>
-    .panel {
-        width: 80%;
-        padding: 50px;
-    }
-</style>
indie_label_svelte/src/SubmitReportDialog.svelte
ADDED
@@ -0,0 +1,120 @@
+<script lang="ts">
+    import Dialog, { Title, Content, Actions } from "@smui/dialog";
+    import Button, { Label } from "@smui/button";
+    import Textfield from "@smui/textfield";
+    import Select, { Option } from "@smui/select";
+    import CircularProgress from '@smui/circular-progress';
+
+    export let open;
+    export let cur_user;
+    export let all_reports;
+    let email = "";
+    let all_sep_options = [
+        "Accuracy",
+        "Bias/Discrimination",
+        "Adversarial Example",
+        "Other",
+    ];
+    let sep_selection = "";
+
+    let promise_submit = Promise.resolve(null);
+    function handleSubmitReport() {
+        promise_submit = submitReport();
+    }
+
+    async function submitReport() {
+        let req_params = {
+            cur_user: cur_user,
+            reports: JSON.stringify(all_reports),
+            email: email,
+            sep_selection: sep_selection,
+        };
+
+        let params = new URLSearchParams(req_params).toString();
+        const response = await fetch("./submit_avid_report?" + params);
+        const text = await response.text();
+        const data = JSON.parse(text);
+        return data;
+    }
+
+</script>
+
+<div>
+    <Dialog
+        bind:open
+        aria-labelledby="simple-title"
+        aria-describedby="simple-content"
+    >
+        <!-- Title cannot contain leading whitespace due to mdc-typography-baseline-top() -->
+        <Title id="simple-title">Send All Audit Reports</Title>
+        <Content id="simple-content">
+            <!-- Description -->
+            <div>
+                <b>When you are ready to send all of your audit reports to the <a href="https://avidml.org/" target="_blank">AI Vulnerability Database</a> (AVID), please fill out the following information.</b>
+                Only your submitted reports will be stored in the database for further analysis. While you can submit reports anonymously, we encourage you to provide your email so that we can contact you if we have any questions.
+            </div>
+
+            <!-- Summary of complete reports -->
+            <div>
+                <p><b>Summary of Reports to Send</b> (Reports that include evidence and are marked as complete)</p>
+                <ul>
+                    {#each all_reports as report}
+                        {#if report["complete_status"] && (report["evidence"].length > 0)}
+                            <li>{report["title"]}</li>
+                            <ul>
+                                <li>Error Type: {report["error_type"]}</li>
+                                <li>Evidence: Includes {report["evidence"].length} example{(report["evidence"].length > 1) ? 's' : ''}</li>
+                                <li>Summary/Suggestions: {report["text_entry"]}</li>
+                            </ul>
+                        {/if}
+                    {/each}
+                </ul>
+            </div>

+            <!-- Form fields -->
+            <div>
+                <Select bind:value={sep_selection} label="Audit category" style="width: 90%">
+                    {#each all_sep_options as opt}
+                        <Option value={opt}>{opt}</Option>
+                    {/each}
+                </Select>
+            </div>
+            <div>
+                <Textfield bind:value={email} label="(Optional) Contact email" style="width: 90%" />
+            </div>
+
+            <!-- Submission and status message -->
+            <div class="dialog_footer">
+                <Button on:click={handleSubmitReport} variant="outlined">
+                    <Label>Submit Report to AVID</Label>
+                </Button>
+
+                <div>
+                    <span style="color: grey"><i>
+                        {#await promise_submit}
+                            <CircularProgress style="height: 32px; width: 32px;" indeterminate />
+                        {:then result}
+                            {#if result}
+                                Successfully sent reports! You may close this window.
+                            {/if}
+                        {:catch error}
+                            <p style="color: red">{error.message}</p>
+                        {/await}
+                    </i></span>
+                </div>
+            </div>
+        </Content>
+    </Dialog>
+</div>
+
+<style>
+    :global(.mdc-dialog__surface) {
+        min-width: 50%;
+        min-height: 50%;
+        margin-left: 30%;
+    }
+
+    .dialog_footer {
+        padding: 20px 0px;
+    }
+</style>
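
The new SubmitReportDialog component drives the /submit_avid_report route added to server.py below: it JSON-encodes all reports into a single `reports` query parameter alongside the user, the selected audit category, and an optional email. A minimal Python sketch of the equivalent request, assuming a local dev server at http://localhost:5001 (hypothetical; match your deployment):

import json
import requests

BASE_URL = "http://localhost:5001"  # assumption: local Flask server

# Illustrative report in the shape the dialog summarizes
# (title, error type, evidence list, free-text summary, completion flag).
reports = [{
    "title": "Comments mentioning LGBTQ+ identity",
    "error_type": "System is over-sensitive",
    "evidence": [{"comment": "example comment", "item_id": 1}],
    "text_entry": "Non-derogatory mentions are rated as toxic.",
    "complete_status": True,
}]

# Same query-string encoding the dialog builds with URLSearchParams before fetch().
resp = requests.get(
    f"{BASE_URL}/submit_avid_report",
    params={
        "cur_user": "DemoUser",
        "reports": json.dumps(reports),
        "email": "auditor@example.com",
        "sep_selection": "Bias/Discrimination",
    },
)
print(resp.json())  # expected: {"status": "success"}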
indie_label_svelte/src/TopicTraining.svelte
CHANGED
@@ -4,21 +4,15 @@
     import Button, { Label } from "@smui/button";
     import DataTable, { Head, Body, Row, Cell } from "@smui/data-table";
     import LinearProgress from '@smui/linear-progress';
-    import { user } from './stores/cur_user_store.js';
     import { model_chosen } from './stores/cur_model_store.js';
 
     export let topic;
     export let model_name = null;
+    export let cur_user;
 
     let to_label = {};
     let promise = Promise.resolve(null);
 
-    // Get current user
-    let cur_user;
-    user.subscribe(value => {
-        cur_user = value;
-    });
-
     // Get current model
     if (model_name == null) {
         model_chosen.subscribe(value => {
@@ -81,7 +75,6 @@
             topic: topic,
         };
 
-        console.log("topic training model name", model_name);
         let params = new URLSearchParams(req_params).toString();
         const response = await fetch("./get_personalized_model_topic?" + params); // TODO
         const text = await response.text();
@@ -90,7 +83,6 @@
         model_name = data["new_model_name"];
         model_chosen.update((value) => model_name);
 
-        console.log("topicTraining", data);
         return data;
     }
 </script>
indie_label_svelte/src/stores/all_users_store.js
DELETED
@@ -1,6 +0,0 @@
-import { writable } from 'svelte/store';
-
-// Fallback if request doesn't work
-let all_users = ["DemoUser"];
-
-export const users = writable(all_users);
indie_label_svelte/src/stores/cur_user_store.js
DELETED
@@ -1,3 +0,0 @@
-import { writable } from 'svelte/store';
-
-export const user = writable("DemoUser");
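
With both stores removed, the client no longer tracks the current user itself; the server.py changes below instead assign an ID when the page arrives without one, using the friendlywords package. A minimal sketch of that default-ID logic (assign_user_id is a hypothetical standalone helper; the real check lives inline in audit_settings):

import friendlywords as fw

def assign_user_id(requested):
    # The client sends the literal string "null" when no user is set in the URL.
    if requested == "null":
        # Generate a random two-word user ID, e.g. "brave_otter".
        return fw.generate(2, separator="_")
    return requested

print(assign_user_id("null"))   # random two-word ID
print(assign_user_id("alice"))  # "alice"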
server.py
CHANGED
@@ -17,10 +17,15 @@ import math
 import altair as alt
 import matplotlib.pyplot as plt
 import time
+import friendlywords as fw
 
 import audit_utils as utils
 
+import requests
+
+
 app = Flask(__name__)
+DEBUG = False  # Debug flag for development; set to False for production
 
 # Path for our main Svelte page
 @app.route("/")
@@ -35,15 +40,19 @@ def home(path):
 
 ########################################
 # ROUTE: /AUDIT_SETTINGS
-comments_grouped_full_topic_cat = pd.read_pickle("data/comments_grouped_full_topic_cat2_persp.pkl")
 
 @app.route("/audit_settings")
-def audit_settings():
+def audit_settings(debug=DEBUG):
     # Fetch page content
     user = request.args.get("user")
     scaffold_method = request.args.get("scaffold_method")
 
-
+    # Assign user ID if none is provided (default case)
+    if user == "null":
+        # Generate random two-word user ID
+        user = fw.generate(2, separator="_")
+
+    user_models = utils.get_user_model_names(user)
     grp_models = [m for m in user_models if m.startswith(f"model_{user}_group_")]
 
     clusters = utils.get_unique_topics()
@@ -66,19 +75,6 @@ def audit_settings():
         "options": [{"value": i, "text": cluster} for i, cluster in enumerate(clusters)],
     },]
 
-    if scaffold_method == "personal_cluster":
-        cluster_model = user_models[0]
-        personal_cluster_file = f"./data/personal_cluster_dfs/{cluster_model}.pkl"
-        if os.path.isfile(personal_cluster_file) and cluster_model != "":
-            print("audit_settings", personal_cluster_file, cluster_model)
-            topics_under_top, topics_over_top = utils.get_personal_clusters(cluster_model)
-            pers_cluster = topics_under_top + topics_over_top
-            pers_cluster_options = {
-                "label": "Personalized clusters",
-                "options": [{"value": i, "text": cluster} for i, cluster in enumerate(pers_cluster)],
-            }
-            clusters_options.insert(0, pers_cluster_options)
-
     clusters_for_tuning = utils.get_large_clusters(min_n=150)
     clusters_for_tuning_options = [{"value": i, "text": cluster} for i, cluster in enumerate(clusters_for_tuning)] # Format for Svelecte UI element
 
@@ -86,54 +82,33 @@ def audit_settings():
         "personalized_models": user_models,
         "personalized_model_grp": grp_models,
         "perf_metrics": ["Average rating difference", "Mean Absolute Error (MAE)", "Root Mean Squared Error (RMSE)", "Mean Squared Error (MSE)"],
-        "breakdown_categories": ['Topic', 'Toxicity Category', 'Toxicity Severity'],
         "clusters": clusters_options,
         "clusters_for_tuning": clusters_for_tuning_options,
+        "user": user,
     }
     return json.dumps(context)
 
-########################################
-# ROUTE: /GET_USERS
-@app.route("/get_users")
-def get_users():
-    # Fetch page content
-    with open(f"./data/users_to_models.pkl", "rb") as f:
-        users_to_models = pickle.load(f)
-    users = list(users_to_models.keys())
-    context = {
-        "users": users,
-    }
-    return json.dumps(context)
 
 ########################################
 # ROUTE: /GET_AUDIT
 @app.route("/get_audit")
 def get_audit():
     pers_model = request.args.get("pers_model")
-    perf_metric = request.args.get("perf_metric")
-    breakdown_axis = request.args.get("breakdown_axis")
-    breakdown_sort = request.args.get("breakdown_sort")
-    n_topics = int(request.args.get("n_topics"))
     error_type = request.args.get("error_type")
     cur_user = request.args.get("cur_user")
    topic_vis_method = request.args.get("topic_vis_method")
     if topic_vis_method == "null":
         topic_vis_method = "median"
 
-    if
-
-    elif breakdown_sort == "default":
-        sort_class_plot = False
+    if pers_model == "" or pers_model == "null" or pers_model == "undefined":
+        overall_perf = None
     else:
-
-
-
-
-
-
-        breakdown_axis=breakdown_axis,
-        topic_vis_method=topic_vis_method,
-    )
+        overall_perf = utils.show_overall_perf(
+            cur_model=pers_model,
+            error_type=error_type,
+            cur_user=cur_user,
+            topic_vis_method=topic_vis_method,
+        )
 
     results = {
         "overall_perf": overall_perf,
@@ -143,60 +118,34 @@ def get_audit():
 ########################################
 # ROUTE: /GET_CLUSTER_RESULTS
 @app.route("/get_cluster_results")
-def get_cluster_results():
+def get_cluster_results(debug=DEBUG):
     pers_model = request.args.get("pers_model")
-
+    cur_user = request.args.get("cur_user")
     cluster = request.args.get("cluster")
-    example_sort = request.args.get("example_sort")
-    comparison_group = request.args.get("comparison_group")
     topic_df_ids = request.args.getlist("topic_df_ids")
     topic_df_ids = [int(val) for val in topic_df_ids[0].split(",") if val != ""]
     search_type = request.args.get("search_type")
     keyword = request.args.get("keyword")
-    n_neighbors = request.args.get("n_neighbors")
-    if n_neighbors != "null":
-        n_neighbors = int(n_neighbors)
-    neighbor_threshold = 0.6
     error_type = request.args.get("error_type")
     use_model = request.args.get("use_model") == "true"
-    scaffold_method = request.args.get("scaffold_method")
-
 
-
-
-    if os.path.isfile(cluster_model_file):
-        pers_model = f"{pers_model}_{cluster}"
-
-    print(f"get_cluster_results using model {pers_model}")
-
-    other_ids = []
-    perf_metric = "avg_diff"
-    sort_ascending = True if example_sort == "ascending" else False
+    if debug:
+        print(f"get_cluster_results using model {pers_model}")
 
+    # Prepare cluster df (topic_df)
     topic_df = None
-
-
-
-
-    with
-
-
-
-
-    with open(f"data/preds_dfs/{pers_model}.pkl", "rb") as f:
-        topic_df = pickle.load(f)
-    if search_type == "cluster":
-        # Display examples with comment, your pred, and other users' pred
-        topic_df = topic_df[(topic_df["topic"] == cluster) | (topic_df["item_id"].isin(topic_df_ids))]
-
-    elif search_type == "neighbors":
-        neighbor_ids = utils.get_match(topic_df_ids, K=n_neighbors, threshold=neighbor_threshold, debug=False)
-        topic_df = topic_df[(topic_df["item_id"].isin(neighbor_ids)) | (topic_df["item_id"].isin(topic_df_ids))]
-    elif search_type == "keyword":
-        topic_df = topic_df[(topic_df["comment"].str.contains(keyword, case=False, regex=False)) | (topic_df["item_id"].isin(topic_df_ids))]
-
+    preds_file = utils.get_preds_file(cur_user, pers_model)
+    with open(preds_file, "rb") as f:
+        topic_df = pickle.load(f)
+    if search_type == "cluster":
+        # Display examples with comment, your pred, and other users' pred
+        topic_df = topic_df[(topic_df["topic"] == cluster) | (topic_df["item_id"].isin(topic_df_ids))]
+    elif search_type == "keyword":
+        topic_df = topic_df[(topic_df["comment"].str.contains(keyword, case=False, regex=False)) | (topic_df["item_id"].isin(topic_df_ids))]
+
     topic_df = topic_df.drop_duplicates()
-
+    if debug:
+        print("len topic_df", len(topic_df))
 
     # Handle empty results
     if len(topic_df) == 0:
@@ -217,18 +166,20 @@ def get_cluster_results():
 
     topic_df_ids = topic_df["item_id"].unique().tolist()
 
-
-
+    # Prepare overview plot for the cluster
+    if use_model:
+        # Display results with the model as a reference point
+        cluster_overview_plot_json, sampled_df = utils.plot_overall_vis_cluster(cur_user, topic_df, error_type=error_type, n_comments=500)
     else:
-        #
-        cluster_overview_plot_json, sampled_df = utils.
+        # Display results without a model
+        cluster_overview_plot_json, sampled_df = utils.plot_overall_vis_cluster_no_model(cur_user, topic_df, n_comments=500)
 
-    cluster_comments = utils.get_cluster_comments(sampled_df,error_type=error_type,
+    cluster_comments = utils.get_cluster_comments(sampled_df,error_type=error_type, use_model=use_model) # New version of cluster comment table
 
     results = {
         "topic_df_ids": topic_df_ids,
         "cluster_overview_plot_json": json.loads(cluster_overview_plot_json),
-        "cluster_comments": cluster_comments,
+        "cluster_comments": cluster_comments.to_json(orient="records"),
     }
     return json.dumps(results)
 
@@ -255,7 +206,7 @@ def get_group_size():
 ########################################
 # ROUTE: /GET_GROUP_MODEL
 @app.route("/get_group_model")
-def get_group_model():
+def get_group_model(debug=DEBUG):
     # Fetch info for initial labeling component
     model_name = request.args.get("model_name")
     user = request.args.get("user")
@@ -275,28 +226,21 @@ def get_group_model():
     grp_ids = grp_df["worker_id"].tolist()
 
     ratings_grp = utils.get_grp_model_labels(
-        comments_df=comments_grouped_full_topic_cat,
         n_label_per_bin=BIN_DISTRIB,
         score_bins=SCORE_BINS,
         grp_ids=grp_ids,
     )
 
-    # print("ratings_grp", ratings_grp)
-
     # Modify model name
     model_name = f"{model_name}_group_gender{sel_gender}_relig{sel_relig}_pol{sel_pol}_race{sel_race_orig}_lgbtq_{sel_lgbtq}"
-
-    label_dir = f"./data/labels/{model_name}"
-    # Create directory for labels if it doesn't yet exist
-    if not os.path.isdir(label_dir):
-        os.mkdir(label_dir)
-    last_label_i = len([name for name in os.listdir(label_dir) if (os.path.isfile(os.path.join(label_dir, name)) and name.endswith('.pkl'))])
+    utils.setup_user_model_dirs(user, model_name)
 
     # Train group model
-    mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name,
+    mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, ratings_grp, user)
 
     duration = time.time() - start
-
+    if debug:
+        print("Time to train/cache:", duration)
 
     context = {
         "group_size": group_size,
@@ -314,11 +258,10 @@ def get_labeling():
     clusters_for_tuning = utils.get_large_clusters(min_n=150)
     clusters_for_tuning_options = [{"value": i, "text": cluster} for i, cluster in enumerate(clusters_for_tuning)] # Format for Svelecte UI element
 
-
-    model_name_suggestion = f"model_{user}"
+    model_name_suggestion = f"my_model"
 
     context = {
-        "personalized_models": utils.
+        "personalized_models": utils.get_user_model_names(user),
         "model_name_suggestion": model_name_suggestion,
        "clusters_for_tuning": clusters_for_tuning_options,
     }
@@ -326,15 +269,16 @@ def get_labeling():
 
 ########################################
 # ROUTE: /GET_COMMENTS_TO_LABEL
-
-BIN_DISTRIB = [
+if DEBUG:
+    BIN_DISTRIB = [1, 2, 4, 2, 1] # 10 comments
+else:
+    BIN_DISTRIB = [2, 4, 8, 4, 2] # 20 comments
 SCORE_BINS = [(0.0, 0.5), (0.5, 1.5), (1.5, 2.5), (2.5, 3.5), (3.5, 4.01)]
 @app.route("/get_comments_to_label")
 def get_comments_to_label():
     n = int(request.args.get("n"))
     # Fetch examples to label
     to_label_ids = utils.create_example_sets(
-        comments_df=comments_grouped_full_topic_cat,
        n_label_per_bin=BIN_DISTRIB,
         score_bins=SCORE_BINS,
         keyword=None
@@ -351,14 +295,11 @@ def get_comments_to_label():
 
 ########################################
 # ROUTE: /GET_COMMENTS_TO_LABEL_TOPIC
-N_LABEL_PER_BIN_TOPIC = 2 # 2 * 5 = 10 comments
 @app.route("/get_comments_to_label_topic")
 def get_comments_to_label_topic():
     # Fetch examples to label
     topic = request.args.get("topic")
     to_label_ids = utils.create_example_sets(
-        comments_df=comments_grouped_full_topic_cat,
-        # n_label_per_bin=N_LABEL_PER_BIN_TOPIC,
         n_label_per_bin=BIN_DISTRIB,
         score_bins=SCORE_BINS,
         keyword=None,
@@ -375,38 +316,33 @@ def get_comments_to_label_topic():
 ########################################
 # ROUTE: /GET_PERSONALIZED_MODEL
 @app.route("/get_personalized_model")
-def get_personalized_model():
+def get_personalized_model(debug=DEBUG):
     model_name = request.args.get("model_name")
     ratings_json = request.args.get("ratings")
     mode = request.args.get("mode")
     user = request.args.get("user")
     ratings = json.loads(ratings_json)
-
-
+    if debug:
+        print(ratings)
+    start = time.time()
 
-
-    # Create directory for labels if it doesn't yet exist
-    if not os.path.isdir(label_dir):
-        os.mkdir(label_dir)
-    last_label_i = len([name for name in os.listdir(label_dir) if (os.path.isfile(os.path.join(label_dir, name)) and name.endswith('.pkl'))])
+    utils.setup_user_model_dirs(user, model_name)
 
     # Handle existing or new model cases
     if mode == "view":
         # Fetch prior model performance
-
-            raise Exception(f"Model {model_name} does not exist")
-        else:
-            mae, mse, rmse, avg_diff, ratings_prev = utils.fetch_existing_data(model_name, last_label_i)
+        mae, mse, rmse, avg_diff, ratings_prev = utils.fetch_existing_data(user, model_name)
 
     elif mode == "train":
         # Train model and cache predictions using new labels
         print("get_personalized_model train")
-        mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name,
-
-
-
+        mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, ratings, user)
+
+        if debug:
+            duration = time.time() - start
+            print("Time to train/cache:", duration)
 
-    perf_plot, mae_status = utils.plot_train_perf_results(model_name, mae)
+    perf_plot, mae_status = utils.plot_train_perf_results(user, model_name, mae)
     perf_plot_json = perf_plot.to_json()
 
     def round_metric(x):
@@ -419,7 +355,6 @@ def get_personalized_model():
         "mse": round_metric(mse),
         "rmse": round_metric(rmse),
         "avg_diff": round_metric(avg_diff),
-        "duration": duration,
         "ratings_prev": ratings_prev,
         "perf_plot_json": json.loads(perf_plot_json),
     }
@@ -429,34 +364,29 @@ def get_personalized_model():
 ########################################
 # ROUTE: /GET_PERSONALIZED_MODEL_TOPIC
 @app.route("/get_personalized_model_topic")
-def get_personalized_model_topic():
+def get_personalized_model_topic(debug=DEBUG):
     model_name = request.args.get("model_name")
     ratings_json = request.args.get("ratings")
     user = request.args.get("user")
     ratings = json.loads(ratings_json)
     topic = request.args.get("topic")
-
+    if debug:
+        print(ratings)
     start = time.time()
 
     # Modify model name
     model_name = f"{model_name}_{topic}"
-
-    label_dir = f"./data/labels/{model_name}"
-    # Create directory for labels if it doesn't yet exist
-    if not os.path.isdir(label_dir):
-        os.mkdir(label_dir)
-    last_label_i = len([name for name in os.listdir(label_dir) if (os.path.isfile(os.path.join(label_dir, name)) and name.endswith('.pkl'))])
+    utils.setup_user_model_dirs(user, model_name)
 
     # Handle existing or new model cases
     # Train model and cache predictions using new labels
-
-
+    if debug:
+        print("get_personalized_model_topic train")
+    mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, ratings, user, topic=topic)
 
-
-
-
-    def round_metric(x):
-        return np.round(abs(x), 3)
+    if debug:
+        duration = time.time() - start
+        print("Time to train/cache:", duration)
 
     results = {
         "success": "success",
@@ -477,17 +407,13 @@ def get_reports():
     if topic_vis_method == "null":
         topic_vis_method = "fp_fn"
 
-    # Load reports for current user from stored
-
-
-
-    if not os.path.isfile(user_file):
+    # Load reports for current user from stored file
+    reports_file = utils.get_reports_file(cur_user, model)
+    if not os.path.isfile(reports_file):
         if scaffold_method == "fixed":
             reports = get_fixed_scaffold()
         elif (scaffold_method == "personal" or scaffold_method == "personal_group" or scaffold_method == "personal_test"):
-            reports = get_personal_scaffold(model, topic_vis_method)
-        elif (scaffold_method == "personal_cluster"):
-            reports = get_personal_cluster_scaffold(model)
+            reports = get_personal_scaffold(cur_user, model, topic_vis_method)
        elif scaffold_method == "prompts":
             reports = get_prompts_scaffold()
         elif scaffold_method == "tutorial":
@@ -505,8 +431,8 @@ def get_reports():
         ]
     else:
         # Load from pickle file
-        with open(
-            reports =
+        with open(reports_file, "rb") as f:
+            reports = json.load(f)
 
     results = {
         "reports": reports,
@@ -572,23 +498,13 @@ def get_tutorial_scaffold():
     },
     ]
 
-def get_personal_cluster_scaffold(model):
-    topics_under_top, topics_over_top = utils.get_personal_clusters(model)
-
-    report_under = [get_empty_report(topic, "System is under-sensitive") for topic in topics_under_top]
-
-    report_over = [get_empty_report(topic, "System is over-sensitive") for topic in topics_over_top]
-    reports = (report_under + report_over)
-    random.shuffle(reports)
-    return reports
-
 def get_topic_errors(df, topic_vis_method, threshold=2):
-    topics = df["
+    topics = df["topic"].unique().tolist()
     topic_errors = {}
     for topic in topics:
-        t_df = df[df["
-        y_true = t_df["pred"].to_numpy()
-        y_pred = t_df["
+        t_df = df[df["topic"] == topic]
+        y_true = t_df["pred"].to_numpy() # Predicted user rating (treated as ground truth)
+        y_pred = t_df["rating_sys"].to_numpy() # System rating (which we're auditing)
         if topic_vis_method == "mae":
             t_err = mean_absolute_error(y_true, y_pred)
         elif topic_vis_method == "mse":
@@ -596,8 +512,8 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
         elif topic_vis_method == "avg_diff":
             t_err = np.mean(y_true - y_pred)
         elif topic_vis_method == "fp_proportion":
-            y_true = [0 if rating < threshold else 1 for rating in
-            y_pred = [0 if rating < threshold else 1 for rating in
+            y_true = [0 if rating < threshold else 1 for rating in y_true]
+            y_pred = [0 if rating < threshold else 1 for rating in y_pred]
            try:
                 tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
             except:
@@ -605,8 +521,8 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
             total = float(len(y_true))
             t_err = fp / total
         elif topic_vis_method == "fn_proportion":
-            y_true = [0 if rating < threshold else 1 for rating in
-            y_pred = [0 if rating < threshold else 1 for rating in
+            y_true = [0 if rating < threshold else 1 for rating in y_true]
+            y_pred = [0 if rating < threshold else 1 for rating in y_pred]
             try:
                 tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
             except:
@@ -617,65 +533,69 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
 
     return topic_errors
 
-def get_personal_scaffold(model, topic_vis_method, n_topics=200, n=5):
+def get_personal_scaffold(cur_user, model, topic_vis_method, n_topics=200, n=5, debug=DEBUG):
     threshold = utils.get_toxic_threshold()
 
     # Get topics with greatest amount of error
-
+    preds_file = utils.get_preds_file(cur_user, model)
+    with open(preds_file, "rb") as f:
         preds_df = pickle.load(f)
-    preds_df_mod = preds_df.
-    preds_df_mod = preds_df_mod[preds_df_mod["
-    preds_df_mod = preds_df_mod[preds_df_mod["topic_id_"] < n_topics]
+    preds_df_mod = preds_df[preds_df["user_id"] == cur_user].sort_values(by=["item_id"]).reset_index()
+    preds_df_mod = preds_df_mod[preds_df_mod["topic_id"] < n_topics]
 
     if topic_vis_method == "median":
-        df = preds_df_mod.groupby(["
+        df = preds_df_mod.groupby(["topic", "user_id"]).median().reset_index()
     elif topic_vis_method == "mean":
-        df = preds_df_mod.groupby(["
+        df = preds_df_mod.groupby(["topic", "user_id"]).mean().reset_index()
     elif topic_vis_method == "fp_fn":
         for error_type in ["fn_proportion", "fp_proportion"]:
             topic_errors = get_topic_errors(preds_df_mod, error_type)
-            preds_df_mod[error_type] = [topic_errors[topic] for topic in preds_df_mod["
-        df = preds_df_mod.groupby(["
+            preds_df_mod[error_type] = [topic_errors[topic] for topic in preds_df_mod["topic"].tolist()]
+        df = preds_df_mod.groupby(["topic", "user_id"]).mean().reset_index()
     else:
         # Get error for each topic
         topic_errors = get_topic_errors(preds_df_mod, topic_vis_method)
-        preds_df_mod[topic_vis_method] = [topic_errors[topic] for topic in preds_df_mod["
-        df = preds_df_mod.groupby(["
+        preds_df_mod[topic_vis_method] = [topic_errors[topic] for topic in preds_df_mod["topic"].tolist()]
+        df = preds_df_mod.groupby(["topic", "user_id"]).mean().reset_index()
 
     # Get system error
-
+    junk_topics = ["53_maiareficco_kallystas_dyisisitmanila_tractorsazi", "-1_dude_bullshit_fight_ain"]
+    df = df[~df["topic"].isin(junk_topics)] # Exclude known "junk topics"
 
     if topic_vis_method == "median" or topic_vis_method == "mean":
-        df["error_magnitude"] = [utils.get_error_magnitude(sys, user, threshold) for sys, user in zip(df["
-        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["
+        df["error_magnitude"] = [utils.get_error_magnitude(sys, user, threshold) for sys, user in zip(df["rating_sys"].tolist(), df["pred"].tolist())]
+        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["rating_sys"].tolist(), df["pred"].tolist())]
 
         df_under = df[df["error_type"] == "System is under-sensitive"]
         df_under = df_under.sort_values(by=["error_magnitude"], ascending=False).head(n) # surface largest errors first
-        report_under = [get_empty_report(row["
+        report_under = [get_empty_report(row["topic"], row["error_type"]) for _, row in df_under.iterrows()]
 
         df_over = df[df["error_type"] == "System is over-sensitive"]
         df_over = df_over.sort_values(by=["error_magnitude"], ascending=False).head(n) # surface largest errors first
-        report_over = [get_empty_report(row["
+        report_over = [get_empty_report(row["topic"], row["error_type"]) for _, row in df_over.iterrows()]
 
         # Set up reports
-        # return [get_empty_report(row["topic_"], row["error_type"]) for index, row in df.iterrows()]
         reports = (report_under + report_over)
         random.shuffle(reports)
     elif topic_vis_method == "fp_fn":
         df_under = df.sort_values(by=["fn_proportion"], ascending=False).head(n)
         df_under = df_under[df_under["fn_proportion"] > 0]
-
+        if debug:
+            print(df_under[["topic", "fn_proportion"]])
+        report_under = [get_empty_report(row["topic"], "System is under-sensitive") for _, row in df_under.iterrows()]
 
         df_over = df.sort_values(by=["fp_proportion"], ascending=False).head(n)
         df_over = df_over[df_over["fp_proportion"] > 0]
-
+        if debug:
+            print(df_over[["topic", "fp_proportion"]])
+        report_over = [get_empty_report(row["topic"], "System is over-sensitive") for _, row in df_over.iterrows()]
 
         reports = (report_under + report_over)
         random.shuffle(reports)
     else:
         df = df.sort_values(by=[topic_vis_method], ascending=False).head(n * 2)
-        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["
-        reports = [get_empty_report(row["
+        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["rating_sys"].tolist(), df["pred"].tolist())]
+        reports = [get_empty_report(row["topic"], row["error_type"]) for _, row in df.iterrows()]
 
     return reports
 
@@ -718,78 +638,88 @@ def get_prompts_scaffold():
     },
     ]
 
+# Filter to eligible reports: those that have been marked complete and include at least one piece of evidence.
+def get_eligible_reports(reports):
+    eligible_reports = []
+    for r in reports:
+        if (r["complete_status"] == True) and (len(r["evidence"]) > 0):
+            eligible_reports.append(r)
+    return eligible_reports
+
+# Submit all reports to AVID
+# Logs the responses
+def submit_reports_to_AVID(reports, cur_user, email, sep_selection, debug=DEBUG):
+    # Set up the connection to AVID
+    root = os.environ.get('AVID_API_URL')
+    api_key = os.environ.get('AVID_API_KEY')
+    key = {"Authorization": api_key}
+
+    reports = get_eligible_reports(reports)
+    if debug:
+        print("Num eligible reports:", len(reports))
+
+    for r in reports:
+        new_report = utils.convert_indie_label_json_to_avid_json(r, cur_user, email, sep_selection)
+        url = root + "submit"
+        response = requests.post(url, json=json.loads(new_report), headers=key) # The loads ensures type compliance
+        uuid = response.json()
+        if debug:
+            print("Report", new_report)
+            print("AVID API response:", response, uuid)
+
 ########################################
 # ROUTE: /SAVE_REPORTS
 @app.route("/save_reports")
-def save_reports():
+def save_reports(debug=DEBUG):
     cur_user = request.args.get("cur_user")
     reports_json = request.args.get("reports")
     reports = json.loads(reports_json)
-
+    model = request.args.get("model")
 
-    # Save reports for current user to
-
-
-
-        pickle.dump(reports, f)
+    # Save reports for current user to file
+    reports_file = utils.get_reports_file(cur_user, model)
+    with open(reports_file, "w", encoding ='utf8') as f:
+        json.dump(reports, f)
 
     results = {
         "status": "success",
     }
+    if debug:
+        print(results)
    return json.dumps(results)
 
 ########################################
-# ROUTE: /
-@app.route("/
-def
-
-
-
-
-    df = utils.get_comments_grouped_full_topic_cat().sample(n=n_examples)
+# ROUTE: /SUBMIT_AVID_REPORT
+@app.route("/submit_avid_report")
+def submit_avid_report():
+    cur_user = request.args.get("cur_user")
+    email = request.args.get("email")
+    sep_selection = request.args.get("sep_selection")
+    reports_json = request.args.get("reports")
 
-
-    df["system_color"] = [utils.get_user_color(sys, threshold) for sys in df["rating"].tolist()] # get cell colors
+    reports = json.loads(reports_json)
 
-
+    # Submit reports to AVID
+    submit_reports_to_AVID(reports, cur_user, email, sep_selection)
 
     results = {
-        "
+        "status": "success",
     }
     return json.dumps(results)
 
 ########################################
-# ROUTE: /
-@app.route("/
-def
-
-
-    users = users.split(",")
-    # print("users", users)
-
-    IGNORE_LIST = ["DemoUser"]
-    report_dir = f"./data/user_reports"
-
+# ROUTE: /GET_EXPLORE_EXAMPLES
+@app.route("/get_explore_examples")
+def get_explore_examples():
+    threshold = utils.get_toxic_threshold()
+    n_examples = int(request.args.get("n_examples"))
 
-    #
-
-
-    for user in users:
-        if user not in IGNORE_LIST:
-            user_results = {}
-            user_results["user"] = user
-            for scaffold_method in ["personal", "personal_group", "prompts"]:
-                # Get results
-                user_file = os.path.join(report_dir, f"{user}_{scaffold_method}.pkl")
-                if os.path.isfile(user_file):
-                    with open(user_file, "rb") as f:
-                        user_results[scaffold_method] = pickle.load(f)
-            results.append(user_results)
-
-    # print("results", results)
+    # Get sample of examples
+    df = utils.get_explore_df(n_examples, threshold)
+    ex_json = df.to_json(orient="records")
 
     results = {
-        "
+        "examples": ex_json,
     }
     return json.dumps(results)
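
The AVID submission path above hinges on get_eligible_reports: only reports that are marked complete and carry at least one piece of evidence are forwarded. A small standalone sketch of that filter, with illustrative report dicts:

def get_eligible_reports(reports):
    # Mirrors server.py: keep reports that are complete and have evidence.
    return [r for r in reports if r["complete_status"] and len(r["evidence"]) > 0]

reports = [
    {"title": "A", "complete_status": True,  "evidence": [{"item_id": 1}]},
    {"title": "B", "complete_status": True,  "evidence": []},                # no evidence, dropped
    {"title": "C", "complete_status": False, "evidence": [{"item_id": 2}]},  # incomplete, dropped
]
print([r["title"] for r in get_eligible_reports(reports)])  # ['A']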