carolanderson committed
Commit dd9e64d
2 Parent(s): edd4636 1882b75

merge changes in user sessions and AVID reporting

.gitignore ADDED
@@ -0,0 +1,5 @@
+ __pycache__/
+ .DS_Store
+ data/
+ test_nbs/
+ data_zips/
audit_utils.py CHANGED
@@ -23,6 +23,7 @@ import time
23
  from sentence_transformers import SentenceTransformer, util
24
  import torch
25
  from bertopic import BERTopic
 
26
 
27
  ########################################
28
  # PRE-LOADING
@@ -37,61 +38,39 @@ alt.renderers.enable('altair_saver', fmts=['vega-lite', 'png'])
37
 
38
  # Data-loading
39
  module_dir = "./"
40
- perf_dir = f"data/perf/"
41
-
42
- # # TEMP reset
43
- # with open(os.path.join(module_dir, "./data/all_model_names.pkl"), "wb") as f:
44
- # all_model_names = []
45
- # pickle.dump(all_model_names, f)
46
- # with open(f"./data/users_to_models.pkl", "wb") as f:
47
- # users_to_models = {}
48
- # pickle.dump(users_to_models, f)
49
-
50
-
51
- with open(os.path.join(module_dir, "data/ids_to_comments.pkl"), "rb") as f:
52
  ids_to_comments = pickle.load(f)
53
- with open(os.path.join(module_dir, "data/comments_to_ids.pkl"), "rb") as f:
54
  comments_to_ids = pickle.load(f)
55
-
56
- all_model_names = sorted([name for name in os.listdir(os.path.join(perf_dir)) if os.path.isdir(os.path.join(perf_dir, name))])
57
- comments_grouped_full_topic_cat = pd.read_pickle("data/comments_grouped_full_topic_cat2_persp.pkl")
58
- sys_eval_df = pd.read_pickle(os.path.join(module_dir, "data/split_data/sys_eval_df.pkl"))
59
- train_df = pd.read_pickle(os.path.join(module_dir, "data/split_data/train_df.pkl"))
60
  train_df_ids = train_df["item_id"].unique().tolist()
61
- model_eval_df = pd.read_pickle(os.path.join(module_dir, "data/split_data/model_eval_df.pkl"))
62
- ratings_df_full = pd.read_pickle(os.path.join(module_dir, "data/ratings_df_full.pkl"))
 
63
 
64
- worker_info_df = pd.read_pickle("./data/worker_info_df.pkl")
65
-
66
- with open(f"./data/users_to_models.pkl", "rb") as f:
67
- users_to_models = pickle.load(f)
68
-
69
- with open("data/perf_1000_topics.pkl", "rb") as f:
70
- perf_1000_topics = pickle.load(f)
71
- with open("data/perf_1000_tox_cat.pkl", "rb") as f:
72
- perf_1000_tox_cat = pickle.load(f)
73
- with open("data/perf_1000_tox_severity.pkl", "rb") as f:
74
- perf_1000_tox_severity = pickle.load(f)
75
- with open("data/user_perf_metrics.pkl", "rb") as f:
76
- user_perf_metrics = pickle.load(f)
77
-
78
- topic_ids = comments_grouped_full_topic_cat.topic_id
79
- topics = comments_grouped_full_topic_cat.topic
80
  topic_ids_to_topics = {topic_ids[i]: topics[i] for i in range(len(topic_ids))}
81
  topics_to_topic_ids = {topics[i]: topic_ids[i] for i in range(len(topic_ids))}
82
- unique_topics_ids = sorted(comments_grouped_full_topic_cat.topic_id.unique())
83
  unique_topics = [topic_ids_to_topics[topic_id] for topic_id in range(len(topic_ids_to_topics) - 1)]
84
 
85
  def get_toxic_threshold():
86
  return TOXIC_THRESHOLD
87
 
88
- def get_all_model_names(user=None):
89
- if (user is None) or (user not in users_to_models):
90
- all_model_names = sorted([name for name in os.listdir(os.path.join(perf_dir)) if os.path.isdir(os.path.join(perf_dir, name))])
91
- return all_model_names
92
  else:
93
- # Fetch the user's models
94
- user_models = users_to_models[user]
 
95
  user_models.sort()
96
  return user_models
97
 
@@ -99,7 +78,7 @@ def get_unique_topics():
99
  return unique_topics
100
 
101
  def get_large_clusters(min_n):
102
- counts_df = comments_grouped_full_topic_cat.groupby(by=["topic_id"]).size().reset_index(name='counts')
103
  counts_df = counts_df[counts_df["counts"] >= min_n]
104
  return [topic_ids_to_topics[t_id] for t_id in sorted(counts_df["topic_id"].tolist()[1:])]
105
 
@@ -137,32 +116,64 @@ readable_to_internal = {
137
  }
138
  internal_to_readable = {v: k for k, v in readable_to_internal.items()}
139
 
140
- # Embeddings for neighbor retrieval
141
- model_name = "paraphrase-MiniLM-L6-v2"
142
- model = SentenceTransformer(model_name)
143
- with open("./data/comments.pkl", "rb") as f:
144
- comments = pickle.load(f)
145
- embeddings = torch.load("./data/embeddings/21_10_embeddings.pt")
146
-
147
- # Perspective API recalibration
148
- def recalib_v1(s):
149
- # convert Perspective score to 0-4 toxicity score
150
- # map 0 persp to 0 (not at all toxic); 0.5 persp to 1 (slightly toxic), 1.0 persp to 4 (extremely toxic)
151
- if s < 0.5:
152
- return (s * 2.)
153
- else:
154
- return ((s - 0.5) * 6.) + 1
155
-
156
- def recalib_v2(s):
157
- # convert Perspective score to 0-4 toxicity score
158
- # just 4x the perspective score
159
- return (s * 4.)
160
-
161
- comments_grouped_full_topic_cat["rating_avg_orig"] = comments_grouped_full_topic_cat["rating"]
162
- comments_grouped_full_topic_cat["rating"] = [recalib_v2(score) for score in comments_grouped_full_topic_cat["persp_score"].tolist()]
163
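For reference, a small sketch of how the two recalibration mappings removed here behave on sample Perspective scores (values follow directly from the formulas above):

    # recalib_v1: piecewise map; recalib_v2: simple 4x scaling of the Perspective score
    for s in (0.0, 0.25, 0.5, 0.75, 1.0):
        v1 = s * 2. if s < 0.5 else (s - 0.5) * 6. + 1
        v2 = s * 4.
        print(s, v1, v2)
    # s=0.0 -> v1=0.0, v2=0.0; s=0.5 -> v1=1.0, v2=2.0; s=1.0 -> v1=4.0, v2=4.0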
 
164
- def get_comments_grouped_full_topic_cat():
165
- return comments_grouped_full_topic_cat
166
 
167
  ########################################
168
  # General utils
@@ -192,22 +203,6 @@ def my_bootstrap(vals, n_boot, alpha):
192
 
193
  ########################################
194
  # GET_AUDIT utils
195
- def other_users_perf(perf_metrics, metric, user_metric, alpha=0.95, n_boot=501):
196
- ind = get_metric_ind(metric)
197
-
198
- metric_vals = [metric_vals[ind] for metric_vals in perf_metrics.values()]
199
- metric_avg = np.median(metric_vals)
200
-
201
- # Future: use provided sample to perform bootstrap sampling
202
- ci_1 = mne.stats.bootstrap_confidence_interval(np.array(metric_vals), ci=alpha, n_bootstraps=n_boot, stat_fun="median")
203
-
204
- bs_samples, ci = my_bootstrap(metric_vals, n_boot, alpha)
205
-
206
- # Get user's percentile
207
- percentile = stats.percentileofscore(bs_samples, user_metric)
208
-
209
- return metric_avg, ci, percentile, metric_vals
210
-
211
  def plot_metric_histogram(metric, user_metric, other_metric_vals, n_bins=10):
212
  hist, bin_edges = np.histogram(other_metric_vals, bins=n_bins, density=False)
213
  data = pd.DataFrame({
@@ -239,394 +234,38 @@ def plot_metric_histogram(metric, user_metric, other_metric_vals, n_bins=10):
239
 
240
  return (bar + rule).interactive()
241
 
242
- def get_toxicity_severity_bins(perf_metric, user_df, other_dfs, bins=BINS, bin_labels=BIN_LABELS, ci=0.95, n_boot=501):
243
- # Note: not using other_dfs anymore
244
- y_user = []
245
- y_other = []
246
- used_bins = []
247
- other_ci_low = []
248
- other_ci_high = []
249
- for severity_i in range(len(bin_labels)):
250
- metric_others = [metrics[get_metric_ind(perf_metric)] for metrics in perf_1000_tox_severity[severity_i].values() if metrics[get_metric_ind(perf_metric)]]
251
- ci_low, ci_high = mne.stats.bootstrap_confidence_interval(np.array(metric_others), ci=ci, n_bootstraps=n_boot, stat_fun='median')
252
- metric_other = np.median(metric_others)
253
-
254
- cur_user_df = user_df[user_df["prediction_bin"] == severity_i]
255
- y_true_user = cur_user_df.pred.to_numpy() # user's label
256
- y_pred = cur_user_df.rating_avg.to_numpy() # system's label (avg)
257
-
258
- if len(y_true_user) > 0:
259
- used_bins.append(bin_labels[severity_i])
260
- metric_user = calc_metric_user(y_true_user, y_pred, perf_metric)
261
- y_user.append(metric_user)
262
- y_other.append(metric_other)
263
- other_ci_low.append(ci_low)
264
- other_ci_high.append(ci_high)
265
-
266
- return y_user, y_other, used_bins, other_ci_low, other_ci_high
267
-
268
- def get_topic_bins(perf_metric, user_df, other_dfs, n_topics, ci=0.95, n_boot=501):
269
- # Note: not using other_dfs anymore
270
- y_user = []
271
- y_other = []
272
- used_bins = []
273
- other_ci_low = []
274
- other_ci_high = []
275
- selected_topics = unique_topics_ids[1:(n_topics + 1)]
276
-
277
- for topic_id in selected_topics:
278
- cur_topic = topic_ids_to_topics[topic_id]
279
- metric_others = [metrics[get_metric_ind(perf_metric)] for metrics in perf_1000_topics[topic_id].values() if metrics[get_metric_ind(perf_metric)]]
280
- ci_low, ci_high = mne.stats.bootstrap_confidence_interval(np.array(metric_others), ci=ci, n_bootstraps=n_boot, stat_fun='median')
281
- metric_other = np.median(metric_others)
282
-
283
- cur_user_df = user_df[user_df["topic"] == cur_topic]
284
- y_true_user = cur_user_df.pred.to_numpy() # user's label
285
- y_pred = cur_user_df.rating_avg.to_numpy() # system's label (avg)
286
-
287
- if len(y_true_user) > 0:
288
- used_bins.append(cur_topic)
289
- metric_user = calc_metric_user(y_true_user, y_pred, perf_metric)
290
- y_user.append(metric_user)
291
- y_other.append(metric_other)
292
- other_ci_low.append(ci_low)
293
- other_ci_high.append(ci_high)
294
-
295
- return y_user, y_other, used_bins, other_ci_low, other_ci_high
296
-
297
- def calc_metric_user(y_true_user, y_pred, perf_metric):
298
- if perf_metric == "MAE":
299
- metric_user = mean_absolute_error(y_true_user, y_pred)
300
-
301
- elif perf_metric == "MSE":
302
- metric_user = mean_squared_error(y_true_user, y_pred)
303
-
304
- elif perf_metric == "RMSE":
305
- metric_user = mean_squared_error(y_true_user, y_pred, squared=False)
306
-
307
- elif perf_metric == "avg_diff":
308
- metric_user = np.mean(y_true_user - y_pred)
309
-
310
- return metric_user
311
-
312
- def get_toxicity_category_bins(perf_metric, user_df, other_dfs, threshold=0.5, ci=0.95, n_boot=501):
313
- # Note: not using other_dfs anymore; threshold from pre-calculation is 0.5
314
- cat_cols = ["is_profane_frac", "is_threat_frac", "is_identity_attack_frac", "is_insult_frac", "is_sexual_harassment_frac"]
315
- cat_labels = ["Profanity", "Threats", "Identity Attacks", "Insults", "Sexual Harassment"]
316
- y_user = []
317
- y_other = []
318
- used_bins = []
319
- other_ci_low = []
320
- other_ci_high = []
321
- for i, cur_col_name in enumerate(cat_cols):
322
- metric_others = [metrics[get_metric_ind(perf_metric)] for metrics in perf_1000_tox_cat[cur_col_name].values() if metrics[get_metric_ind(perf_metric)]]
323
- ci_low, ci_high = mne.stats.bootstrap_confidence_interval(np.array(metric_others), ci=ci, n_bootstraps=n_boot, stat_fun='median')
324
- metric_other = np.median(metric_others)
325
-
326
- # Filter to rows where a comment received an average label >= the provided threshold for the category
327
- cur_user_df = user_df[user_df[cur_col_name] >= threshold]
328
- y_true_user = cur_user_df.pred.to_numpy() # user's label
329
- y_pred = cur_user_df.rating_avg.to_numpy() # system's label (avg)
330
-
331
- if len(y_true_user) > 0:
332
- used_bins.append(cat_labels[i])
333
- metric_user = calc_metric_user(y_true_user, y_pred, perf_metric)
334
- y_user.append(metric_user)
335
- y_other.append(metric_other)
336
- other_ci_low.append(ci_low)
337
- other_ci_high.append(ci_high)
338
-
339
- return y_user, y_other, used_bins, other_ci_low, other_ci_high
340
-
341
- def plot_class_cond_results(preds_df, breakdown_axis, perf_metric, other_ids, sort_bars, n_topics, worker_id="A"):
342
- # Note: preds_df already has binned results
343
- # Prepare dfs
344
- user_df = preds_df[preds_df.user_id == worker_id].sort_values(by=["item_id"]).reset_index()
345
- other_dfs = [preds_df[preds_df.user_id == other_id].sort_values(by=["item_id"]).reset_index() for other_id in other_ids]
346
-
347
- if breakdown_axis == "toxicity_severity":
348
- y_user, y_other, used_bins, other_ci_low, other_ci_high = get_toxicity_severity_bins(perf_metric, user_df, other_dfs)
349
- elif breakdown_axis == "topic":
350
- y_user, y_other, used_bins, other_ci_low, other_ci_high = get_topic_bins(perf_metric, user_df, other_dfs, n_topics)
351
- elif breakdown_axis == "toxicity_category":
352
- y_user, y_other, used_bins, other_ci_low, other_ci_high = get_toxicity_category_bins(perf_metric, user_df, other_dfs)
353
-
354
- diffs = list(np.array(y_user) - np.array(y_other))
355
-
356
- # Generate bar chart
357
- data = pd.DataFrame({
358
- "metric_val": y_user + y_other,
359
- "Labeler": ["You" for _ in range(len(y_user))] + ["Other users" for _ in range(len(y_user))],
360
- "used_bins": used_bins + used_bins,
361
- "diffs": diffs + diffs,
362
- "lower_cis": y_user + other_ci_low,
363
- "upper_cis": y_user + other_ci_high,
364
- })
365
-
366
- color_domain = ['You', 'Other users']
367
- color_range = [YOUR_COLOR, OTHER_USERS_COLOR]
368
-
369
- base = alt.Chart()
370
- chart_title=f"{internal_to_readable[breakdown_axis]} Results"
371
- x_axis = alt.X("Labeler:O", sort=("You", "Other users"), title=None, axis=None)
372
- y_axis = alt.Y("metric_val:Q", title=internal_to_readable[perf_metric])
373
- if sort_bars:
374
- col_content = alt.Column("used_bins:O", sort=alt.EncodingSortField(field="diffs", op="mean", order='descending'))
375
- else:
376
- col_content = alt.Column("used_bins:O")
377
-
378
- if n_topics is not None and n_topics > 10:
379
- # Change to horizontal bar chart
380
- bar = base.mark_bar(lineBreak="_").encode(
381
- y=x_axis,
382
- x=y_axis,
383
- color=alt.Color("Labeler:O", scale=alt.Scale(domain=color_domain, range=color_range)),
384
- tooltip=[
385
- alt.Tooltip('Labeler:O', title='Labeler'),
386
- alt.Tooltip('metric_val:Q', title=perf_metric, format=".3f"),
387
- ]
388
- )
389
- error_bars = base.mark_errorbar().encode(
390
- y=x_axis,
391
- x = alt.X("lower_cis:Q", title=internal_to_readable[perf_metric]),
392
- x2 = alt.X2("upper_cis:Q", title=None),
393
- tooltip=[
394
- alt.Tooltip('lower_cis:Q', title='Lower CI', format=".3f"),
395
- alt.Tooltip('upper_cis:Q', title='Upper CI', format=".3f"),
396
- ]
397
- )
398
- combined = alt.layer(
399
- bar, error_bars, data=data
400
- ).facet(
401
- row=col_content
402
- ).properties(
403
- title=chart_title,
404
- ).interactive()
405
  else:
406
- bar = base.mark_bar(lineBreak="_").encode(
407
- x=x_axis,
408
- y=y_axis,
409
- color=alt.Color("Labeler:O", scale=alt.Scale(domain=color_domain, range=color_range)),
410
- tooltip=[
411
- alt.Tooltip('Labeler:O', title='Labeler'),
412
- alt.Tooltip('metric_val:Q', title=perf_metric, format=".3f"),
413
- ]
414
- )
415
- error_bars = base.mark_errorbar().encode(
416
- x=x_axis,
417
- y = alt.Y("lower_cis:Q", title=internal_to_readable[perf_metric]),
418
- y2 = alt.Y2("upper_cis:Q", title=None),
419
- tooltip=[
420
- alt.Tooltip('lower_cis:Q', title='Lower CI', format=".3f"),
421
- alt.Tooltip('upper_cis:Q', title='Upper CI', format=".3f"),
422
- ]
423
- )
424
- combined = alt.layer(
425
- bar, error_bars, data=data
426
- ).facet(
427
- column=col_content
428
- ).properties(
429
- title=chart_title,
430
- ).interactive()
431
-
432
- return combined
433
-
434
- def show_overall_perf(variant, error_type, cur_user, threshold=TOXIC_THRESHOLD, breakdown_axis=None, topic_vis_method="median"):
435
- # Your perf (calculate using model and testset)
436
- breakdown_axis = readable_to_internal[breakdown_axis]
437
-
438
- if breakdown_axis is not None:
439
- with open(os.path.join(module_dir, f"data/preds_dfs/{variant}.pkl"), "rb") as f:
440
- preds_df = pickle.load(f)
441
-
442
- # Read from file
443
- chart_dir = "./data/charts"
444
- chart_file = os.path.join(chart_dir, f"{cur_user}_{variant}.pkl")
445
- if os.path.isfile(chart_file):
446
- with open(chart_file, "r") as f:
447
- topic_overview_plot_json = json.load(f)
448
- else:
449
- preds_df_mod = preds_df.merge(comments_grouped_full_topic_cat, on="item_id", how="left", suffixes=('_', '_avg'))
450
- if topic_vis_method == "median":
451
- preds_df_mod_grp = preds_df_mod.groupby(["topic_", "user_id"]).median()
452
- elif topic_vis_method == "mean":
453
- preds_df_mod_grp = preds_df_mod.groupby(["topic_", "user_id"]).mean()
454
- topic_overview_plot_json = plot_overall_vis(preds_df=preds_df_mod_grp, n_topics=200, threshold=threshold, error_type=error_type, cur_user=cur_user, cur_model=variant)
455
 
456
  return {
457
  "topic_overview_plot_json": json.loads(topic_overview_plot_json),
458
  }
459
 
460
- ########################################
461
- # GET_CLUSTER_RESULTS utils
462
- def get_overall_perf3(preds_df, perf_metric, other_ids, worker_id="A"):
463
- # Prepare dataset to calculate performance
464
- # Note: true is user and pred is system
465
- y_true = preds_df[preds_df["user_id"] == worker_id].pred.to_numpy()
466
- y_pred_user = preds_df[preds_df["user_id"] == worker_id].rating_avg.to_numpy()
467
-
468
- y_true_others = y_pred_others = [preds_df[preds_df["user_id"] == other_id].pred.to_numpy() for other_id in other_ids]
469
- y_pred_others = [preds_df[preds_df["user_id"] == other_id].rating_avg.to_numpy() for other_id in other_ids]
470
-
471
- # Get performance for user's model and for other users
472
- if perf_metric == "MAE":
473
- user_perf = mean_absolute_error(y_true, y_pred_user)
474
- other_perfs = [mean_absolute_error(y_true_others[i], y_pred_others[i]) for i in range(len(y_true_others))]
475
- elif perf_metric == "MSE":
476
- user_perf = mean_squared_error(y_true, y_pred_user)
477
- other_perfs = [mean_squared_error(y_true_others[i], y_pred_others[i]) for i in range(len(y_true_others))]
478
- elif perf_metric == "RMSE":
479
- user_perf = mean_squared_error(y_true, y_pred_user, squared=False)
480
- other_perfs = [mean_squared_error(y_true_others[i], y_pred_others[i], squared=False) for i in range(len(y_true_others))]
481
- elif perf_metric == "avg_diff":
482
- user_perf = np.mean(y_true - y_pred_user)
483
- other_perfs = [np.mean(y_true_others[i] - y_pred_others[i]) for i in range(len(y_true_others))]
484
-
485
- other_perf = np.mean(other_perfs) # average across all other users
486
- return user_perf, other_perf
487
-
488
- def style_color_difference(row):
489
- full_opacity_diff = 3.
490
- pred_user_col = "Your predicted rating"
491
- pred_other_col = "Other users' predicted rating"
492
- pred_system_col = "Status-quo system rating"
493
- diff_user = row[pred_user_col] - row[pred_system_col]
494
- diff_other = row[pred_other_col] - row[pred_system_col]
495
- red = "234, 133, 125"
496
- green = "142, 205, 162"
497
- bkgd_user = green if diff_user < 0 else red # red if more toxic; green if less toxic
498
- opac_user = min(abs(diff_user / full_opacity_diff), 1.)
499
- bkgd_other = green if diff_other < 0 else red # red if more toxic; green if less toxic
500
- opac_other = min(abs(diff_other / full_opacity_diff), 1.)
501
- return ["", f"background-color: rgba({bkgd_user}, {opac_user});", f"background-color: rgba({bkgd_other}, {opac_other});", "", ""]
502
-
503
- def display_examples_cluster(preds_df, other_ids, num_examples, sort_ascending, worker_id="A"):
504
- user_df = preds_df[preds_df.user_id == worker_id].sort_values(by=["item_id"]).reset_index()
505
- others_df = preds_df[preds_df.user_id == other_ids[0]]
506
- for i in range(1, len(other_ids)):
507
- others_df.append(preds_df[preds_df.user_id == other_ids[i]])
508
- others_df.groupby(["item_id"]).mean()
509
- others_df = others_df.sort_values(by=["item_id"]).reset_index()
510
-
511
- df = pd.merge(user_df, others_df, on="item_id", how="left", suffixes=('_user', '_other'))
512
- df["Comment"] = df["comment_user"]
513
- df["Your predicted rating"] = df["pred_user"]
514
- df["Other users' predicted rating"] = df["pred_other"]
515
- df["Status-quo system rating"] = df["rating_avg_user"]
516
- df["Status-quo system std dev"] = df["rating_stddev_user"]
517
- df = df[["Comment", "Your predicted rating", "Other users' predicted rating", "Status-quo system rating", "Status-quo system std dev"]]
518
-
519
- # Add styling
520
- df = df.sort_values(by=['Status-quo system std dev'], ascending=sort_ascending)
521
- n_to_sample = np.min([num_examples, len(df)])
522
- df = df.sample(n=n_to_sample).reset_index(drop=True)
523
- return df.style.apply(style_color_difference, axis=1).render()
524
-
525
- def calc_odds_ratio(df, comparison_group, toxic_threshold=1.5, worker_id="A", debug=False, smoothing_factor=1):
526
- if comparison_group == "status_quo":
527
- other_pred_col = "rating_avg"
528
- # Get unique comments, but fetch average labeler rating
529
- num_toxic_other = len(df[(df.user_id == "A") & (df[other_pred_col] >= toxic_threshold)]) + smoothing_factor
530
- num_nontoxic_other = len(df[(df.user_id == "A") & (df[other_pred_col] < toxic_threshold)]) + smoothing_factor
531
- elif comparison_group == "other_users":
532
- other_pred_col = "pred"
533
- num_toxic_other = len(df[(df.user_id != "A") & (df[other_pred_col] >= toxic_threshold)]) + smoothing_factor
534
- num_nontoxic_other = len(df[(df.user_id != "A") & (df[other_pred_col] < toxic_threshold)]) + smoothing_factor
535
-
536
- num_toxic_user = len(df[(df.user_id == "A") & (df.pred >= toxic_threshold)]) + smoothing_factor
537
- num_nontoxic_user = len(df[(df.user_id == "A") & (df.pred < toxic_threshold)]) + smoothing_factor
538
-
539
- toxic_ratio = num_toxic_user / num_toxic_other
540
- nontoxic_ratio = num_nontoxic_user / num_nontoxic_other
541
- odds_ratio = toxic_ratio / nontoxic_ratio
542
-
543
- if debug:
544
- print(f"Odds ratio: {odds_ratio}")
545
- print(f"num_toxic_user: {num_toxic_user}, num_nontoxic_user: {num_nontoxic_user}")
546
- print(f"num_toxic_other: {num_toxic_other}, num_nontoxic_other: {num_nontoxic_other}")
547
-
548
- contingency_table = [[num_toxic_user, num_nontoxic_user], [num_toxic_other, num_nontoxic_other]]
549
- odds_ratio, p_val = stats.fisher_exact(contingency_table, alternative='two-sided')
550
- if debug:
551
- print(f"Odds ratio: {odds_ratio}, p={p_val}")
552
-
553
- return odds_ratio
554
-
555
- # Neighbor search
556
- def get_match(comment_inds, K=20, threshold=None, debug=False):
557
- match_ids = []
558
- rows = []
559
- for i in comment_inds:
560
- if debug:
561
- print(f"\nComment: {comments[i]}")
562
- query_embedding = model.encode(comments[i], convert_to_tensor=True)
563
- hits = util.semantic_search(query_embedding, embeddings, score_function=util.cos_sim, top_k=K)
564
- # print(hits[0])
565
- for hit in hits[0]:
566
- c_id = hit['corpus_id']
567
- score = np.round(hit['score'], 3)
568
- if threshold is None or score > threshold:
569
- match_ids.append(c_id)
570
- if debug:
571
- print(f"\t(ID={c_id}, Score={score}): {comments[c_id]}")
572
- rows.append([c_id, score, comments[c_id]])
573
-
574
- df = pd.DataFrame(rows, columns=["id", "score", "comment"])
575
- return match_ids
576
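The neighbor retrieval above relies on sentence-transformers' semantic_search; a self-contained sketch of that call (same model name as loaded above, but the query string and corpus are placeholders):

    from sentence_transformers import SentenceTransformer, util
    st_model = SentenceTransformer("paraphrase-MiniLM-L6-v2")
    corpus = ["example comment one", "another example comment"]  # placeholder corpus
    corpus_embeddings = st_model.encode(corpus, convert_to_tensor=True)
    query_embedding = st_model.encode("example query", convert_to_tensor=True)
    hits = util.semantic_search(query_embedding, corpus_embeddings, score_function=util.cos_sim, top_k=2)
    for hit in hits[0]:  # hits[0] holds results for the first (and only) query
        print(hit["corpus_id"], round(hit["score"], 3), corpus[hit["corpus_id"]])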
-
577
- def display_examples_auto_cluster(preds_df, cluster, other_ids, perf_metric, sort_ascending=True, worker_id="A", num_examples=10):
578
- # Overall performance
579
- topic_df = preds_df
580
- topic_df = topic_df[topic_df["topic"] == cluster]
581
- user_perf, other_perf = get_overall_perf3(topic_df, perf_metric, other_ids)
582
-
583
- user_direction = "LOWER" if user_perf < 0 else "HIGHER"
584
- other_direction = "LOWER" if other_perf < 0 else "HIGHER"
585
- print(f"Your ratings are on average {np.round(abs(user_perf), 3)} {user_direction} than the existing system for this cluster")
586
- print(f"Others' ratings (based on {len(other_ids)} users) are on average {np.round(abs(other_perf), 3)} {other_direction} than the existing system for this cluster")
587
-
588
- # Display example comments
589
- df = display_examples_cluster(preds_df, other_ids, num_examples, sort_ascending)
590
- return df
591
-
592
-
593
- # function to get results for a new provided cluster
594
- def display_examples_manual_cluster(preds_df, cluster_comments, other_ids, perf_metric, sort_ascending=True, worker_id="A"):
595
- # Overall performance
596
- cluster_df = preds_df[preds_df["comment"].isin(cluster_comments)]
597
- user_perf, other_perf = get_overall_perf3(cluster_df, perf_metric, other_ids)
598
-
599
- user_direction = "LOWER" if user_perf < 0 else "HIGHER"
600
- other_direction = "LOWER" if other_perf < 0 else "HIGHER"
601
- print(f"Your ratings are on average {np.round(abs(user_perf), 3)} {user_direction} than the existing system for this cluster")
602
- print(f"Others' ratings (based on {len(other_ids)} users) are on average {np.round(abs(other_perf), 3)} {other_direction} than the existing system for this cluster")
603
-
604
- user_df = preds_df[preds_df.user_id == worker_id].sort_values(by=["item_id"]).reset_index()
605
- others_df = preds_df[preds_df.user_id == other_ids[0]]
606
- for i in range(1, len(other_ids)):
607
- others_df.append(preds_df[preds_df.user_id == other_ids[i]])
608
- others_df.groupby(["item_id"]).mean()
609
- others_df = others_df.sort_values(by=["item_id"]).reset_index()
610
-
611
- # Get cluster_comments
612
- user_df = user_df[user_df["comment"].isin(cluster_comments)]
613
- others_df = others_df[others_df["comment"].isin(cluster_comments)]
614
-
615
- df = pd.merge(user_df, others_df, on="item_id", how="left", suffixes=('_user', '_other'))
616
- df["pred_system"] = df["rating_avg_user"]
617
- df["pred_system_stddev"] = df["rating_stddev_user"]
618
- df = df[["item_id", "comment_user", "pred_user", "pred_other", "pred_system", "pred_system_stddev"]]
619
-
620
- # Add styling
621
- df = df.sort_values(by=['pred_system_stddev'], ascending=sort_ascending)
622
- df = df.style.apply(style_color_difference, axis=1).render()
623
- return df
624
-
625
  ########################################
626
  # GET_LABELING utils
627
- def create_example_sets(comments_df, n_label_per_bin, score_bins, keyword=None, topic=None):
628
  # Restrict to the keyword, if provided
629
- df = comments_df.copy()
630
  if keyword != None:
631
  df = df[df["comment"].str.contains(keyword)]
632
 
@@ -651,8 +290,8 @@ def create_example_sets(comments_df, n_label_per_bin, score_bins, keyword=None,
651
 
652
  return ex_to_label
653
 
654
- def get_grp_model_labels(comments_df, n_label_per_bin, score_bins, grp_ids):
655
- df = comments_df.copy()
656
 
657
  train_df_grp = train_df[train_df["user_id"].isin(grp_ids)]
658
  train_df_grp_avg = train_df_grp.groupby(by=["item_id"]).median().reset_index()
@@ -676,106 +315,207 @@ def get_grp_model_labels(comments_df, n_label_per_bin, score_bins, grp_ids):
676
 
677
  return ratings_grp
678
679
  ########################################
680
  # GET_PERSONALIZED_MODEL utils
681
- def fetch_existing_data(model_name, last_label_i):
682
  # Check if we have cached model performance
683
- perf_dir = f"./data/perf/{model_name}"
684
- label_dir = f"./data/labels/{model_name}"
685
- if os.path.isdir(os.path.join(module_dir, perf_dir)):
686
  # Fetch cached results
687
- last_i = len([name for name in os.listdir(os.path.join(module_dir, perf_dir)) if os.path.isfile(os.path.join(module_dir, perf_dir, name))])
688
- with open(os.path.join(module_dir, perf_dir, f"{last_i}.pkl"), "rb") as f:
689
  mae, mse, rmse, avg_diff = pickle.load(f)
690
  else:
691
- # Fetch results from trained model
692
- with open(os.path.join(module_dir, f"./data/trained_models/{model_name}.pkl"), "rb") as f:
693
- cur_model = pickle.load(f)
694
- mae, mse, rmse, avg_diff = users_perf(cur_model)
695
- # Cache results
696
- os.mkdir(os.path.join(module_dir, perf_dir))
697
- with open(os.path.join(module_dir, perf_dir, "1.pkl"), "wb") as f:
698
- pickle.dump((mae, mse, rmse, avg_diff), f)
699
 
700
  # Fetch previous user-provided labels
701
  ratings_prev = None
702
- if last_label_i > 0:
703
- with open(os.path.join(module_dir, label_dir, f"{last_i}.pkl"), "rb") as f:
 
 
704
  ratings_prev = pickle.load(f)
705
  return mae, mse, rmse, avg_diff, ratings_prev
706
 
707
- def train_updated_model(model_name, last_label_i, ratings, user, top_n=20, topic=None):
708
  # Check if there is previously-labeled data; if so, combine it with this data
709
- perf_dir = f"./data/perf/{model_name}"
710
- label_dir = f"./data/labels/{model_name}"
711
- labeled_df = format_labeled_data(ratings) # Treat ratings as full batch of all ratings
712
  ratings_prev = None
713
 
714
  # Filter out rows with "unsure" (-1)
715
  labeled_df = labeled_df[labeled_df["rating"] != -1]
716
 
717
  # Filter to top N for user study
718
- if topic is None:
719
- # labeled_df = labeled_df.head(top_n)
720
- labeled_df = labeled_df.tail(top_n)
721
  else:
722
  # For topic tuning, need to fetch old labels
723
- if (last_label_i > 0):
 
724
  # Concatenate previous set of labels with this new batch of labels
725
- with open(os.path.join(module_dir, label_dir, f"{last_label_i}.pkl"), "rb") as f:
 
726
  ratings_prev = pickle.load(f)
727
- labeled_df_prev = format_labeled_data(ratings_prev)
728
  labeled_df_prev = labeled_df_prev[labeled_df_prev["rating"] != -1]
729
  ratings.update(ratings_prev) # append old ratings to ratings
730
  labeled_df = pd.concat([labeled_df_prev, labeled_df])
731
-
732
- print("len ratings for training:", len(labeled_df))
733
-
734
- cur_model, perf, _, _ = train_user_model(ratings_df=labeled_df)
735
-
736
- user_perf_metrics[model_name] = users_perf(cur_model)
737
-
738
- mae, mse, rmse, avg_diff = user_perf_metrics[model_name]
739
-
740
- cur_preds_df = get_preds_df(cur_model, ["A"], sys_eval_df=ratings_df_full, topic=topic, model_name=model_name) # Just get results for user
741
-
742
  # Save this batch of labels
743
- with open(os.path.join(module_dir, label_dir, f"{last_label_i + 1}.pkl"), "wb") as f:
 
744
  pickle.dump(ratings, f)
745
 
746
- # Save model results
747
- with open(os.path.join(module_dir, f"./data/preds_dfs/{model_name}.pkl"), "wb") as f:
748
- pickle.dump(cur_preds_df, f)
749
-
750
- if model_name not in all_model_names:
751
- all_model_names.append(model_name)
752
- with open(os.path.join(module_dir, "./data/all_model_names.pkl"), "wb") as f:
753
- pickle.dump(all_model_names, f)
754
-
755
- # Handle user
756
- if user not in users_to_models:
757
- users_to_models[user] = [] # New user
758
- if model_name not in users_to_models[user]:
759
- users_to_models[user].append(model_name) # New model
760
- with open(f"./data/users_to_models.pkl", "wb") as f:
761
- pickle.dump(users_to_models, f)
762
-
763
- with open(os.path.join(module_dir, "./data/user_perf_metrics.pkl"), "wb") as f:
764
- pickle.dump(user_perf_metrics, f)
765
- with open(os.path.join(module_dir, f"./data/trained_models/{model_name}.pkl"), "wb") as f:
766
- pickle.dump(cur_model, f)
767
 
768
- # Cache performance results
769
- if not os.path.isdir(os.path.join(module_dir, perf_dir)):
770
- os.mkdir(os.path.join(module_dir, perf_dir))
771
- last_perf_i = len([name for name in os.listdir(os.path.join(module_dir, perf_dir)) if os.path.isfile(os.path.join(module_dir, perf_dir, name))])
772
- with open(os.path.join(module_dir, perf_dir, f"{last_perf_i + 1}.pkl"), "wb") as f:
773
  pickle.dump((mae, mse, rmse, avg_diff), f)
774
775
  ratings_prev = ratings
776
  return mae, mse, rmse, avg_diff, ratings_prev
777
 
778
- def format_labeled_data(ratings, worker_id="A", debug=False):
779
  all_rows = []
780
  for comment, rating in ratings.items():
781
  comment_id = comments_to_ids[comment]
@@ -785,7 +525,7 @@ def format_labeled_data(ratings, worker_id="A", debug=False):
785
  df = pd.DataFrame(all_rows, columns=["user_id", "item_id", "rating"])
786
  return df
787
 
788
- def users_perf(model, sys_eval_df=sys_eval_df, avg_ratings_df=comments_grouped_full_topic_cat, worker_id="A"):
789
  # Load the full empty dataset
790
  sys_eval_comment_ids = sys_eval_df.item_id.unique().tolist()
791
  empty_ratings_rows = [[worker_id, c_id, 0] for c_id in sys_eval_comment_ids]
@@ -801,17 +541,17 @@ def users_perf(model, sys_eval_df=sys_eval_df, avg_ratings_df=comments_grouped_f
801
  user_item_preds = get_predictions_by_user_and_item(predictions)
802
  df["pred"] = df.apply(lambda row: user_item_preds[(row.user_id, row.item_id)] if (row.user_id, row.item_id) in user_item_preds else np.nan, axis=1)
803
 
804
- df = df.merge(avg_ratings_df, on="item_id", how="left", suffixes=('_', '_avg'))
805
  df.dropna(subset = ["pred"], inplace=True)
806
- df["rating_"] = df.rating_.astype("int32")
807
 
808
- perf_metrics = get_overall_perf(df, "A") # mae, mse, rmse, avg_diff
809
  return perf_metrics
810
 
811
  def get_overall_perf(preds_df, user_id):
812
  # Prepare dataset to calculate performance
813
- y_pred = preds_df[preds_df["user_id"] == user_id].rating_avg.to_numpy() # Assume system is just average of true labels
814
- y_true = preds_df[preds_df["user_id"] == user_id].pred.to_numpy()
815
 
816
  # Get performance for user's model
817
  mae = mean_absolute_error(y_true, y_pred)
@@ -827,7 +567,11 @@ def get_predictions_by_user_and_item(predictions):
827
  user_item_preds[(uid, iid)] = est
828
  return user_item_preds
829
 
830
- def get_preds_df(model, user_ids, orig_df=ratings_df_full, avg_ratings_df=comments_grouped_full_topic_cat, sys_eval_df=sys_eval_df, bins=BINS, topic=None, model_name=None):
831
  # Prep dataframe for all predictions we'd like to request
832
  start = time.time()
833
  sys_eval_comment_ids = sys_eval_df.item_id.unique().tolist()
@@ -836,7 +580,8 @@ def get_preds_df(model, user_ids, orig_df=ratings_df_full, avg_ratings_df=commen
836
  for user_id in user_ids:
837
  empty_ratings_rows.extend([[user_id, c_id, 0] for c_id in sys_eval_comment_ids])
838
  empty_ratings_df = pd.DataFrame(empty_ratings_rows, columns=["user_id", "item_id", "rating"])
839
- print("setup", time.time() - start)
 
840
 
841
  # Evaluate model to get predictions
842
  start = time.time()
@@ -844,16 +589,17 @@ def get_preds_df(model, user_ids, orig_df=ratings_df_full, avg_ratings_df=commen
844
  eval_set_data = Dataset.load_from_df(empty_ratings_df, reader)
845
  _, testset = train_test_split(eval_set_data, test_size=1.)
846
  predictions = model.test(testset)
847
- print("train_test_split", time.time() - start)
 
848
 
849
  # Update dataframe with predictions
850
  start = time.time()
851
  df = empty_ratings_df.copy() # user_id, item_id, rating
852
  user_item_preds = get_predictions_by_user_and_item(predictions)
853
  df["pred"] = df.apply(lambda row: user_item_preds[(row.user_id, row.item_id)] if (row.user_id, row.item_id) in user_item_preds else np.nan, axis=1)
854
- df = df.merge(avg_ratings_df, on="item_id", how="left", suffixes=('_', '_avg'))
855
  df.dropna(subset = ["pred"], inplace=True)
856
- df["rating_"] = df.rating_.astype("int32")
857
 
858
  # Get binned predictions (based on user prediction)
859
  df["prediction_bin"], out_bins = pd.cut(df["pred"], bins, labels=False, retbins=True)
@@ -861,9 +607,14 @@ def get_preds_df(model, user_ids, orig_df=ratings_df_full, avg_ratings_df=commen
861
 
862
  return df
863
864
  def train_user_model(ratings_df, train_df=train_df, model_eval_df=model_eval_df, train_frac=0.75, model_type="SVD", sim_type=None, user_based=True):
865
  # Sample from shuffled labeled dataframe and add batch to train set; specified set size to model_eval set
866
- labeled = ratings_df.sample(frac=1)
867
  batch_size = math.floor(len(labeled) * train_frac)
868
  labeled_train = labeled[:batch_size]
869
  labeled_model_eval = labeled[batch_size:]
@@ -876,7 +627,11 @@ def train_user_model(ratings_df, train_df=train_df, model_eval_df=model_eval_df,
876
 
877
  return model, perf, labeled_train, labeled_model_eval
878
 
879
- def train_model(train_df, model_eval_df, model_type="SVD", sim_type=None, user_based=True):
880
  # Train model
881
  reader = Reader(rating_scale=(0, 4))
882
  train_data = Dataset.load_from_df(train_df, reader)
@@ -905,58 +660,18 @@ def train_model(train_df, model_eval_df, model_type="SVD", sim_type=None, user_b
905
  mae = accuracy.mae(predictions)
906
  mse = accuracy.mse(predictions)
907
 
908
- print(f"MAE: {mae}, MSE: {mse}, RMSE: {rmse}, FCP: {fcp}")
 
909
  perf = [mae, mse, rmse, fcp]
910
 
911
  return algo, perf
912
 
913
- def plot_train_perf_results2(model_name):
914
- # Open labels
915
- label_dir = f"./data/labels/{model_name}"
916
- n_label_files = len([name for name in os.listdir(os.path.join(module_dir, label_dir)) if os.path.isfile(os.path.join(module_dir, label_dir, name))])
917
-
918
- all_rows = []
919
- with open(os.path.join(module_dir, label_dir, f"{n_label_files}.pkl"), "rb") as f:
920
- ratings = pickle.load(f)
921
-
922
- labeled_df = format_labeled_data(ratings)
923
- labeled_df = labeled_df[labeled_df["rating"] != -1]
924
-
925
- # Iterate through batches of 5 labels
926
- n_batches = int(np.ceil(len(labeled_df) / 5.))
927
- for i in range(n_batches):
928
- start = time.time()
929
- n_to_sample = np.min([5 * (i + 1), len(labeled_df)])
930
- cur_model, _, _, _ = train_user_model(ratings_df=labeled_df.head(n_to_sample))
931
- mae, mse, rmse, avg_diff = users_perf(cur_model)
932
- all_rows.append([n_to_sample, mae, "MAE"])
933
- print(f"iter {i}: {time.time() - start}")
934
-
935
- print("all_rows", all_rows)
936
-
937
- df = pd.DataFrame(all_rows, columns=["n_to_sample", "perf", "metric"])
938
- chart = alt.Chart(df).mark_line(point=True).encode(
939
- x=alt.X("n_to_sample:Q", title="Number of Comments Labeled"),
940
- y="perf",
941
- color="metric",
942
- tooltip=[
943
- alt.Tooltip('n_to_sample:Q', title="Number of Comments Labeled"),
944
- alt.Tooltip('metric:N', title="Metric"),
945
- alt.Tooltip('perf:Q', title="Metric Value", format=".3f"),
946
- ],
947
- ).properties(
948
- title=f"Performance over number of examples: {model_name}",
949
- width=500,
950
- )
951
- return chart
952
-
953
- def plot_train_perf_results(model_name, mae):
954
- perf_dir = f"./data/perf/{model_name}"
955
- n_perf_files = len([name for name in os.listdir(os.path.join(module_dir, perf_dir)) if os.path.isfile(os.path.join(module_dir, perf_dir, name))])
956
-
957
  all_rows = []
958
- for i in range(1, n_perf_files + 1):
959
- with open(os.path.join(module_dir, perf_dir, f"{i}.pkl"), "rb") as f:
 
960
  mae, mse, rmse, avg_diff = pickle.load(f)
961
  all_rows.append([i, mae, "Your MAE"])
962
 
@@ -975,24 +690,24 @@ def plot_train_perf_results(model_name, mae):
975
  width=500,
976
  )
977
 
978
- PCT_50 = 0.591
979
- PCT_75 = 0.662
980
- PCT_90 = 0.869
981
 
982
  plot_dim_width = 500
983
  domain_min = 0.0
984
- domain_max = 1.0
985
  bkgd = alt.Chart(pd.DataFrame({
986
- "start": [PCT_90, PCT_75, domain_min],
987
- "stop": [domain_max, PCT_90, PCT_75],
988
- "bkgd": ["Needs improvement (< top 90%)", "Okay (top 90%)", "Good (top 75%)"],
989
  })).mark_rect(opacity=0.2).encode(
990
- y=alt.Y("start:Q", scale=alt.Scale(domain=[0, domain_max])),
991
- y2=alt.Y2("stop:Q"),
992
  x=alt.value(0),
993
  x2=alt.value(plot_dim_width),
994
  color=alt.Color("bkgd:O", scale=alt.Scale(
995
- domain=["Needs improvement (< top 90%)", "Okay (top 90%)", "Good (top 75%)"],
996
  range=["red", "yellow", "green"]),
997
  title="How good is your MAE?"
998
  )
@@ -1000,12 +715,12 @@ def plot_train_perf_results(model_name, mae):
1000
 
1001
  plot = (bkgd + chart).properties(width=plot_dim_width).resolve_scale(color='independent')
1002
  mae_status = None
1003
- if mae < PCT_75:
1004
- mae_status = "Your MAE is in the <b>Good</b> range, which means that it's in the top 75% of scores compared to other users. Your model looks good to go."
1005
- elif mae < PCT_90:
1006
- mae_status = "Your MAE is in the <b>Okay</b> range, which means that it's in the top 90% of scores compared to other users. Your model can be used, but you can provide additional labels to improve it."
1007
  else:
1008
- mae_status = "Your MAE is in the <b>Needs improvement</b> range, which means that it's in below the top 95% of scores compared to other users. Your model may need additional labels to improve."
1009
  return plot, mae_status
1010
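For reference, the banding logic encoded by the removed thresholds, as a short sketch (threshold values and labels copied from the constants and chart legend above):

    PCT_75, PCT_90 = 0.662, 0.869
    def mae_band(mae):
        # Lower MAE is better; bands mirror the background rectangles in the chart
        if mae < PCT_75:
            return "Good (top 75%)"
        elif mae < PCT_90:
            return "Okay (top 90%)"
        return "Needs improvement (< top 90%)"
    print(mae_band(0.6), mae_band(0.7), mae_band(0.9))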
 
1011
  ########################################
@@ -1104,14 +819,14 @@ def get_decision(rating, threshold):
1104
 
1105
  def get_category(row, threshold=0.3):
1106
  k_to_category = {
1107
- "is_profane_frac_": "Profanity",
1108
- "is_threat_frac_": "Threat",
1109
- "is_identity_attack_frac_": "Identity Attack",
1110
- "is_insult_frac_": "Insult",
1111
- "is_sexual_harassment_frac_": "Sexual Harassment",
1112
  }
1113
  categories = []
1114
- for k in ["is_profane_frac_", "is_threat_frac_", "is_identity_attack_frac_", "is_insult_frac_", "is_sexual_harassment_frac_"]:
1115
  if row[k] > threshold:
1116
  categories.append(k_to_category[k])
1117
 
@@ -1124,19 +839,20 @@ def get_comment_url(row):
1124
  return f"#{row['item_id']}/#comment"
1125
 
1126
  def get_topic_url(row):
1127
- return f"#{row['topic_']}/#topic"
1128
 
1129
- def plot_overall_vis(preds_df, error_type, cur_user, cur_model, n_topics=None, bins=VIS_BINS, threshold=TOXIC_THRESHOLD, bin_step=0.05):
 
1130
  df = preds_df.copy().reset_index()
1131
 
1132
  if n_topics is not None:
1133
- df = df[df["topic_id_"] < n_topics]
1134
 
1135
  df["vis_pred_bin"], out_bins = pd.cut(df["pred"], bins, labels=VIS_BINS_LABELS, retbins=True)
1136
- df = df[df["user_id"] == "A"].sort_values(by=["item_id"]).reset_index()
1137
- df["system_label"] = [("toxic" if r > threshold else "non-toxic") for r in df["rating"].tolist()]
1138
- df["threshold"] = [threshold for r in df["rating"].tolist()]
1139
- df["key"] = [get_key(sys, user, threshold) for sys, user in zip(df["rating"].tolist(), df["pred"].tolist())]
1140
  df["url"] = df.apply(lambda row: get_topic_url(row), axis=1)
1141
 
1142
  # Plot sizing
@@ -1154,12 +870,12 @@ def plot_overall_vis(preds_df, error_type, cur_user, cur_model, n_topics=None, b
1154
  # Main chart
1155
  chart = alt.Chart(df).mark_square(opacity=0.8, size=mark_size, stroke="grey", strokeWidth=0.5).transform_window(
1156
  groupby=['vis_pred_bin'],
1157
- sort=[{'field': 'rating'}],
1158
  id='row_number()',
1159
  ignorePeers=True,
1160
  ).encode(
1161
  x=alt.X('vis_pred_bin:Q', title="Our prediction of your rating", scale=alt.Scale(domain=(domain_min, domain_max))),
1162
- y=alt.Y('id:O', title="Comments (ordered by System toxicity rating)", axis=alt.Axis(values=list(range(0, max_items, 5))), sort='descending'),
1163
  color = alt.Color("key:O", scale=alt.Scale(
1164
  domain=["System agrees: Non-toxic", "System agrees: Toxic", "System differs: Error > 1.5", "System differs: Error > 1.0", "System differs: Error > 0.5", "System differs: Error <=0.5"],
1165
  range=["white", "#cbcbcb", "red", "#ff7a5c", "#ffa894", "#ffd1c7"]),
@@ -1167,9 +883,9 @@ def plot_overall_vis(preds_df, error_type, cur_user, cur_model, n_topics=None, b
1167
  ),
1168
  href="url:N",
1169
  tooltip = [
1170
- alt.Tooltip("topic_:N", title="Topic"),
1171
  alt.Tooltip("system_label:N", title="System label"),
1172
- alt.Tooltip("rating:Q", title="System rating", format=".2f"),
1173
  alt.Tooltip("pred:Q", title="Your rating", format=".2f")
1174
  ]
1175
  )
@@ -1233,31 +949,17 @@ def plot_overall_vis(preds_df, error_type, cur_user, cur_model, n_topics=None, b
1233
  )
1234
 
1235
  plot = (bkgd + annotation + chart + rule).properties(height=(plot_dim_height), width=plot_dim_width).resolve_scale(color='independent').to_json()
1236
-
1237
- # Save to file
1238
- chart_dir = "./data/charts"
1239
- chart_file = os.path.join(chart_dir, f"{cur_user}_{cur_model}.pkl")
1240
- with open(chart_file, "w") as f:
1241
- json.dump(plot, f)
1242
-
1243
  return plot
1244
 
1245
- def get_cluster_overview_plot(preds_df, error_type, threshold=TOXIC_THRESHOLD, use_model=True):
1246
- preds_df_mod = preds_df.merge(comments_grouped_full_topic_cat, on="item_id", how="left", suffixes=('_', '_avg'))
1247
-
1248
- if use_model:
1249
- return plot_overall_vis_cluster(preds_df_mod, error_type=error_type, n_comments=500, threshold=threshold)
1250
- else:
1251
- return plot_overall_vis_cluster2(preds_df_mod, error_type=error_type, n_comments=500, threshold=threshold)
1252
-
1253
- def plot_overall_vis_cluster2(preds_df, error_type, n_comments=None, bins=VIS_BINS, threshold=TOXIC_THRESHOLD, bin_step=0.05):
1254
  df = preds_df.copy().reset_index()
1255
 
1256
- df["vis_pred_bin"], out_bins = pd.cut(df["rating"], bins, labels=VIS_BINS_LABELS, retbins=True)
1257
- df = df[df["user_id"] == "A"].sort_values(by=["rating"]).reset_index()
1258
- df["system_label"] = [("toxic" if r > threshold else "non-toxic") for r in df["rating"].tolist()]
1259
- df["key"] = [get_key_no_model(sys, threshold) for sys in df["rating"].tolist()]
1260
- print("len(df)", len(df)) # always 0 for some reason (from keyword search)
1261
  df["category"] = df.apply(lambda row: get_category(row), axis=1)
1262
  df["url"] = df.apply(lambda row: get_comment_url(row), axis=1)
1263
 
@@ -1279,7 +981,7 @@ def plot_overall_vis_cluster2(preds_df, error_type, n_comments=None, bins=VIS_BI
1279
  # Main chart
1280
  chart = alt.Chart(df).mark_square(opacity=0.8, size=mark_size, stroke="grey", strokeWidth=0.25).transform_window(
1281
  groupby=['vis_pred_bin'],
1282
- sort=[{'field': 'rating'}],
1283
  id='row_number()',
1284
  ignorePeers=True
1285
  ).encode(
@@ -1293,8 +995,8 @@ def plot_overall_vis_cluster2(preds_df, error_type, n_comments=None, bins=VIS_BI
1293
  ),
1294
  href="url:N",
1295
  tooltip = [
1296
- alt.Tooltip("comment_:N", title="comment"),
1297
- alt.Tooltip("rating:Q", title="System rating", format=".2f"),
1298
  ]
1299
  )
1300
 
@@ -1345,24 +1047,22 @@ def plot_overall_vis_cluster2(preds_df, error_type, n_comments=None, bins=VIS_BI
1345
  final_plot = (bkgd + annotation + chart + rule).properties(height=(plot_dim_height), width=plot_dim_width).resolve_scale(color='independent').to_json()
1346
 
1347
  return final_plot, df
1348
-
1349
- def plot_overall_vis_cluster(preds_df, error_type, n_comments=None, bins=VIS_BINS, threshold=TOXIC_THRESHOLD, bin_step=0.05):
1350
- df = preds_df.copy().reset_index(drop=True)
1351
- # df = df[df["topic_"] == topic]
1352
 
1353
  df["vis_pred_bin"], out_bins = pd.cut(df["pred"], bins, labels=VIS_BINS_LABELS, retbins=True)
1354
- df = df[df["user_id"] == "A"].sort_values(by=["rating"]).reset_index(drop=True)
1355
- df["system_label"] = [("toxic" if r > threshold else "non-toxic") for r in df["rating"].tolist()]
1356
- df["key"] = [get_key(sys, user, threshold) for sys, user in zip(df["rating"].tolist(), df["pred"].tolist())]
1357
- print("len(df)", len(df)) # always 0 for some reason (from keyword search)
1358
- # print("columns", df.columns)
1359
  df["category"] = df.apply(lambda row: get_category(row), axis=1)
1360
  df["url"] = df.apply(lambda row: get_comment_url(row), axis=1)
1361
 
1362
  if n_comments is not None:
1363
  n_to_sample = np.min([n_comments, len(df)])
1364
  df = df.sample(n=n_to_sample)
1365
-
1366
  # Plot sizing
1367
  domain_min = 0
1368
  domain_max = 4
@@ -1377,7 +1077,7 @@ def plot_overall_vis_cluster(preds_df, error_type, n_comments=None, bins=VIS_BIN
1377
  # Main chart
1378
  chart = alt.Chart(df).mark_square(opacity=0.8, size=mark_size, stroke="grey", strokeWidth=0.25).transform_window(
1379
  groupby=['vis_pred_bin'],
1380
- sort=[{'field': 'rating'}],
1381
  id='row_number()',
1382
  ignorePeers=True
1383
  ).encode(
@@ -1390,8 +1090,8 @@ def plot_overall_vis_cluster(preds_df, error_type, n_comments=None, bins=VIS_BIN
1390
  ),
1391
  href="url:N",
1392
  tooltip = [
1393
- alt.Tooltip("comment_:N", title="comment"),
1394
- alt.Tooltip("rating:Q", title="System rating", format=".2f"),
1395
  alt.Tooltip("pred:Q", title="Your rating", format=".2f"),
1396
  alt.Tooltip("category:N", title="Potential toxicity categories")
1397
  ]
@@ -1457,30 +1157,27 @@ def plot_overall_vis_cluster(preds_df, error_type, n_comments=None, bins=VIS_BIN
1457
 
1458
  return final_plot, df
1459
 
1460
- def get_cluster_comments(df, error_type, threshold=TOXIC_THRESHOLD, worker_id="A", num_examples=50, use_model=True):
1461
  df["user_color"] = [get_user_color(user, threshold) for user in df["pred"].tolist()] # get cell colors
1462
- df["system_color"] = [get_user_color(sys, threshold) for sys in df["rating"].tolist()] # get cell colors
1463
- df["error_color"] = [get_system_color(sys, user, threshold) for sys, user in zip(df["rating"].tolist(), df["pred"].tolist())] # get cell colors
1464
- df["error_type"] = [get_error_type(sys, user, threshold) for sys, user in zip(df["rating"].tolist(), df["pred"].tolist())] # get error type in words
1465
- df["error_amt"] = [abs(sys - threshold) for sys in df["rating"].tolist()] # get raw error
1466
  df["judgment"] = ["" for _ in range(len(df))] # template for "agree" or "disagree" buttons
1467
 
1468
  if use_model:
1469
  df = df.sort_values(by=["error_amt"], ascending=False) # surface largest errors first
1470
  else:
1471
- print("get_cluster_comments; not using model")
1472
- df = df.sort_values(by=["rating"], ascending=True)
 
1473
 
1474
  df["id"] = df["item_id"]
1475
- # df["comment"] already exists
1476
- df["comment"] = df["comment_"]
1477
  df["toxicity_category"] = df["category"]
1478
  df["user_rating"] = df["pred"]
1479
  df["user_decision"] = [get_decision(rating, threshold) for rating in df["pred"].tolist()]
1480
- df["system_rating"] = df["rating"]
1481
- df["system_decision"] = [get_decision(rating, threshold) for rating in df["rating"].tolist()]
1482
- df["error_type"] = df["error_type"]
1483
- df = df.head(num_examples)
1484
  df = df.round(decimals=2)
1485
 
1486
  # Filter to specified error type
@@ -1493,7 +1190,7 @@ def get_cluster_comments(df, error_type, threshold=TOXIC_THRESHOLD, worker_id="A
1493
  elif error_type == "Both":
1494
  df = df[(df["error_type"] == "System may be under-sensitive") | (df["error_type"] == "System may be over-sensitive")]
1495
 
1496
- return df.to_json(orient="records")
1497
 
1498
  # PERSONALIZED CLUSTERS utils
1499
  def get_disagreement_comments(preds_df, mode, n=10_000, threshold=TOXIC_THRESHOLD):
@@ -1512,58 +1209,10 @@ def get_disagreement_comments(preds_df, mode, n=10_000, threshold=TOXIC_THRESHOL
1512
  df = df.sort_values(by=["diff"], ascending=asc)
1513
  df = df.head(n)
1514
 
1515
- return df["comment_"].tolist(), df
1516
-
1517
- def get_personal_clusters(model, n=3):
1518
- personal_cluster_file = f"./data/personal_cluster_dfs/{model}.pkl"
1519
- if (os.path.isfile(personal_cluster_file)):
1520
- with open(personal_cluster_file, "rb") as f:
1521
- cluster_df = pickle.load(f)
1522
- cluster_df = cluster_df.sort_values(by=["topic_id"])
1523
- topics_under = cluster_df[cluster_df["error_type"] == "System may be under-sensitive"]["topic"].unique().tolist()
1524
- topics_under = topics_under[1:(n + 1)]
1525
- topics_over = cluster_df[cluster_df["error_type"] == "System may be over-sensitive"]["topic"].unique().tolist()
1526
- topics_over = topics_over[1:(n + 1)]
1527
- return topics_under, topics_over
1528
- else:
1529
- topics_under_top = []
1530
- topics_over_top = []
1531
- preds_df_file = f"./data/preds_dfs/{model}.pkl"
1532
- if (os.path.isfile(preds_df_file)):
1533
- with open(preds_df_file, "rb") as f:
1534
- preds_df = pickle.load(f)
1535
- preds_df_mod = preds_df.merge(comments_grouped_full_topic_cat, on="item_id", how="left", suffixes=('_', '_avg')).reset_index()
1536
- preds_df_mod = preds_df_mod[preds_df_mod["user_id"] == "A"]
1537
-
1538
- comments_under, comments_under_df = get_disagreement_comments(preds_df_mod, mode="under-sensitive", n=1000)
1539
- if len(comments_under) > 0:
1540
- topics_under = BERTopic(embedding_model="paraphrase-MiniLM-L6-v2").fit(comments_under)
1541
- topics_under_top = topics_under.get_topic_info().head(n)["Name"].tolist()
1542
- print("topics_under", topics_under_top)
1543
- # Get topics per comment
1544
- topics_assigned, _ = topics_under.transform(comments_under)
1545
- comments_under_df["topic_id"] = topics_assigned
1546
- cur_topic_ids = topics_under.get_topic_info().Topic
1547
- topic_short_names = topics_under.get_topic_info().Name
1548
- topic_ids_to_names = {cur_topic_ids[i]: topic_short_names[i] for i in range(len(cur_topic_ids))}
1549
- comments_under_df["topic"] = [topic_ids_to_names[topic_id] for topic_id in comments_under_df["topic_id"].tolist()]
1550
-
1551
- comments_over, comments_over_df = get_disagreement_comments(preds_df_mod, mode="over-sensitive", n=1000)
1552
- if len(comments_over) > 0:
1553
- topics_over = BERTopic(embedding_model="paraphrase-MiniLM-L6-v2").fit(comments_over)
1554
- topics_over_top = topics_over.get_topic_info().head(n)["Name"].tolist()
1555
- print("topics_over", topics_over_top)
1556
- # Get topics per comment
1557
- topics_assigned, _ = topics_over.transform(comments_over)
1558
- comments_over_df["topic_id"] = topics_assigned
1559
- cur_topic_ids = topics_over.get_topic_info().Topic
1560
- topic_short_names = topics_over.get_topic_info().Name
1561
- topic_ids_to_names = {cur_topic_ids[i]: topic_short_names[i] for i in range(len(cur_topic_ids))}
1562
- comments_over_df["topic"] = [topic_ids_to_names[topic_id] for topic_id in comments_over_df["topic_id"].tolist()]
1563
-
1564
- cluster_df = pd.concat([comments_under_df, comments_over_df])
1565
- with open(f"./data/personal_cluster_dfs/{model}.pkl", "wb") as f:
1566
- pickle.dump(cluster_df, f)
1567
-
1568
- return topics_under_top, topics_over_top
1569
- return [], []
 
23
  from sentence_transformers import SentenceTransformer, util
24
  import torch
25
  from bertopic import BERTopic
26
+ from datetime import date
27
 
28
  ########################################
29
  # PRE-LOADING
 
38
 
39
  # Data-loading
40
  module_dir = "./"
41
+ with open(os.path.join(module_dir, "data/input/ids_to_comments.pkl"), "rb") as f:
42
  ids_to_comments = pickle.load(f)
43
+ with open(os.path.join(module_dir, "data/input/comments_to_ids.pkl"), "rb") as f:
44
  comments_to_ids = pickle.load(f)
45
+ system_preds_df = pd.read_pickle("data/input/system_preds_df.pkl")
46
+ sys_eval_df = pd.read_pickle(os.path.join(module_dir, "data/input/split_data/sys_eval_df.pkl"))
47
+ train_df = pd.read_pickle(os.path.join(module_dir, "data/input/split_data/train_df.pkl"))
 
 
48
  train_df_ids = train_df["item_id"].unique().tolist()
49
+ model_eval_df = pd.read_pickle(os.path.join(module_dir, "data/input/split_data/model_eval_df.pkl"))
50
+ ratings_df_full = pd.read_pickle(os.path.join(module_dir, "data/input/ratings_df_full.pkl"))
51
+ worker_info_df = pd.read_pickle("./data/input/worker_info_df.pkl")
52
 
53
+ topic_ids = system_preds_df.topic_id
54
+ topics = system_preds_df.topic
55
  topic_ids_to_topics = {topic_ids[i]: topics[i] for i in range(len(topic_ids))}
56
  topics_to_topic_ids = {topics[i]: topic_ids[i] for i in range(len(topic_ids))}
57
+ unique_topics_ids = sorted(system_preds_df.topic_id.unique())
58
  unique_topics = [topic_ids_to_topics[topic_id] for topic_id in range(len(topic_ids_to_topics) - 1)]
59
 
60
  def get_toxic_threshold():
61
  return TOXIC_THRESHOLD
62
 
63
+ def get_user_model_names(user):
64
+ # Fetch the user's models
65
+ output_dir = f"./data/output"
66
+ users = [name for name in os.listdir(output_dir) if os.path.isdir(os.path.join(output_dir, name))]
67
+ if user not in users:
68
+ # User does not exist
69
+ return []
70
  else:
71
+ # Fetch trained model names for the user
72
+ user_dir = f"./data/output/{user}"
73
+ user_models = [name for name in os.listdir(user_dir) if os.path.isdir(os.path.join(user_dir, name))]
74
  user_models.sort()
75
  return user_models
76
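A minimal usage sketch for the new lookup (the user name is hypothetical; assumes the ./data/output/<user>/<model> layout set up by the helpers added below):

    # Unknown users yield an empty list instead of raising
    models = get_user_model_names("demo_user")
    print(models)  # e.g. ["model_v1", "model_v2"] once the user has trained models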
 
 
78
  return unique_topics
79
 
80
  def get_large_clusters(min_n):
81
+ counts_df = system_preds_df.groupby(by=["topic_id"]).size().reset_index(name='counts')
82
  counts_df = counts_df[counts_df["counts"] >= min_n]
83
  return [topic_ids_to_topics[t_id] for t_id in sorted(counts_df["topic_id"].tolist()[1:])]
84
 
 
116
  }
117
  internal_to_readable = {v: k for k, v in readable_to_internal.items()}
118
119
 
120
+ ########################################
121
+ # Data storage helper functions
122
+ # Set up all directories for new user
123
+ def setup_user_dirs(cur_user):
124
+ user_dir = f"./data/output/{cur_user}"
125
+ if not os.path.isdir(user_dir):
126
+ os.mkdir(user_dir)
127
+ def setup_model_dirs(cur_user, cur_model):
128
+ model_dir = f"./data/output/{cur_user}/{cur_model}"
129
+ if not os.path.isdir(model_dir):
130
+ os.mkdir(model_dir) # Set up model dir
131
+ # Set up subdirs
132
+ os.mkdir(os.path.join(model_dir, "labels"))
133
+ os.mkdir(os.path.join(model_dir, "perf"))
134
+ def setup_user_model_dirs(cur_user, cur_model):
135
+ setup_user_dirs(cur_user)
136
+ setup_model_dirs(cur_user, cur_model)
137
+
138
+ # Charts
139
+ def get_chart_file(cur_user, cur_model):
140
+ chart_dir = f"./data/output/{cur_user}/{cur_model}"
141
+ return os.path.join(chart_dir, f"chart_overall_vis.json")
142
+
143
+ # Labels
144
+ def get_label_dir(cur_user, cur_model):
145
+ return f"./data/output/{cur_user}/{cur_model}/labels"
146
+ def get_n_label_files(cur_user, cur_model):
147
+ label_dir = get_label_dir(cur_user, cur_model)
148
+ return len([name for name in os.listdir(label_dir) if os.path.isfile(os.path.join(label_dir, name))])
149
+ def get_label_file(cur_user, cur_model, label_i=None):
150
+ if label_i is None:
151
+ # Get index to add on to end of list
152
+ label_i = get_n_label_files(cur_user, cur_model)
153
+ label_dir = get_label_dir(cur_user, cur_model)
154
+ return os.path.join(label_dir, f"{label_i}.pkl")
155
+
156
+ # Performance
157
+ def get_perf_dir(cur_user, cur_model):
158
+ return f"./data/output/{cur_user}/{cur_model}/perf"
159
+ def get_n_perf_files(cur_user, cur_model):
160
+ perf_dir = get_perf_dir(cur_user, cur_model)
161
+ return len([name for name in os.listdir(perf_dir) if os.path.isfile(os.path.join(perf_dir, name))])
162
+ def get_perf_file(cur_user, cur_model, perf_i=None):
163
+ if perf_i is None:
164
+ # Get index to add on to end of list
165
+ perf_i = get_n_perf_files(cur_user, cur_model)
166
+ perf_dir = get_perf_dir(cur_user, cur_model)
167
+ return os.path.join(perf_dir, f"{perf_i}.pkl")
168
+
169
+ # Predictions dataframe
170
+ def get_preds_file(cur_user, cur_model):
171
+ preds_dir = f"./data/output/{cur_user}/{cur_model}"
172
+ return os.path.join(preds_dir, f"preds_df.pkl")
173
+
174
+ # Reports
175
+ def get_reports_file(cur_user, cur_model):
176
+ return f"./data/output/{cur_user}/{cur_model}/reports.json"
177
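For reference, a minimal sketch of how these path helpers compose; the user and model names below are illustrative placeholders, not values from the repo:

    # Hypothetical usage of the data-storage helpers above
    cur_user, cur_model = "demo_user", "model_0"
    setup_user_model_dirs(cur_user, cur_model)             # creates ./data/output/demo_user/model_0/{labels,perf}
    label_file = get_label_file(cur_user, cur_model)       # .../labels/0.pkl (next unused index)
    perf_file = get_perf_file(cur_user, cur_model)         # .../perf/0.pkl
    chart_file = get_chart_file(cur_user, cur_model)       # .../chart_overall_vis.json
    reports_file = get_reports_file(cur_user, cur_model)   # .../reports.json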
 
178
  ########################################
179
  # General utils
 
203
 
204
  ########################################
205
  # GET_AUDIT utils
 
 
 
206
  def plot_metric_histogram(metric, user_metric, other_metric_vals, n_bins=10):
207
  hist, bin_edges = np.histogram(other_metric_vals, bins=n_bins, density=False)
208
  data = pd.DataFrame({
 
234
 
235
  return (bar + rule).interactive()
236
 
237
+ # Generates the summary plot across all topics for the user
238
+ def show_overall_perf(cur_model, error_type, cur_user, threshold=TOXIC_THRESHOLD, topic_vis_method="median", use_cache=True):
239
+ # Your perf (calculate using model and testset)
240
+ preds_file = get_preds_file(cur_user, cur_model)
241
+ with open(preds_file, "rb") as f:
242
+ preds_df = pickle.load(f)
243
+
244
+ chart_file = get_chart_file(cur_user, cur_model)
245
+ if use_cache and os.path.isfile(chart_file):
246
+ # Read from file if it exists
247
+ with open(chart_file, "r") as f:
248
+ topic_overview_plot_json = json.load(f)
 
 
249
  else:
250
+ # Otherwise, generate chart and save to file
251
+ if topic_vis_method == "median": # Default
252
+ preds_df_grp = preds_df.groupby(["topic", "user_id"]).median()
253
+ elif topic_vis_method == "mean":
254
+ preds_df_grp = preds_df.groupby(["topic", "user_id"]).mean()
255
+ topic_overview_plot_json = plot_overall_vis(preds_df=preds_df_grp, n_topics=200, threshold=threshold, error_type=error_type, cur_user=cur_user, cur_model=cur_model)
256
+ # Save to file
257
+ with open(chart_file, "w") as f:
258
+ json.dump(topic_overview_plot_json, f)
 
 
259
 
260
  return {
261
  "topic_overview_plot_json": json.loads(topic_overview_plot_json),
262
  }
263
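As a rough usage sketch (user and model names are placeholders), the server-side audit endpoint is expected to call this function and hand the resulting Vega-Lite spec to the front end:

    # Hypothetical call; reuses data/output/<user>/<model>/chart_overall_vis.json when present
    results = show_overall_perf(
        cur_model="model_0",
        error_type="Both",
        cur_user="demo_user",
        topic_vis_method="median",  # or "mean"
        use_cache=True,
    )
    chart_spec = results["topic_overview_plot_json"]  # Vega-Lite JSON for the overview plot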
 
 
264
  ########################################
265
  # GET_LABELING utils
266
+ def create_example_sets(n_label_per_bin, score_bins, keyword=None, topic=None):
267
  # Restrict to the keyword, if provided
268
+ df = system_preds_df.copy()
269
  if keyword != None:
270
  df = df[df["comment"].str.contains(keyword)]
271
 
 
290
 
291
  return ex_to_label
292
 
293
+ def get_grp_model_labels(n_label_per_bin, score_bins, grp_ids):
294
+ df = system_preds_df.copy()
295
 
296
  train_df_grp = train_df[train_df["user_id"].isin(grp_ids)]
297
  train_df_grp_avg = train_df_grp.groupby(by=["item_id"]).median().reset_index()
 
315
 
316
  return ratings_grp
317
 
318
+ ########################################
319
+ # SAVE_REPORT utils
320
+
321
+ # Convert the SEP field selection from the UI to the SEP enum value
322
+ def get_sep_enum(sep_selection):
323
+ if sep_selection == "Adversarial Example":
324
+ return "S0403: Adversarial Example"
325
+ elif sep_selection == "Accuracy":
326
+ return "P0204: Accuracy"
327
+ elif sep_selection == "Bias/Discrimination":
328
+ return "E0100: Bias/ Discrimination"
329
+ else:
330
+ return "P0200: Model issues"
331
+
332
+ # Format the description for the report including the provided title, error type, and text entry field ("Summary/Suggestions" text box)
333
+ def format_description(indie_label_json):
334
+ title = indie_label_json["title"]
335
+ error_type = indie_label_json["error_type"]
336
+ text_entry = indie_label_json["text_entry"]
337
+ return f"Title: {title}\nError Type: {error_type}\nSummary/Suggestions: {text_entry}"
338
+
339
+ # Convert an IndieLabel report JSON to the AVID JSON format.
340
+ # See the AVID report schema at https://avidml.org/avidtools/reference/report
341
+ #
342
+ # Important mappings:
343
+ #   IndieLabel Attribute    AVID Attribute           Example
344
+ #   text_entry              description              "I think the Perspective API
345
+ #                                                       is too sensitive. Here are some examples."
346
+ #   topic                   feature                  0_shes_woman_lady_face
347
+ #   persp_score             model_score              0.94
348
+ #   comment                 ori_input                "She looks beautiful"
349
+ #   user_rating             personal_model_score     0.92
350
+ #   user_decision           user_decision            "Non-toxic"
351
+ # Note that this is at the individual report level.
352
+ def convert_indie_label_json_to_avid_json(indie_label_json, cur_user, email, sep_selection):
353
+
354
+ # Setting up the structure with a dict to enable programmatic additions
355
+ avid_json_dict = {
356
+ "data_type": "AVID",
357
+ "data_version": None,
358
+ "metadata": None,
359
+ "affects": {
360
+ "developer": [],
361
+ "deployer": [
362
+ "Hugging Face"
363
+ ],
364
+ # TODO: Make artifacts malleable during modularity work
365
+ "artifacts": [
366
+ {
367
+ "type": "Model",
368
+ "name": "Perspective API"
369
+ }
370
+ ]
371
+ },
372
+ "problemtype": {
373
+ "classof": "Undefined", # I don't think any of the other ClassEnums are applicable. Link: https://avidml.org/avidtools/_modules/avidtools/datamodels/enums#ClassEnum
374
+ "type": "Detection",
375
+ "description": {
376
+ "lang": "eng", # TODO: Make language selectable
377
+ "value": "This report contains results from an end user audit conducted on Hugging Face."
378
+ }
379
+ },
380
+ "metrics": [ # Note: For the end users use case, I made each comment an example.
381
+ ],
382
+ "references": [],
383
+ "description": {
384
+ "lang": "eng", # TODO: Make language selectable
385
+ "value": "" # Leaving empty so the report comments can be contained here.
386
+ },
387
+ "impact": {
388
+ "avid": {
389
+ "risk_domain": [
390
+ "Ethics"
391
+ ],
392
+ "sep_view": [
393
+ "E0101: Group fairness"
394
+ ],
395
+ "lifecycle_view": [
396
+ "L05: Evaluation"
397
+ ],
398
+ "taxonomy_version": "0.2"
399
+ }
400
+ },
401
+ "credit": "", # Leaving empty so that credit can be assigned
402
+ "reported_date": "" # Leaving empty so that it can be dynamically filled in
403
+ }
404
+
405
+ avid_json_dict["description"] = format_description(indie_label_json)
406
+ avid_json_dict["reported_date"] = str(date.today())
407
+ # Assign credit to email if provided, otherwise default to randomly assigned username
408
+ if email != "":
409
+ avid_json_dict["credit"] = email
410
+ else:
411
+ avid_json_dict["credit"] = cur_user
412
+
413
+ sep_enum = get_sep_enum(sep_selection)
414
+ avid_json_dict["impact"]["avid"]["sep_view"] = [sep_enum]
415
+
416
+ for e in indie_label_json["evidence"]:
417
+ curr_metric = {}
418
+ curr_metric["name"] = "Perspective API"
419
+ curr_metric["detection_method"] = {
420
+ "type": "Detection",
421
+ "name": "Individual Example from End User Audit"
422
+ }
423
+ res_dict = {}
424
+ res_dict["feature"] = e["topic"]
425
+ res_dict["model_score"] = str(e["persp_score"]) # Converted to string to avoid Float type error with DB
426
+ res_dict["ori_input"] = e["comment"]
427
+ res_dict["personal_model_score"] = str(e["user_rating"]) # See above
428
+ res_dict["user_decision"] = e["user_decision"]
429
+ curr_metric["results"] = res_dict
430
+ avid_json_dict["metrics"].append(curr_metric)
431
+
432
+ new_report = json.dumps(avid_json_dict)
433
+ return new_report
434
+
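To make the attribute mapping above concrete, an incoming IndieLabel report might look roughly like this before conversion (all values below are illustrative):

    # Hypothetical IndieLabel report payload from the UI
    indie_label_json = {
        "title": "Possible over-sensitivity on positive comments",
        "error_type": "System may be over-sensitive",
        "text_entry": "I think the Perspective API is too sensitive. Here are some examples.",
        "evidence": [
            {
                "topic": "0_shes_woman_lady_face",
                "persp_score": 0.94,          # system (Perspective API) score
                "comment": "She looks beautiful",
                "user_rating": 0.92,          # personalized model's prediction
                "user_decision": "Non-toxic",
            },
        ],
    }
    avid_report = convert_indie_label_json_to_avid_json(
        indie_label_json, cur_user="demo_user", email="", sep_selection="Accuracy")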
435
  ########################################
436
  # GET_PERSONALIZED_MODEL utils
437
+ def fetch_existing_data(user, model_name):
438
  # Check if we have cached model performance
439
+ n_perf_files = get_n_perf_files(user, model_name)
440
+ if n_perf_files > 0:
 
441
  # Fetch cached results
442
+ perf_file = get_perf_file(user, model_name, n_perf_files - 1) # Get last performance file
443
+ with open(perf_file, "rb") as f:
444
  mae, mse, rmse, avg_diff = pickle.load(f)
445
  else:
446
+ raise Exception(f"Model {model_name} does not exist")
 
 
 
 
 
 
 
447
 
448
  # Fetch previous user-provided labels
449
  ratings_prev = None
450
+ n_label_files = get_n_label_files(user, model_name)
451
+ if n_label_files > 0:
452
+ label_file = get_label_file(user, model_name, n_label_files - 1) # Get last label file
453
+ with open(label_file, "rb") as f:
454
  ratings_prev = pickle.load(f)
455
  return mae, mse, rmse, avg_diff, ratings_prev
456
 
457
+ # Main function called by server's `get_personalized_model` endpoint
458
+ # Trains an updated model with the specified name, user, and ratings
459
+ # Saves ratings, performance metrics, and pre-computed predictions to files
460
+ # - model_name: name of the model to train
461
+ # - ratings: dictionary of comments to ratings
462
+ # - user: user name
463
+ # - top_n: number of comments to train on (used when a set was held out for original user study)
464
+ # - topic: topic to train on (used when tuning for a specific topic)
465
+ def train_updated_model(model_name, ratings, user, top_n=None, topic=None, debug=False):
466
  # Check if there is previously-labeled data; if so, combine it with this data
467
+ labeled_df = format_labeled_data(ratings, worker_id=user) # Treat ratings as full batch of all ratings
 
 
468
  ratings_prev = None
469
 
470
  # Filter out rows with "unsure" (-1)
471
  labeled_df = labeled_df[labeled_df["rating"] != -1]
472
 
473
  # Filter to top N for user study
474
+ if (topic is None) and (top_n is not None):
475
+ labeled_df = labeled_df.head(top_n)
 
476
  else:
477
  # For topic tuning, need to fetch old labels
478
+ n_label_files = get_n_label_files(user, model_name)
479
+ if n_label_files > 0:
480
  # Concatenate previous set of labels with this new batch of labels
481
+ label_file = get_label_file(user, model_name, n_label_files - 1) # Get last label file
482
+ with open(label_file, "rb") as f:
483
  ratings_prev = pickle.load(f)
484
+ labeled_df_prev = format_labeled_data(ratings_prev, worker_id=user)
485
  labeled_df_prev = labeled_df_prev[labeled_df_prev["rating"] != -1]
486
  ratings.update(ratings_prev) # append old ratings to ratings
487
  labeled_df = pd.concat([labeled_df_prev, labeled_df])
488
+ if debug:
489
+ print("len ratings for training:", len(labeled_df))
 
 
490
  # Save this batch of labels
491
+ label_file = get_label_file(user, model_name)
492
+ with open(label_file, "wb") as f:
493
  pickle.dump(ratings, f)
494
 
495
+ # Train model
496
+ cur_model, _, _, _ = train_user_model(ratings_df=labeled_df)
 
 
497
 
498
+ # Compute performance metrics
499
+ mae, mse, rmse, avg_diff = users_perf(cur_model, worker_id=user)
500
+ # Save performance metrics
501
+ perf_file = get_perf_file(user, model_name)
502
+ with open(perf_file, "wb") as f:
503
  pickle.dump((mae, mse, rmse, avg_diff), f)
504
 
505
+ # Pre-compute predictions for full dataset
506
+ cur_preds_df = get_preds_df(cur_model, [user], sys_eval_df=ratings_df_full)
507
+ # Save pre-computed predictions
508
+ preds_file = get_preds_file(user, model_name)
509
+ with open(preds_file, "wb") as f:
510
+ pickle.dump(cur_preds_df, f)
511
+
512
+ # Replace cached summary plot if it exists
513
+ show_overall_perf(cur_model=model_name, error_type="Both", cur_user=user, use_cache=False)
514
+
515
  ratings_prev = ratings
516
  return mae, mse, rmse, avg_diff, ratings_prev
517
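A minimal sketch of how this training entry point is meant to be called (ratings, user, and model names are made up; the comment strings must match entries in the loaded dataset so they can be mapped to item IDs):

    # Hypothetical call from the get_personalized_model endpoint
    ratings = {
        "example comment A": 0,   # 0 = not at all toxic ... 4 = extremely toxic
        "example comment B": 3,
        "example comment C": -1,  # -1 = unsure; filtered out before training
    }
    mae, mse, rmse, avg_diff, ratings_prev = train_updated_model(
        model_name="model_0", ratings=ratings, user="demo_user")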
 
518
+ def format_labeled_data(ratings, worker_id):
519
  all_rows = []
520
  for comment, rating in ratings.items():
521
  comment_id = comments_to_ids[comment]
 
525
  df = pd.DataFrame(all_rows, columns=["user_id", "item_id", "rating"])
526
  return df
527
 
528
+ def users_perf(model, worker_id, sys_eval_df=sys_eval_df):
529
  # Load the full empty dataset
530
  sys_eval_comment_ids = sys_eval_df.item_id.unique().tolist()
531
  empty_ratings_rows = [[worker_id, c_id, 0] for c_id in sys_eval_comment_ids]
 
541
  user_item_preds = get_predictions_by_user_and_item(predictions)
542
  df["pred"] = df.apply(lambda row: user_item_preds[(row.user_id, row.item_id)] if (row.user_id, row.item_id) in user_item_preds else np.nan, axis=1)
543
 
544
+ df = df.merge(system_preds_df, on="item_id", how="left", suffixes=('', '_sys'))
545
  df.dropna(subset = ["pred"], inplace=True)
546
+ df["rating"] = df.rating.astype("int32")
547
 
548
+ perf_metrics = get_overall_perf(df, worker_id) # mae, mse, rmse, avg_diff
549
  return perf_metrics
550
 
551
  def get_overall_perf(preds_df, user_id):
552
  # Prepare dataset to calculate performance
553
+ y_pred = preds_df[preds_df["user_id"] == user_id].rating_sys.to_numpy() # system's prediction
554
+ y_true = preds_df[preds_df["user_id"] == user_id].pred.to_numpy() # user's (predicted) ground truth
555
 
556
  # Get performance for user's model
557
  mae = mean_absolute_error(y_true, y_pred)
 
567
  user_item_preds[(uid, iid)] = est
568
  return user_item_preds
569
 
570
+ # Pre-computes predictions for the provided model and specified users on the system-eval dataset
571
+ # - model: trained model
572
+ # - user_ids: list of user IDs to compute predictions for
573
+ # - sys_eval_df: dataframe of system eval labels (pre-computed)
574
+ def get_preds_df(model, user_ids, sys_eval_df=sys_eval_df, bins=BINS, debug=False):
575
  # Prep dataframe for all predictions we'd like to request
576
  start = time.time()
577
  sys_eval_comment_ids = sys_eval_df.item_id.unique().tolist()
 
580
  for user_id in user_ids:
581
  empty_ratings_rows.extend([[user_id, c_id, 0] for c_id in sys_eval_comment_ids])
582
  empty_ratings_df = pd.DataFrame(empty_ratings_rows, columns=["user_id", "item_id", "rating"])
583
+ if debug:
584
+ print("setup", time.time() - start)
585
 
586
  # Evaluate model to get predictions
587
  start = time.time()
 
589
  eval_set_data = Dataset.load_from_df(empty_ratings_df, reader)
590
  _, testset = train_test_split(eval_set_data, test_size=1.)
591
  predictions = model.test(testset)
592
+ if debug:
593
+ print("train_test_split", time.time() - start)
594
 
595
  # Update dataframe with predictions
596
  start = time.time()
597
  df = empty_ratings_df.copy() # user_id, item_id, rating
598
  user_item_preds = get_predictions_by_user_and_item(predictions)
599
  df["pred"] = df.apply(lambda row: user_item_preds[(row.user_id, row.item_id)] if (row.user_id, row.item_id) in user_item_preds else np.nan, axis=1)
600
+ df = df.merge(system_preds_df, on="item_id", how="left", suffixes=('', '_sys'))
601
  df.dropna(subset = ["pred"], inplace=True)
602
+ df["rating"] = df.rating.astype("int32")
603
 
604
  # Get binned predictions (based on user prediction)
605
  df["prediction_bin"], out_bins = pd.cut(df["pred"], bins, labels=False, retbins=True)
 
607
 
608
  return df
609
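The predictions produced here are the same ones train_updated_model caches to disk and the overview plot reads back; a rough sketch of that pipeline (names are placeholders, and cur_model is the trained recommender returned by train_user_model):

    # Hypothetical: recompute and cache predictions, then refresh the cached overview chart
    cur_preds_df = get_preds_df(cur_model, ["demo_user"], sys_eval_df=ratings_df_full)
    with open(get_preds_file("demo_user", "model_0"), "wb") as f:
        pickle.dump(cur_preds_df, f)
    show_overall_perf(cur_model="model_0", error_type="Both", cur_user="demo_user", use_cache=False)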
 
610
+ # Given the full set of ratings, trains the specified model type and evaluates on the model eval set
611
+ # - ratings_df: dataframe of all ratings
612
+ # - train_df: dataframe of training labels
613
+ # - model_eval_df: dataframe of model eval labels (validation set)
614
+ # - train_frac: fraction of ratings to use for training
615
  def train_user_model(ratings_df, train_df=train_df, model_eval_df=model_eval_df, train_frac=0.75, model_type="SVD", sim_type=None, user_based=True):
616
  # Sample from shuffled labeled dataframe and add batch to train set; specified set size to model_eval set
617
+ labeled = ratings_df.sample(frac=1) # Shuffle the data
618
  batch_size = math.floor(len(labeled) * train_frac)
619
  labeled_train = labeled[:batch_size]
620
  labeled_model_eval = labeled[batch_size:]
 
627
 
628
  return model, perf, labeled_train, labeled_model_eval
629
 
630
+ # Given a set of labels split into training and validation (model_eval), trains the specified model type on the training labels and evaluates on the model_eval labels
631
+ # - train_df: dataframe of training labels
632
+ # - model_eval_df: dataframe of model eval labels (validation set)
633
+ # - model_type: type of model to train
634
+ def train_model(train_df, model_eval_df, model_type="SVD", sim_type=None, user_based=True, debug=False):
635
  # Train model
636
  reader = Reader(rating_scale=(0, 4))
637
  train_data = Dataset.load_from_df(train_df, reader)
 
660
  mae = accuracy.mae(predictions)
661
  mse = accuracy.mse(predictions)
662
 
663
+ if debug:
664
+ print(f"MAE: {mae}, MSE: {mse}, RMSE: {rmse}, FCP: {fcp}")
665
  perf = [mae, mse, rmse, fcp]
666
 
667
  return algo, perf
668
 
669
+ def plot_train_perf_results(user, model_name, mae):
670
+ n_perf_files = get_n_perf_files(user, model_name)
 
 
671
  all_rows = []
672
+ for i in range(n_perf_files):
673
+ perf_file = get_perf_file(user, model_name, i)
674
+ with open(perf_file, "rb") as f:
675
  mae, mse, rmse, avg_diff = pickle.load(f)
676
  all_rows.append([i, mae, "Your MAE"])
677
 
 
690
  width=500,
691
  )
692
 
693
+ # Manually set for now
694
+ mae_good = 1.0
695
+ mae_okay = 1.2
696
 
697
  plot_dim_width = 500
698
  domain_min = 0.0
699
+ domain_max = 2.0
700
  bkgd = alt.Chart(pd.DataFrame({
701
+ "start": [mae_okay, mae_good, domain_min],
702
+ "stop": [domain_max, mae_okay, mae_good],
703
+ "bkgd": ["Needs improvement", "Okay", "Good"],
704
  })).mark_rect(opacity=0.2).encode(
705
+ y=alt.Y("start:Q", scale=alt.Scale(domain=[0, domain_max]), title=""),
706
+ y2=alt.Y2("stop:Q", title="Performance (MAE)"),
707
  x=alt.value(0),
708
  x2=alt.value(plot_dim_width),
709
  color=alt.Color("bkgd:O", scale=alt.Scale(
710
+ domain=["Needs improvement", "Okay", "Good"],
711
  range=["red", "yellow", "green"]),
712
  title="How good is your MAE?"
713
  )
 
715
 
716
  plot = (bkgd + chart).properties(width=plot_dim_width).resolve_scale(color='independent')
717
  mae_status = None
718
+ if mae < mae_good:
719
+ mae_status = "Your MAE is in the <b>Good</b> range. Your model looks ready to go."
720
+ elif mae < mae_okay:
721
+ mae_status = "Your MAE is in the <b>Okay</b> range. Your model can be used, but you can provide additional labels to improve it."
722
  else:
723
+ mae_status = "Your MAE is in the <b>Needs improvement</b> range. Your model may need additional labels to improve."
724
  return plot, mae_status
725
 
726
  ########################################
 
819
 
820
  def get_category(row, threshold=0.3):
821
  k_to_category = {
822
+ "is_profane_frac": "Profanity",
823
+ "is_threat_frac": "Threat",
824
+ "is_identity_attack_frac": "Identity Attack",
825
+ "is_insult_frac": "Insult",
826
+ "is_sexual_harassment_frac": "Sexual Harassment",
827
  }
828
  categories = []
829
+ for k in ["is_profane_frac", "is_threat_frac", "is_identity_attack_frac", "is_insult_frac", "is_sexual_harassment_frac"]:
830
  if row[k] > threshold:
831
  categories.append(k_to_category[k])
832
 
 
839
  return f"#{row['item_id']}/#comment"
840
 
841
  def get_topic_url(row):
842
+ return f"#{row['topic']}/#topic"
843
 
844
+ # Plots overall results histogram (each block is a topic)
845
+ def plot_overall_vis(preds_df, error_type, cur_user, cur_model, n_topics=None, bins=VIS_BINS, threshold=TOXIC_THRESHOLD, sys_col="rating_sys"):
846
  df = preds_df.copy().reset_index()
847
 
848
  if n_topics is not None:
849
+ df = df[df["topic_id"] < n_topics]
850
 
851
  df["vis_pred_bin"], out_bins = pd.cut(df["pred"], bins, labels=VIS_BINS_LABELS, retbins=True)
852
+ df = df[df["user_id"] == cur_user].sort_values(by=["item_id"]).reset_index()
853
+ df["system_label"] = [("toxic" if r > threshold else "non-toxic") for r in df[sys_col].tolist()]
854
+ df["threshold"] = [threshold for r in df[sys_col].tolist()]
855
+ df["key"] = [get_key(sys, user, threshold) for sys, user in zip(df[sys_col].tolist(), df["pred"].tolist())]
856
  df["url"] = df.apply(lambda row: get_topic_url(row), axis=1)
857
 
858
  # Plot sizing
 
870
  # Main chart
871
  chart = alt.Chart(df).mark_square(opacity=0.8, size=mark_size, stroke="grey", strokeWidth=0.5).transform_window(
872
  groupby=['vis_pred_bin'],
873
+ sort=[{'field': sys_col}],
874
  id='row_number()',
875
  ignorePeers=True,
876
  ).encode(
877
  x=alt.X('vis_pred_bin:Q', title="Our prediction of your rating", scale=alt.Scale(domain=(domain_min, domain_max))),
878
+ y=alt.Y('id:O', title="Topics (ordered by System toxicity rating)", axis=alt.Axis(values=list(range(0, max_items, 5))), sort='descending'),
879
  color = alt.Color("key:O", scale=alt.Scale(
880
  domain=["System agrees: Non-toxic", "System agrees: Toxic", "System differs: Error > 1.5", "System differs: Error > 1.0", "System differs: Error > 0.5", "System differs: Error <=0.5"],
881
  range=["white", "#cbcbcb", "red", "#ff7a5c", "#ffa894", "#ffd1c7"]),
 
883
  ),
884
  href="url:N",
885
  tooltip = [
886
+ alt.Tooltip("topic:N", title="Topic"),
887
  alt.Tooltip("system_label:N", title="System label"),
888
+ alt.Tooltip(f"{sys_col}:Q", title="System rating", format=".2f"),
889
  alt.Tooltip("pred:Q", title="Your rating", format=".2f")
890
  ]
891
  )
 
949
  )
950
 
951
  plot = (bkgd + annotation + chart + rule).properties(height=(plot_dim_height), width=plot_dim_width).resolve_scale(color='independent').to_json()
 
 
 
 
 
 
 
952
  return plot
953
 
954
+ # Plots cluster results histogram (each block is a comment), but *without* a model
955
+ # as a point of reference (in contrast to plot_overall_vis_cluster)
956
+ def plot_overall_vis_cluster_no_model(cur_user, preds_df, n_comments=None, bins=VIS_BINS, threshold=TOXIC_THRESHOLD, sys_col="rating_sys"):
 
 
 
 
 
 
957
  df = preds_df.copy().reset_index()
958
 
959
+ df["vis_pred_bin"], out_bins = pd.cut(df[sys_col], bins, labels=VIS_BINS_LABELS, retbins=True)
960
+ df = df[df["user_id"] == cur_user].sort_values(by=[sys_col]).reset_index()
961
+ df["system_label"] = [("toxic" if r > threshold else "non-toxic") for r in df[sys_col].tolist()]
962
+ df["key"] = [get_key_no_model(sys, threshold) for sys in df[sys_col].tolist()]
 
963
  df["category"] = df.apply(lambda row: get_category(row), axis=1)
964
  df["url"] = df.apply(lambda row: get_comment_url(row), axis=1)
965
 
 
981
  # Main chart
982
  chart = alt.Chart(df).mark_square(opacity=0.8, size=mark_size, stroke="grey", strokeWidth=0.25).transform_window(
983
  groupby=['vis_pred_bin'],
984
+ sort=[{'field': sys_col}],
985
  id='row_number()',
986
  ignorePeers=True
987
  ).encode(
 
995
  ),
996
  href="url:N",
997
  tooltip = [
998
+ alt.Tooltip("comment:N", title="comment"),
999
+ alt.Tooltip(f"{sys_col}:Q", title="System rating", format=".2f"),
1000
  ]
1001
  )
1002
 
 
1047
  final_plot = (bkgd + annotation + chart + rule).properties(height=(plot_dim_height), width=plot_dim_width).resolve_scale(color='independent').to_json()
1048
 
1049
  return final_plot, df
1050
+
1051
+ # Plots cluster results histogram (each block is a comment) *with* a model as a point of reference
1052
+ def plot_overall_vis_cluster(cur_user, preds_df, error_type, n_comments=None, bins=VIS_BINS, threshold=TOXIC_THRESHOLD, sys_col="rating_sys"):
1053
+ df = preds_df.copy().reset_index()
1054
 
1055
  df["vis_pred_bin"], out_bins = pd.cut(df["pred"], bins, labels=VIS_BINS_LABELS, retbins=True)
1056
+ df = df[df["user_id"] == cur_user].sort_values(by=[sys_col]).reset_index(drop=True)
1057
+ df["system_label"] = [("toxic" if r > threshold else "non-toxic") for r in df[sys_col].tolist()]
1058
+ df["key"] = [get_key(sys, user, threshold) for sys, user in zip(df[sys_col].tolist(), df["pred"].tolist())]
 
 
1059
  df["category"] = df.apply(lambda row: get_category(row), axis=1)
1060
  df["url"] = df.apply(lambda row: get_comment_url(row), axis=1)
1061
 
1062
  if n_comments is not None:
1063
  n_to_sample = np.min([n_comments, len(df)])
1064
  df = df.sample(n=n_to_sample)
1065
+
1066
  # Plot sizing
1067
  domain_min = 0
1068
  domain_max = 4
 
1077
  # Main chart
1078
  chart = alt.Chart(df).mark_square(opacity=0.8, size=mark_size, stroke="grey", strokeWidth=0.25).transform_window(
1079
  groupby=['vis_pred_bin'],
1080
+ sort=[{'field': sys_col}],
1081
  id='row_number()',
1082
  ignorePeers=True
1083
  ).encode(
 
1090
  ),
1091
  href="url:N",
1092
  tooltip = [
1093
+ alt.Tooltip("comment:N", title="comment"),
1094
+ alt.Tooltip(f"{sys_col}:Q", title="System rating", format=".2f"),
1095
  alt.Tooltip("pred:Q", title="Your rating", format=".2f"),
1096
  alt.Tooltip("category:N", title="Potential toxicity categories")
1097
  ]
 
1157
 
1158
  return final_plot, df
1159
 
1160
+ def get_cluster_comments(df, error_type, threshold=TOXIC_THRESHOLD, sys_col="rating_sys", use_model=True, debug=False):
1161
  df["user_color"] = [get_user_color(user, threshold) for user in df["pred"].tolist()] # get cell colors
1162
+ df["system_color"] = [get_user_color(sys, threshold) for sys in df[sys_col].tolist()] # get cell colors
1163
+ df["error_color"] = [get_system_color(sys, user, threshold) for sys, user in zip(df[sys_col].tolist(), df["pred"].tolist())] # get cell colors
1164
+ df["error_type"] = [get_error_type(sys, user, threshold) for sys, user in zip(df[sys_col].tolist(), df["pred"].tolist())] # get error type in words
1165
+ df["error_amt"] = [abs(sys - threshold) for sys in df[sys_col].tolist()] # get raw error
1166
  df["judgment"] = ["" for _ in range(len(df))] # template for "agree" or "disagree" buttons
1167
 
1168
  if use_model:
1169
  df = df.sort_values(by=["error_amt"], ascending=False) # surface largest errors first
1170
  else:
1171
+ if debug:
1172
+ print("get_cluster_comments; not using model")
1173
+ df = df.sort_values(by=[sys_col], ascending=True)
1174
 
1175
  df["id"] = df["item_id"]
 
 
1176
  df["toxicity_category"] = df["category"]
1177
  df["user_rating"] = df["pred"]
1178
  df["user_decision"] = [get_decision(rating, threshold) for rating in df["pred"].tolist()]
1179
+ df["system_rating"] = df[sys_col]
1180
+ df["system_decision"] = [get_decision(rating, threshold) for rating in df[sys_col].tolist()]
 
 
1181
  df = df.round(decimals=2)
1182
 
1183
  # Filter to specified error type
 
1190
  elif error_type == "Both":
1191
  df = df[(df["error_type"] == "System may be under-sensitive") | (df["error_type"] == "System may be over-sensitive")]
1192
 
1193
+ return df
1194
 
1195
  # PERSONALIZED CLUSTERS utils
1196
  def get_disagreement_comments(preds_df, mode, n=10_000, threshold=TOXIC_THRESHOLD):
 
1209
  df = df.sort_values(by=["diff"], ascending=asc)
1210
  df = df.head(n)
1211
 
1212
+ return df["comment"].tolist(), df
1213
+
1214
+ def get_explore_df(n_examples, threshold):
1215
+ df = system_preds_df.sample(n=n_examples)
1216
+ df["system_decision"] = [get_decision(rating, threshold) for rating in df["rating"].tolist()]
1217
+ df["system_color"] = [get_user_color(sys, threshold) for sys in df["rating"].tolist()] # get cell colors
1218
+ return df
 
 
indie_label_svelte/public/global.css CHANGED
@@ -2,6 +2,7 @@ html, body {
2
  position: relative;
3
  width: 100%;
4
  height: 100%;
 
5
  }
6
 
7
  * {
@@ -88,13 +89,13 @@ h3 {
88
  }
89
  h5 {
90
  color: rgb(80, 80, 80);
91
- font-size: 25px;
92
  }
93
  h6 {
94
  margin-top: 50px;
95
  margin-bottom: 20px;
96
  text-transform: uppercase;
97
- font-size: 18px;
98
  }
99
 
100
  .head_3 {
@@ -107,7 +108,7 @@ h6 {
107
 
108
  .head_5 {
109
  color: rgb(80, 80, 80);
110
- font-size: 24px;
111
  font-weight: bold;
112
  margin-top: 25px;
113
  margin-bottom: 25px;
@@ -117,7 +118,7 @@ h6 {
117
  color: rgb(80, 80, 80);
118
  text-transform: uppercase;
119
  font-weight: bold;
120
- font-size: 18px;
121
  margin-top: 25px;
122
  margin-bottom: 25px;
123
  }
@@ -125,7 +126,7 @@ h6 {
125
  .head_6_non_cap {
126
  color: rgb(80, 80, 80);
127
  font-weight: bold;
128
- font-size: 18px;
129
  margin-top: 25px;
130
  margin-bottom: 25px;
131
  }
@@ -234,6 +235,10 @@ table {
234
  overflow-y: scroll;
235
  }
236
 
 
 
 
 
237
  .tab_header {
238
  position: fixed;
239
  background-color: #e3d6fd;
@@ -242,6 +247,11 @@ table {
242
  /* border-bottom: 1px solid grey; */
243
  }
244
 
 
 
 
 
 
245
  .label_table {
246
  height: 750px;
247
  overflow-y: scroll;
@@ -279,6 +289,7 @@ table {
279
 
280
  .audit_section {
281
  padding-top: 40px;
 
282
  }
283
 
284
  .comment_table_small {
 
2
  position: relative;
3
  width: 100%;
4
  height: 100%;
5
+ font-size: 12px;
6
  }
7
 
8
  * {
 
89
  }
90
  h5 {
91
  color: rgb(80, 80, 80);
92
+ font-size: 20px;
93
  }
94
  h6 {
95
  margin-top: 50px;
96
  margin-bottom: 20px;
97
  text-transform: uppercase;
98
+ font-size: 16px;
99
  }
100
 
101
  .head_3 {
 
108
 
109
  .head_5 {
110
  color: rgb(80, 80, 80);
111
+ font-size: 20px;
112
  font-weight: bold;
113
  margin-top: 25px;
114
  margin-bottom: 25px;
 
118
  color: rgb(80, 80, 80);
119
  text-transform: uppercase;
120
  font-weight: bold;
121
+ font-size: 16px;
122
  margin-top: 25px;
123
  margin-bottom: 25px;
124
  }
 
126
  .head_6_non_cap {
127
  color: rgb(80, 80, 80);
128
  font-weight: bold;
129
+ font-size: 16px;
130
  margin-top: 25px;
131
  margin-bottom: 25px;
132
  }
 
235
  overflow-y: scroll;
236
  }
237
 
238
+ .page_header {
239
+ height: 64px !important;
240
+ }
241
+
242
  .tab_header {
243
  position: fixed;
244
  background-color: #e3d6fd;
 
247
  /* border-bottom: 1px solid grey; */
248
  }
249
 
250
+ .tab_header span {
251
+ font-size: 14px;
252
+ line-height: normal !important;
253
+ }
254
+
255
  .label_table {
256
  height: 750px;
257
  overflow-y: scroll;
 
289
 
290
  .audit_section {
291
  padding-top: 40px;
292
+ width: 100%;
293
  }
294
 
295
  .comment_table_small {
indie_label_svelte/src/App.svelte CHANGED
@@ -4,18 +4,10 @@
4
 
5
  import HypothesisPanel from "./HypothesisPanel.svelte";
6
  import MainPanel from "./MainPanel.svelte";
7
- import SelectUserDialog from "./SelectUserDialog.svelte";
8
  import Explore from "./Explore.svelte";
9
- import Results from "./Results.svelte";
10
- import StudyLinks from "./StudyLinks.svelte";
11
- import { user } from './stores/cur_user_store.js';
12
- import { users } from "./stores/all_users_store.js";
13
 
14
  let personalized_model;
15
  let personalized_models = [];
16
-
17
- // let topic = "";
18
-
19
  let error_type_options = ['Both', 'System is under-sensitive', 'System is over-sensitive', 'Show errors and non-errors'];
20
  let error_type = error_type_options[0];
21
 
@@ -24,27 +16,6 @@
24
  let mode = searchParams.get("mode");
25
  let cur_user = searchParams.get("user");
26
 
27
- // Set cur_user if it's provided in URL params
28
- if (cur_user !== null) {
29
- user.update((value) => cur_user);
30
- }
31
-
32
- // Handle user dialog
33
- let user_dialog_open = false;
34
- user.subscribe(value => {
35
- cur_user = value;
36
- });
37
-
38
- // Handle all users
39
- let all_users = [];
40
- async function getUsers() {
41
- const response = await fetch("./get_users");
42
- const text = await response.text();
43
- const data = JSON.parse(text);
44
- all_users = data["users"];
45
- users.update((value) => all_users);
46
- }
47
-
48
  function getAuditSettings() {
49
  let req_params = {
50
  user: cur_user,
@@ -55,27 +26,12 @@
55
  .then(function (r_orig) {
56
  let r = JSON.parse(r_orig);
57
  personalized_models = r["personalized_models"];
58
- personalized_model = personalized_models[0]; // TEMP
59
- console.log("personalized_model", personalized_model);
60
- // personalized_model = "model_1632886687_iterA";
61
- // let clusters = r["clusters"];
62
- // topic = clusters[0]; // TEMP
63
  });
64
-
65
- // fetch("./audit_settings")
66
- // .then((r) => r.text())
67
- // .then(function (r_orig) {
68
- // let r = JSON.parse(r_orig);
69
- // personalized_models = r["personalized_models"];
70
- // personalized_model = personalized_models[0]; // TEMP
71
- // // personalized_model = "model_1632886687_iterA";
72
- // let clusters = r["clusters"];
73
- // topic = clusters[0]; // TEMP
74
- // });
75
  }
76
  onMount(async () => {
77
  getAuditSettings();
78
- getUsers();
79
  });
80
  </script>
81
 
@@ -88,22 +44,13 @@
88
  <div>
89
  <Explore />
90
  </div>
91
- {:else if mode == "results"}
92
- <div>
93
- <Results />
94
- </div>
95
- {:else if mode == "study_links"}
96
- <div>
97
- <StudyLinks />
98
- </div>
99
  {:else }
100
- <SelectUserDialog bind:open={user_dialog_open} cur_user={cur_user} />
101
  <div>
102
  {#key personalized_model }
103
- <HypothesisPanel model={personalized_model} bind:user_dialog_open={user_dialog_open}/>
104
  {/key}
105
 
106
- <MainPanel bind:model={personalized_model} bind:error_type={error_type} on:change />
107
  </div>
108
  {/if}
109
  </main>
 
4
 
5
  import HypothesisPanel from "./HypothesisPanel.svelte";
6
  import MainPanel from "./MainPanel.svelte";
 
7
  import Explore from "./Explore.svelte";
 
 
 
 
8
 
9
  let personalized_model;
10
  let personalized_models = [];
 
 
 
11
  let error_type_options = ['Both', 'System is under-sensitive', 'System is over-sensitive', 'Show errors and non-errors'];
12
  let error_type = error_type_options[0];
13
 
 
16
  let mode = searchParams.get("mode");
17
  let cur_user = searchParams.get("user");
18
 
 
19
  function getAuditSettings() {
20
  let req_params = {
21
  user: cur_user,
 
26
  .then(function (r_orig) {
27
  let r = JSON.parse(r_orig);
28
  personalized_models = r["personalized_models"];
29
+ personalized_model = personalized_models[0];
30
+ cur_user = r["user"];
 
 
 
31
  });
 
 
32
  }
33
  onMount(async () => {
34
  getAuditSettings();
 
35
  });
36
  </script>
37
 
 
44
  <div>
45
  <Explore />
46
  </div>
 
 
 
 
 
 
 
 
47
  {:else }
 
48
  <div>
49
  {#key personalized_model }
50
+ <HypothesisPanel model={personalized_model} cur_user={cur_user}/>
51
  {/key}
52
 
53
+ <MainPanel bind:model={personalized_model} bind:error_type={error_type} cur_user={cur_user} on:change />
54
  </div>
55
  {/if}
56
  </main>
indie_label_svelte/src/AppOld.svelte DELETED
@@ -1,127 +0,0 @@
1
- <svelte:head>
2
- <title>IndieLabel</title>
3
- </svelte:head>
4
-
5
- <script lang="ts">
6
- import { onMount } from "svelte";
7
- import Section from "./Section.svelte";
8
- import IterativeClustering from "./IterativeClustering.svelte";
9
- import OverallResults from "./OverallResults.svelte";
10
- import Labeling from "./Labeling.svelte";
11
- import HypothesisPanel from "./HypothesisPanel.svelte"
12
-
13
- let personalized_model;
14
- let personalized_models = [];
15
- let breakdown_category;
16
- let breakdown_categories = [];
17
- let systems = ["Perspective comment toxicity classifier"]; // Only one system for now
18
- let clusters = [];
19
- let promise = Promise.resolve(null);
20
-
21
- function getAuditSettings() {
22
- fetch("./audit_settings")
23
- .then((r) => r.text())
24
- .then(function (r_orig) {
25
- let r = JSON.parse(r_orig);
26
- breakdown_categories = r["breakdown_categories"];
27
- breakdown_category = breakdown_categories[0];
28
- personalized_models = r["personalized_models"];
29
- personalized_model = personalized_models[0];
30
- clusters = r["clusters"];
31
- });
32
- }
33
- onMount(async () => {
34
- getAuditSettings();
35
- });
36
-
37
- function handleAuditButton() {
38
- promise = getAudit();
39
- }
40
-
41
- async function getAudit() {
42
- let req_params = {
43
- pers_model: personalized_model,
44
- breakdown_axis: breakdown_category,
45
- perf_metric: "avg_diff",
46
- breakdown_sort: "difference",
47
- n_topics: 10,
48
- };
49
- let params = new URLSearchParams(req_params).toString();
50
- const response = await fetch("./get_audit?" + params);
51
- const text = await response.text();
52
- const data = JSON.parse(text);
53
- return data;
54
- }
55
-
56
- </script>
57
-
58
- <main>
59
- <HypothesisPanel model={personalized_model} />
60
-
61
- <Labeling />
62
-
63
- <IterativeClustering clusters={clusters} ind={1} personalized_model={personalized_model} />
64
-
65
- <div id="audit-settings" class="section">
66
- <h5>Audit settings</h5>
67
- <Section
68
- section_id="systems"
69
- section_title="What status-quo system would you like to audit?"
70
- section_opts={systems}
71
- bind:value={systems[0]}
72
- />
73
- <Section
74
- section_id="personalized_model"
75
- section_title="What model would you like to use to represent your views?"
76
- section_opts={personalized_models}
77
- bind:value={personalized_model}
78
- />
79
- <Section
80
- section_id="breakdown_category"
81
- section_title="How would you like to explore the performance of the system?"
82
- section_opts={breakdown_categories}
83
- bind:value={breakdown_category}
84
- />
85
- <button on:click={handleAuditButton}> Generate results </button>
86
- <div>
87
- Personalized model: {personalized_model}, Breakdown category: {breakdown_category}
88
- </div>
89
- </div>
90
-
91
- {#await promise}
92
- <p>...waiting</p>
93
- {:then audit_results}
94
- {#if audit_results}
95
- <OverallResults data={audit_results} clusters={clusters} personalized_model={personalized_model} />
96
- {/if}
97
- {:catch error}
98
- <p style="color: red">{error.message}</p>
99
- {/await}
100
- </main>
101
-
102
- <style>
103
- main {
104
- text-align: left;
105
- padding: 1em;
106
- max-width: 240px;
107
- margin: 0 0;
108
- }
109
- h3 {
110
- color: rgb(80, 80, 80);
111
- font-size: 30px;
112
- }
113
- h5 {
114
- color: rgb(80, 80, 80);
115
- font-size: 25px;
116
- }
117
- h6 {
118
- margin-top: 50px;
119
- text-transform: uppercase;
120
- font-size: 14px;
121
- }
122
- @media (min-width: 640px) {
123
- main {
124
- max-width: none;
125
- }
126
- }
127
- </style>
 
 
 
indie_label_svelte/src/Auditing.svelte CHANGED
@@ -7,7 +7,6 @@
7
  import HelpTooltip from "./HelpTooltip.svelte";
8
  import TopicTraining from "./TopicTraining.svelte";
9
 
10
- import { user } from './stores/cur_user_store.js';
11
  import { error_type } from './stores/error_type_store.js';
12
  import { topic_chosen } from './stores/cur_topic_store.js';
13
  import { model_chosen } from './stores/cur_model_store.js';
@@ -17,15 +16,13 @@
17
  import LayoutGrid, { Cell } from "@smui/layout-grid";
18
  import Radio from '@smui/radio';
19
  import FormField from '@smui/form-field';
20
- import Card, { Content } from '@smui/card';
21
  import{ Wrapper } from '@smui/tooltip';
22
  import IconButton from '@smui/icon-button';
23
- import Select, { Option } from "@smui/select";
24
  import Svelecte from '../node_modules/svelecte/src/Svelecte.svelte';
25
 
26
  export let personalized_model;
27
- // export let topic;
28
  export let cur_error_type = "Both";
 
29
 
30
  let evidence = [];
31
  let show_audit_settings = false;
@@ -54,8 +51,6 @@
54
  ]
55
 
56
  let personalized_models = [];
57
- let breakdown_category;
58
- let breakdown_categories = [];
59
  let systems = ["YouSocial comment toxicity classifier"]; // Only one system for now
60
  let clusters = [];
61
  let clusters_for_tuning = []
@@ -75,7 +70,6 @@
75
  let audit_type;
76
  if (scaffold_method == "fixed" || scaffold_method == "personal" || scaffold_method == "personal_group" || scaffold_method == "personal_test" || scaffold_method == "personal_cluster" || scaffold_method == "topic_train" || scaffold_method == "prompts") {
77
  audit_type = audit_types[1];
78
- // audit_type = audit_types[0];
79
  } else {
80
  // No scaffolding mode or tutorial
81
  audit_type = audit_types[0];
@@ -99,19 +93,8 @@
99
  use_group_model = true;
100
  }
101
 
102
- // TEMP
103
  let promise_cluster = Promise.resolve(null);
104
 
105
- // Get current user from store
106
- let cur_user;
107
- user.subscribe(value => {
108
- if (value != cur_user) {
109
- cur_user = value;
110
- personalized_model = "";
111
- getAuditSettings();
112
- }
113
- });
114
-
115
  // Get current topic from store
116
  let topic;
117
  topic_chosen.subscribe(value => {
@@ -126,8 +109,7 @@
126
  if (!personalized_models.includes(personalized_model)) {
127
  personalized_models.push(personalized_model);
128
  }
129
-
130
- handleClusterButton(); // re-render cluster results
131
  });
132
 
133
  // Save current error type
@@ -137,17 +119,13 @@
137
  handleClusterButton();
138
  }
139
 
140
- // Handle topic-specific training
141
- // let topic_training = null;
142
-
143
  async function updateTopicChosen() {
144
  if (topic != null) {
145
- console.log("updateTopicChosen", topic)
146
  topic_chosen.update((value) => topic);
147
  }
148
  }
149
 
150
- function getAuditSettings() {
151
  let req_params = {
152
  user: cur_user,
153
  scaffold_method: scaffold_method,
@@ -157,8 +135,6 @@
157
  .then((r) => r.text())
158
  .then(function (r_orig) {
159
  let r = JSON.parse(r_orig);
160
- breakdown_categories = r["breakdown_categories"];
161
- breakdown_category = breakdown_categories[0];
162
  personalized_models = r["personalized_models"];
163
  if (use_group_model) {
164
  let personalized_model_grp = r["personalized_model_grp"];
@@ -170,26 +146,27 @@
170
  model_chosen.update((value) => personalized_model);
171
  clusters = r["clusters"];
172
  clusters_for_tuning = r["clusters_for_tuning"];
173
- console.log("clusters", clusters); // TEMP
174
  topic = clusters[0]["options"][0]["text"];
175
  topic_chosen.update((value) => topic);
176
- handleAuditButton(); // TEMP
177
- handleClusterButton(); // TEMP
178
  });
179
  }
180
  onMount(async () => {
181
- getAuditSettings();
182
  });
183
 
184
  function handleAuditButton() {
185
  model_chosen.update((value) => personalized_model);
186
- promise = getAudit();
 
 
 
187
  }
188
 
189
- async function getAudit() {
190
  let req_params = {
191
- pers_model: personalized_model,
192
- breakdown_axis: breakdown_category,
193
  perf_metric: "avg_diff",
194
  breakdown_sort: "difference",
195
  n_topics: 10,
@@ -205,23 +182,22 @@
205
  }
206
 
207
  function handleClusterButton() {
208
- promise_cluster = getCluster();
209
  }
210
 
211
- async function getCluster() {
212
- if (personalized_model == "" || personalized_model == undefined) {
213
  return null;
214
  }
215
  let req_params = {
216
  cluster: topic,
217
  topic_df_ids: [],
218
- n_examples: 500, // TEMP
219
- pers_model: personalized_model,
220
  example_sort: "descending", // TEMP
221
  comparison_group: "status_quo", // TEMP
222
  search_type: "cluster",
223
  keyword: "",
224
- n_neighbors: 0,
225
  error_type: cur_error_type,
226
  use_model: use_model,
227
  scaffold_method: scaffold_method,
@@ -230,7 +206,6 @@
230
  const response = await fetch("./get_cluster_results?" + params);
231
  const text = await response.text();
232
  const data = JSON.parse(text);
233
- console.log(topic);
234
  return data;
235
  }
236
  </script>
@@ -240,16 +215,13 @@
240
  <div>
241
  <div style="margin-top: 30px">
242
  <span class="head_3">Auditing</span>
243
- <IconButton
244
- class="material-icons grey_button"
245
- size="normal"
246
- on:click={() => (show_audit_settings = !show_audit_settings)}
247
- >
248
- help_outline
249
- </IconButton>
250
  </div>
251
  <div style="width: 80%">
 
252
  <p>In this section, we'll be auditing the content moderation system. Here, you’ll be aided by a personalized model that will help direct your attention towards potential problem areas in the model’s performance. This model isn’t meant to be perfect, but is designed to help you better focus on areas that need human review.</p>
 
 
 
253
  </div>
254
 
255
  {#if show_audit_settings}
@@ -299,11 +271,14 @@
299
  </LayoutGrid>
300
  </div>
301
  </div>
 
 
302
  <p>Current model: {personalized_model}</p>
303
  {/if}
304
  </div>
305
 
306
  <!-- 1: All topics overview -->
 
307
  {#if audit_type == audit_types[0]}
308
  <div class="audit_section">
309
  <div class="head_5">Overview of all topics</div>
@@ -364,7 +339,7 @@
364
  </li>
365
  </ul>
366
  {#key topic}
367
- <TopicTraining topic={topic} />
368
  {/key}
369
  </div>
370
 
@@ -425,7 +400,7 @@
425
  clusters={clusters}
426
  model={personalized_model}
427
  data={cluster_results}
428
- table_width_pct={90}
429
  table_id={"main"}
430
  use_model={use_model}
431
  bind:evidence={evidence}
@@ -447,7 +422,7 @@
447
  <p>Next, you can optionally search for more comments to serve as evidence through manual keyword search (for individual words or phrases).</p>
448
  <div class="section_indent">
449
  {#key error_type}
450
- <KeywordSearch clusters={clusters} personalized_model={personalized_model} bind:evidence={evidence} use_model={use_model} on:change/>
451
  {/key}
452
  </div>
453
  </div>
@@ -457,7 +432,7 @@
457
  <div class="head_5">Finalize your current report</div>
458
  <p>Finally, review the report you've generated on the side panel and provide a brief summary of the problem you see. You may also list suggestions or insights into addressing this problem if you have ideas. This report will be directly used by the model developers to address the issue you've raised</p>
459
  </div>
460
-
461
  </div>
462
 
463
  <style>
 
7
  import HelpTooltip from "./HelpTooltip.svelte";
8
  import TopicTraining from "./TopicTraining.svelte";
9
 
 
10
  import { error_type } from './stores/error_type_store.js';
11
  import { topic_chosen } from './stores/cur_topic_store.js';
12
  import { model_chosen } from './stores/cur_model_store.js';
 
16
  import LayoutGrid, { Cell } from "@smui/layout-grid";
17
  import Radio from '@smui/radio';
18
  import FormField from '@smui/form-field';
 
19
  import{ Wrapper } from '@smui/tooltip';
20
  import IconButton from '@smui/icon-button';
 
21
  import Svelecte from '../node_modules/svelecte/src/Svelecte.svelte';
22
 
23
  export let personalized_model;
 
24
  export let cur_error_type = "Both";
25
+ export let cur_user;
26
 
27
  let evidence = [];
28
  let show_audit_settings = false;
 
51
  ]
52
 
53
  let personalized_models = [];
 
 
54
  let systems = ["YouSocial comment toxicity classifier"]; // Only one system for now
55
  let clusters = [];
56
  let clusters_for_tuning = []
 
70
  let audit_type;
71
  if (scaffold_method == "fixed" || scaffold_method == "personal" || scaffold_method == "personal_group" || scaffold_method == "personal_test" || scaffold_method == "personal_cluster" || scaffold_method == "topic_train" || scaffold_method == "prompts") {
72
  audit_type = audit_types[1];
 
73
  } else {
74
  // No scaffolding mode or tutorial
75
  audit_type = audit_types[0];
 
93
  use_group_model = true;
94
  }
95
 
 
96
  let promise_cluster = Promise.resolve(null);
97
 
 
 
 
 
 
 
 
 
 
 
98
  // Get current topic from store
99
  let topic;
100
  topic_chosen.subscribe(value => {
 
109
  if (!personalized_models.includes(personalized_model)) {
110
  personalized_models.push(personalized_model);
111
  }
112
+ getAuditResults();
 
113
  });
114
 
115
  // Save current error type
 
119
  handleClusterButton();
120
  }
121
 
 
 
 
122
  async function updateTopicChosen() {
123
  if (topic != null) {
 
124
  topic_chosen.update((value) => topic);
125
  }
126
  }
127
 
128
+ function getAuditResults() {
129
  let req_params = {
130
  user: cur_user,
131
  scaffold_method: scaffold_method,
 
135
  .then((r) => r.text())
136
  .then(function (r_orig) {
137
  let r = JSON.parse(r_orig);
 
 
138
  personalized_models = r["personalized_models"];
139
  if (use_group_model) {
140
  let personalized_model_grp = r["personalized_model_grp"];
 
146
  model_chosen.update((value) => personalized_model);
147
  clusters = r["clusters"];
148
  clusters_for_tuning = r["clusters_for_tuning"];
 
149
  topic = clusters[0]["options"][0]["text"];
150
  topic_chosen.update((value) => topic);
151
+ handleAuditButton();
152
+ handleClusterButton();
153
  });
154
  }
155
  onMount(async () => {
156
+ getAuditResults();
157
  });
158
 
159
  function handleAuditButton() {
160
  model_chosen.update((value) => personalized_model);
161
+ if (personalized_model == "" || personalized_model == undefined) {
162
+ return;
163
+ }
164
+ promise = getAudit(personalized_model);
165
  }
166
 
167
+ async function getAudit(pers_model) {
168
  let req_params = {
169
+ pers_model: pers_model,
 
170
  perf_metric: "avg_diff",
171
  breakdown_sort: "difference",
172
  n_topics: 10,
 
182
  }
183
 
184
  function handleClusterButton() {
185
+ promise_cluster = getCluster(personalized_model);
186
  }
187
 
188
+ async function getCluster(pers_model) {
189
+ if (pers_model == "" || pers_model == undefined) {
190
  return null;
191
  }
192
  let req_params = {
193
  cluster: topic,
194
  topic_df_ids: [],
195
+ cur_user: cur_user,
196
+ pers_model: pers_model,
197
  example_sort: "descending", // TEMP
198
  comparison_group: "status_quo", // TEMP
199
  search_type: "cluster",
200
  keyword: "",
 
201
  error_type: cur_error_type,
202
  use_model: use_model,
203
  scaffold_method: scaffold_method,
 
206
  const response = await fetch("./get_cluster_results?" + params);
207
  const text = await response.text();
208
  const data = JSON.parse(text);
 
209
  return data;
210
  }
211
  </script>
 
215
  <div>
216
  <div style="margin-top: 30px">
217
  <span class="head_3">Auditing</span>
 
 
 
 
 
 
 
218
  </div>
219
  <div style="width: 80%">
220
+ {#if personalized_model}
221
  <p>In this section, we'll be auditing the content moderation system. Here, you’ll be aided by a personalized model that will help direct your attention towards potential problem areas in the model’s performance. This model isn’t meant to be perfect, but is designed to help you better focus on areas that need human review.</p>
222
+ {:else}
223
+ <p>Please first train your personalized model by following the steps in the "Labeling" tab (click the top left tab above).</p>
224
+ {/if}
225
  </div>
226
 
227
  {#if show_audit_settings}
 
271
  </LayoutGrid>
272
  </div>
273
  </div>
274
+ {/if}
275
+ {#if personalized_model}
276
  <p>Current model: {personalized_model}</p>
277
  {/if}
278
  </div>
279
 
280
  <!-- 1: All topics overview -->
281
+ {#if personalized_model}
282
  {#if audit_type == audit_types[0]}
283
  <div class="audit_section">
284
  <div class="head_5">Overview of all topics</div>
 
339
  </li>
340
  </ul>
341
  {#key topic}
342
+ <TopicTraining topic={topic} cur_user={cur_user}/>
343
  {/key}
344
  </div>
345
 
 
400
  clusters={clusters}
401
  model={personalized_model}
402
  data={cluster_results}
403
+ table_width_pct={100}
404
  table_id={"main"}
405
  use_model={use_model}
406
  bind:evidence={evidence}
 
422
  <p>Next, you can optionally search for more comments to serve as evidence through manual keyword search (for individual words or phrases).</p>
423
  <div class="section_indent">
424
  {#key error_type}
425
+ <KeywordSearch clusters={clusters} personalized_model={personalized_model} cur_user={cur_user} bind:evidence={evidence} use_model={use_model} on:change/>
426
  {/key}
427
  </div>
428
  </div>
 
432
  <div class="head_5">Finalize your current report</div>
433
  <p>Finally, review the report you've generated on the side panel and provide a brief summary of the problem you see. You may also list suggestions or insights into addressing this problem if you have ideas. This report will be directly used by the model developers to address the issue you've raised</p>
434
  </div>
435
+ {/if}
436
  </div>
437
 
438
  <style>
indie_label_svelte/src/ClusterResults.svelte CHANGED
@@ -35,6 +35,7 @@
35
  export let evidence;
36
  export let table_id;
37
  export let use_model = true;
 
38
 
39
  let N_COMMENTS = 500;
40
  let show_num_ratings = false;
@@ -54,12 +55,10 @@
54
  //your code goes here on location change
55
  let cur_url = window.location.href;
56
  let cur_url_elems = cur_url.split("#");
57
- // console.log(cur_url_elems)
58
  if (cur_url_elems.length > 0) {
59
  let path = cur_url_elems[2];
60
  if (path == "comment") {
61
  let comment_id = cur_url_elems[1].split("/")[0];
62
- console.log("comment_id", comment_id)
63
  selected_comment_id = parseInt(comment_id);
64
  let table_ind = null;
65
  for (let i = 0; i < items.length; i++) {
@@ -129,7 +128,6 @@
129
  items = data["cluster_comments"];
130
  set_length = items.length;
131
  }
132
- // console.log(set_length);
133
 
134
  let cur_open_evidence;
135
  open_evidence.subscribe(value => {
@@ -323,8 +321,10 @@
323
  <Cell>Potential toxicity<br>categories</Cell>
324
  {/if}
325
  {/if}
326
-
 
327
  <Cell>Do you agree<br>with the system?</Cell>
 
328
 
329
  {#if !show_checkboxes}
330
  <Cell>Remove</Cell>
@@ -396,7 +396,8 @@
396
  </Cell>
397
  {/if}
398
  {/if}
399
-
 
400
  <Cell>
401
  <div>
402
  <FormField>
@@ -417,6 +418,7 @@
417
  </FormField>
418
  </div>
419
  </Cell>
 
420
 
421
  {#if !show_checkboxes}
422
  <Cell>
 
35
  export let evidence;
36
  export let table_id;
37
  export let use_model = true;
38
+ export let show_agree_disagree = false;
39
 
40
  let N_COMMENTS = 500;
41
  let show_num_ratings = false;
 
55
  //your code goes here on location change
56
  let cur_url = window.location.href;
57
  let cur_url_elems = cur_url.split("#");
 
58
  if (cur_url_elems.length > 0) {
59
  let path = cur_url_elems[2];
60
  if (path == "comment") {
61
  let comment_id = cur_url_elems[1].split("/")[0];
 
62
  selected_comment_id = parseInt(comment_id);
63
  let table_ind = null;
64
  for (let i = 0; i < items.length; i++) {
 
128
  items = data["cluster_comments"];
129
  set_length = items.length;
130
  }
 
131
 
132
  let cur_open_evidence;
133
  open_evidence.subscribe(value => {
 
321
  <Cell>Potential toxicity<br>categories</Cell>
322
  {/if}
323
  {/if}
324
+
325
+ {#if show_agree_disagree}
326
  <Cell>Do you agree<br>with the system?</Cell>
327
+ {/if}
328
 
329
  {#if !show_checkboxes}
330
  <Cell>Remove</Cell>
 
396
  </Cell>
397
  {/if}
398
  {/if}
399
+
400
+ {#if show_agree_disagree}
401
  <Cell>
402
  <div>
403
  <FormField>
 
418
  </FormField>
419
  </div>
420
  </Cell>
421
+ {/if}
422
 
423
  {#if !show_checkboxes}
424
  <Cell>
indie_label_svelte/src/CommentTable.svelte CHANGED
@@ -4,22 +4,18 @@
4
  import Button, { Label } from "@smui/button";
5
  import DataTable, { Head, Body, Row, Cell } from "@smui/data-table";
6
  import LinearProgress from '@smui/linear-progress';
7
- import IconButton from '@smui/icon-button';
8
- import { user } from './stores/cur_user_store.js';
9
 
10
  export let mode;
11
  export let model_name;
 
12
 
13
  let to_label = {};
14
  let promise = Promise.resolve(null);
15
  let n_complete_ratings;
16
  let n_unsure_ratings;
17
-
18
- // Get current user
19
- let cur_user;
20
- user.subscribe(value => {
21
- cur_user = value;
22
- });
23
 
24
  function getCommentsToLabel(cur_mode, n) {
25
  if (cur_mode == "train") {
@@ -48,6 +44,7 @@
48
  }
49
 
50
  function handleTrainModelButton() {
 
51
  promise = getModel("train");
52
  }
53
 
@@ -91,11 +88,14 @@
91
  user: cur_user,
92
  };
93
  let params = new URLSearchParams(req_params).toString();
94
- const response = await fetch("./get_personalized_model?" + params);
95
- const text = await response.text();
96
- const data = JSON.parse(text);
97
- to_label = data["ratings_prev"];
98
- console.log(data);
 
 
 
99
  return data;
100
  }
101
  </script>
@@ -221,12 +221,14 @@
221
  {/key}
222
 
223
  <div class="spacing_vert_40">
224
- <Button on:click={handleTrainModelButton} variant="outlined" disabled={(!n_complete_ratings) || (n_complete_ratings < 40)}>
225
  <Label>Train Model</Label>
226
  </Button>
 
227
  <Button on:click={getCompleteRatings} variant="outlined">
228
  <Label>Get Number of Comments Labeled</Label>
229
  </Button>
 
230
  <Button on:click={() => handleLoadCommentsButton(5)} variant="outlined">
231
  <Label>Fetch More Comments To Label</Label>
232
  </Button>
 
4
  import Button, { Label } from "@smui/button";
5
  import DataTable, { Head, Body, Row, Cell } from "@smui/data-table";
6
  import LinearProgress from '@smui/linear-progress';
7
+
8
+ import { model_chosen } from './stores/cur_model_store.js';
9
 
10
  export let mode;
11
  export let model_name;
12
+ export let cur_user;
13
 
14
  let to_label = {};
15
  let promise = Promise.resolve(null);
16
  let n_complete_ratings;
17
  let n_unsure_ratings;
18
+ let show_comments_labeled_count = false;
 
 
 
 
 
19
 
20
  function getCommentsToLabel(cur_mode, n) {
21
  if (cur_mode == "train") {
 
44
  }
45
 
46
  function handleTrainModelButton() {
47
+ getCompleteRatings();
48
  promise = getModel("train");
49
  }
50
 
 
88
  user: cur_user,
89
  };
90
  let params = new URLSearchParams(req_params).toString();
91
+ const data = await fetch("./get_personalized_model?" + params)
92
+ .then((r) => r.text())
93
+ .then(function (text) {
94
+ let data = JSON.parse(text);
95
+ to_label = data["ratings_prev"];
96
+ model_chosen.update((value) => model_name);
97
+ return data;
98
+ });
99
  return data;
100
  }
101
  </script>
 
221
  {/key}
222
 
223
  <div class="spacing_vert_40">
224
+ <Button on:click={handleTrainModelButton} variant="outlined">
225
  <Label>Train Model</Label>
226
  </Button>
227
+ {#if show_comments_labeled_count}
228
  <Button on:click={getCompleteRatings} variant="outlined">
229
  <Label>Get Number of Comments Labeled</Label>
230
  </Button>
231
+ {/if}
232
  <Button on:click={() => handleLoadCommentsButton(5)} variant="outlined">
233
  <Label>Fetch More Comments To Label</Label>
234
  </Button>
indie_label_svelte/src/Explore.svelte CHANGED
@@ -48,7 +48,6 @@
48
  const text = await response.text();
49
  const data = JSON.parse(text);
50
  cur_examples = JSON.parse(data["examples"]);
51
- console.log(cur_examples); // TEMP
52
  return true;
53
  }
54
  </script>
 
48
  const text = await response.text();
49
  const data = JSON.parse(text);
50
  cur_examples = JSON.parse(data["examples"]);
 
51
  return true;
52
  }
53
  </script>
indie_label_svelte/src/Hunch.svelte CHANGED
@@ -1,9 +1,7 @@
1
  <script lang="ts">
2
  import { onMount } from "svelte";
3
- import IterativeClustering from "./IterativeClustering.svelte";
4
  import Button, { Label } from "@smui/button";
5
  import Textfield from '@smui/textfield';
6
- import LinearProgress from "@smui/linear-progress";
7
 
8
  export let ind;
9
  export let hunch;
@@ -32,7 +30,6 @@
32
 
33
  <div>
34
  <div>
35
- <!-- <h6>Hunch {ind + 1}</h6> -->
36
  <h6>Topic:</h6>
37
  {topic}
38
  </div>
@@ -46,13 +43,6 @@
46
  label="My current hunch is that..."
47
  >
48
  </Textfield>
49
- <!-- <Button
50
- on:click={handleTestOnExamples}
51
- class="button_float_right spacing_vert"
52
- variant="outlined"
53
- >
54
- <Label>Test on examples</Label>
55
- </Button> -->
56
  </div>
57
 
58
  <div class="spacing_vert">
@@ -63,23 +53,7 @@
63
  <Label>Submit</Label>
64
  </Button>
65
  </div>
66
-
67
- <!-- {#await example_block}
68
- <div class="app_loading">
69
- <LinearProgress indeterminate />
70
- </div>
71
- {:then} -->
72
- <!-- {#if example_block}
73
- <IterativeClustering clusters={clusters} ind={ind + 1} personalized_model={model} />
74
- {/if} -->
75
- <!-- {:catch error}
76
- <p style="color: red">{error.message}</p>
77
- {/await} -->
78
  </div>
79
 
80
  <style>
81
- /* * {
82
- z-index: 11;
83
- overflow-x: hidden;
84
- } */
85
  </style>
 
1
  <script lang="ts">
2
  import { onMount } from "svelte";
 
3
  import Button, { Label } from "@smui/button";
4
  import Textfield from '@smui/textfield';
 
5
 
6
  export let ind;
7
  export let hunch;
 
30
 
31
  <div>
32
  <div>
 
33
  <h6>Topic:</h6>
34
  {topic}
35
  </div>
 
43
  label="My current hunch is that..."
44
  >
45
  </Textfield>
 
 
 
 
 
 
 
46
  </div>
47
 
48
  <div class="spacing_vert">
 
53
  <Label>Submit</Label>
54
  </Button>
55
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
56
  </div>
57
 
58
  <style>
 
 
 
 
59
  </style>
indie_label_svelte/src/HypothesisPanel.svelte CHANGED
@@ -1,12 +1,10 @@
1
  <script lang="ts">
2
  import { onMount } from "svelte";
3
  import ClusterResults from "./ClusterResults.svelte";
4
- import HelpTooltip from "./HelpTooltip.svelte";
5
 
6
  import Button, { Label } from "@smui/button";
7
  import Textfield from '@smui/textfield';
8
- import { user } from './stores/cur_user_store.js';
9
- import { error_type } from './stores/error_type_store.js';
10
  import { new_evidence } from './stores/new_evidence_store.js';
11
  import { open_evidence } from './stores/open_evidence_store.js';
12
  import { topic_chosen } from './stores/cur_topic_store.js';
@@ -24,37 +22,27 @@
24
  import Checkbox from '@smui/checkbox';
25
  import FormField from '@smui/form-field';
26
  import IconButton from "@smui/icon-button";
27
- import{ Wrapper } from '@smui/tooltip';
28
  import Radio from '@smui/radio';
29
- import Switch from '@smui/switch';
30
 
31
  export let model;
32
- // export let topic;
33
- export let user_dialog_open;
34
 
35
  let all_reports = [];
36
-
37
- let cur_user;
38
- user.subscribe(value => {
39
- cur_user = value;
40
- });
41
-
42
  let cur_topic;
43
  topic_chosen.subscribe(value => {
44
  cur_topic = value;
45
  });
 
 
46
 
47
  // Handle routing
48
  let searchParams = new URLSearchParams(window.location.search);
49
  let scaffold_method = searchParams.get("scaffold");
 
 
 
50
  let topic_vis_method = searchParams.get("topic_vis_method");
51
 
52
- // TODO: connect to selected["error_type"] so changes on main panel affect report panel
53
- // let cur_error_type;
54
- // error_type.subscribe(value => {
55
- // cur_error_type = value;
56
- // });
57
-
58
  // Handle drawer
59
  let open = false;
60
  let selected = null;
@@ -69,8 +57,6 @@
69
  cur_open_evidence = selected["evidence"];
70
  open_evidence.update((value) => cur_open_evidence);
71
  let isolated_topic = selected["title"].replace(/^(Topic: )/,'');
72
- console.log("selected title", selected["title"]);
73
- console.log(selected);
74
 
75
  // Close panel
76
  open = false;
@@ -151,6 +137,7 @@
151
  cur_user: cur_user,
152
  reports: JSON.stringify(all_reports),
153
  scaffold_method: scaffold_method,
 
154
  };
155
  let params = new URLSearchParams(req_params).toString();
156
  const response = await fetch("./save_reports?" + params);
@@ -218,328 +205,309 @@
218
  // Save current error type
219
  async function updateErrorType() {
220
  // Update error type on main page to be the selected error type
221
- // error_type.update((value) => cur_error_type);
222
- // selected["error_type"] = cur_error_type;
223
  editErrorType = false;
224
  }
225
 
 
 
 
 
 
 
 
 
 
226
  </script>
227
 
228
- <div class="hypothesis_panel">
229
- <div class="panel_header">
230
- <div class="panel_header_content">
231
- <div class="page_header">
232
- <!-- <span class="page_title">IndieLabel</span> -->
233
- <img src="/logo.png" style="height: 60px; padding: 0px 20px;" alt="IndieLabel" />
234
- <Button on:click={() => (user_dialog_open = true)} class="user_button" color="secondary" style="margin: 12px 10px;" >
235
- <Label>User: {cur_user}</Label>
236
- </Button>
237
- </div>
238
- <div class="hypotheses_header">
239
- <h5 style="float: left; margin: 0; padding: 5px 20px;">Your Audit Reports</h5>
240
- <Button
241
- on:click={() => (open = !open)}
242
- color="primary"
243
- style="float: right; padding: 10px; margin-right: 10px;"
244
- >
245
- {#if open}
246
- <Label>Close</Label>
247
- {:else}
248
- {#key unfinished_count}
249
- <Label>Unfinished reports ({unfinished_count})</Label>
250
- {/key}
251
- {/if}
252
- </Button>
 
 
 
 
 
 
 
 
 
253
  </div>
254
  </div>
255
- </div>
256
 
257
- <div class="panel_contents">
258
- <!-- Drawer -->
259
- {#await promise}
260
- <div class="app_loading_fullwidth">
261
- <LinearProgress indeterminate />
262
- </div>
263
- {:then reports}
264
- {#if reports}
265
- <div class="drawer-container">
266
- {#key open}
267
- <Drawer variant="dismissible" bind:open>
268
- <Header>
269
- <Title>Your Reports</Title>
270
- <Subtitle>Select a report to view.</Subtitle>
271
- </Header>
272
- <Content>
273
- <List twoLine>
274
- {#each reports as report}
275
- <Item
276
- href="javascript:void(0)"
277
- on:click={() => setActive(report)}
278
- activated={selected === report}
279
- >
280
- {#if report["complete_status"]}
281
- <Graphic class="material-icons" aria-hidden="true">task_alt</Graphic>
282
- {:else}
283
- <Graphic class="material-icons" aria-hidden="true">radio_button_unchecked</Graphic>
284
- {/if}
285
- <Text>
286
- <PrimaryText>
287
- {report["title"]}
288
- </PrimaryText>
289
- <SecondaryText>
290
- {report["error_type"]}
291
- </SecondaryText>
292
- </Text>
293
- </Item>
294
- {/each}
295
- </List>
296
- </Content>
297
- </Drawer>
298
- {/key}
299
- <AppContent class="app-content">
300
- <main class="main-content">
301
- {#if selected}
302
- <div class="head_6_highlight">
303
- Current Report
304
- </div>
305
- <div class="panel_contents2">
306
- <!-- Title -->
307
- <div class="spacing_vert">
308
- <div class="edit_button_row">
309
- {#if editTitle}
310
- <div class="edit_button_row_input">
311
- <Textfield
312
- bind:value={selected["title"]}
313
- label="Your report title"
314
- input$rows={4}
315
- textarea
316
- variant="outlined"
317
- style="width: 100%;"
318
- helperLine$style="width: 100%;"
319
- />
320
- </div>
321
- <div>
322
- <IconButton class="material-icons grey_button" size="button" on:click={() => (editTitle = false)}>
323
- check
324
- </IconButton>
325
- </div>
326
- {:else}
327
- {#if selected["title"] != ""}
328
- <div class="head_5">
329
- {selected["title"]}
330
- </div>
331
  {:else}
332
- <div class="grey_text">Enter a report title</div>
333
  {/if}
334
-
335
- <div>
336
- <IconButton class="material-icons grey_button" size="button" on:click={() => (editTitle = true)}>
337
- create
338
- </IconButton>
339
- </div>
340
- {/if}
341
- </div>
 
 
 
 
 
 
 
 
 
 
 
342
  </div>
 
 
 
343
 
344
- <!-- Error type -->
345
- <div class="spacing_vert_40">
346
- <div class="head_6">
347
- <b>Error Type</b>
 
 
 
348
  </div>
349
- <div class="edit_button_row">
350
- {#if editErrorType}
351
- <div>
352
- {#each error_type_options as e}
353
- <div style="display: flex; align-items: center;">
354
- <!-- <Wrapper rich>
 
 
 
 
 
355
  <FormField>
356
  <Radio bind:group={selected["error_type"]} value={e.opt} on:change={updateErrorType} color="secondary" />
357
  <span slot="label">
358
- {e.opt}
359
- <IconButton class="material-icons" size="button" disabled>help_outline</IconButton>
360
  </span>
361
  </FormField>
362
- <HelpTooltip text={e.help} />
363
- </Wrapper> -->
364
-
365
- <FormField>
366
- <Radio bind:group={selected["error_type"]} value={e.opt} on:change={updateErrorType} color="secondary" />
367
- <span slot="label">
368
- <b>{e.opt}</b> {e.descr}
369
- </span>
370
- </FormField>
371
  </div>
372
- {/each}
373
- </div>
374
- <!-- <div>
375
- <IconButton class="material-icons grey_button" size="button" on:click={() => (editErrorType = false)}>
376
- check
377
- </IconButton>
378
- </div> -->
379
- {:else}
380
- {#if selected["error_type"] != ""}
381
  <div>
382
- <p>{selected["error_type"]}</p>
 
 
383
  </div>
384
- {:else}
385
- <div class="grey_text">Select an error type</div>
386
  {/if}
387
-
388
- <div>
389
- <IconButton class="material-icons grey_button" size="button" on:click={() => (editErrorType = true)}>
390
- create
391
- </IconButton>
392
- </div>
393
- {/if}
394
  </div>
395
- </div>
396
-
397
- <!-- Evidence -->
398
- <div class="spacing_vert_40">
399
- <div class="head_6">
400
- <b>Evidence</b>
401
- </div>
402
- {#key cur_open_evidence}
403
- <div>
404
- {#if cur_open_evidence.length > 0}
405
- <ClusterResults
406
- cluster={cur_topic}
407
- model={model}
408
- data={{"cluster_comments": cur_open_evidence}}
409
- show_vis={false}
410
- show_checkboxes={false}
411
- table_width_pct={100}
412
- rowsPerPage={25}
413
- table_id={"panel"}
414
- />
415
- {:else}
416
- <p class="grey_text">
417
- Add examples from the main panel to see them here!
418
- </p>
419
- {/if}
 
420
  </div>
421
- {/key}
422
- </div>
423
 
424
- <div class="spacing_vert_60">
425
- <div class="head_6">
426
- <b>Summary/Suggestions</b>
 
 
 
 
 
 
 
 
 
 
 
 
 
427
  </div>
428
- <div class="spacing_vert">
429
- <Textfield
430
- style="width: 100%;"
431
- helperLine$style="width: 100%;"
432
- input$rows={8}
433
- textarea
434
- bind:value={selected["text_entry"]}
435
- label="My current hunch is that..."
436
- >
437
- </Textfield>
438
- </div>
439
-
440
- </div>
441
 
442
- <div class="spacing_vert_40">
443
- <div class="head_6">
444
- <b>Mark report as complete?</b>
445
- <FormField>
446
- <Checkbox checked={selected["complete_status"]} on:change={handleMarkComplete} />
447
- </FormField>
 
 
448
  </div>
449
-
450
  </div>
451
- </div>
452
- {/if}
453
- </main>
454
- </AppContent>
455
- </div>
456
- {/if}
457
- {:catch error}
458
- <p style="color: red">{error.message}</p>
459
- {/await}
460
- </div>
 
 
 
 
 
 
 
 
 
 
461
 
462
- <div class="panel_footer">
463
- <div class="panel_footer_contents">
464
-
465
-
466
- <Button
467
- on:click={handleNewReport}
468
- variant="outlined"
469
- color="secondary"
470
- style=""
471
- >
472
- <Label>New</Label>
473
- </Button>
474
-
475
- <Button
476
- on:click={handleDeleteReport}
477
- variant="outlined"
478
- color="secondary"
479
- style=""
480
- >
481
- <Label>Delete</Label>
482
- </Button>
483
-
484
- <Button
485
- on:click={handleSaveReport}
486
- variant="outlined"
487
- color="secondary"
488
- >
489
- <Label>Save</Label>
490
- </Button>
491
-
492
- <div>
493
- <span style="color: grey"><i>Last saved:
494
- {#await promise_save}
495
- <CircularProgress style="height: 32px; width: 32px;" indeterminate />
496
- {:then result}
497
- {#if result}
498
- {new Date().toLocaleTimeString()}
499
- {:else}
500
-
501
- {/if}
502
- {:catch error}
503
- <p style="color: red">{error.message}</p>
504
- {/await}
505
- </i></span>
506
  </div>
507
  </div>
 
508
  </div>
509
-
510
- <!-- TEMP -->
511
- <!-- {#key model}
512
- <div>Model: {model}</div>
513
- {/key} -->
514
  </div>
515
 
516
  <style>
517
- /* Drawer */
518
- /* .drawer-container {
519
- position: relative;
520
- display: flex;
521
- height: 350px;
522
- max-width: 600px;
523
- border: 1px solid
524
- var(--mdc-theme-text-hint-on-background, rgba(0, 0, 0, 0.1));
525
- overflow: hidden;
526
- z-index: 0;
527
- }
528
-
529
- * :global(.app-content) {
530
- flex: auto;
531
- overflow: auto;
532
- position: relative;
533
- flex-grow: 1;
534
- }
535
-
536
- .main-content {
537
- overflow: auto;
538
- padding: 16px;
539
- height: 100%;
540
- box-sizing: border-box;
541
- } */
542
-
543
  .panel_contents {
544
  padding: 0 20px;
545
  overflow-y: auto;
@@ -578,6 +546,13 @@
578
  :global(.mdc-button.user_button) {
579
  float: right;
580
  margin-right: 20px;
 
 
 
 
 
 
 
581
  }
582
 
583
  .page_header {
 
1
  <script lang="ts">
2
  import { onMount } from "svelte";
3
  import ClusterResults from "./ClusterResults.svelte";
4
+ import SubmitReportDialog from "./SubmitReportDialog.svelte";
5
 
6
  import Button, { Label } from "@smui/button";
7
  import Textfield from '@smui/textfield';
 
 
8
  import { new_evidence } from './stores/new_evidence_store.js';
9
  import { open_evidence } from './stores/open_evidence_store.js';
10
  import { topic_chosen } from './stores/cur_topic_store.js';
 
22
  import Checkbox from '@smui/checkbox';
23
  import FormField from '@smui/form-field';
24
  import IconButton from "@smui/icon-button";
 
25
  import Radio from '@smui/radio';
 
26
 
27
  export let model;
28
+ export let cur_user;
 
29
 
30
  let all_reports = [];
 
 
 
 
 
 
31
  let cur_topic;
32
  topic_chosen.subscribe(value => {
33
  cur_topic = value;
34
  });
35
+ // Handle submit report dialog
36
+ let submit_dialog_open = false;
37
 
38
  // Handle routing
39
  let searchParams = new URLSearchParams(window.location.search);
40
  let scaffold_method = searchParams.get("scaffold");
41
+ if (scaffold_method == null) {
42
+ scaffold_method = "personal"; // Default to personalized model scaffold
43
+ }
44
  let topic_vis_method = searchParams.get("topic_vis_method");
45
 
 
 
 
 
 
 
46
  // Handle drawer
47
  let open = false;
48
  let selected = null;
 
57
  cur_open_evidence = selected["evidence"];
58
  open_evidence.update((value) => cur_open_evidence);
59
  let isolated_topic = selected["title"].replace(/^(Topic: )/,'');
 
 
60
 
61
  // Close panel
62
  open = false;
 
137
  cur_user: cur_user,
138
  reports: JSON.stringify(all_reports),
139
  scaffold_method: scaffold_method,
140
+ model: model,
141
  };
142
  let params = new URLSearchParams(req_params).toString();
143
  const response = await fetch("./save_reports?" + params);
 
205
  // Save current error type
206
  async function updateErrorType() {
207
  // Update error type on main page to be the selected error type
 
 
208
  editErrorType = false;
209
  }
210
 
211
+ let promise_submit = Promise.resolve(null);
212
+ function handleSubmitReport() {
213
+ promise_submit = submitReport();
214
+ }
215
+ async function submitReport() {
216
+ submit_dialog_open = true;
217
+ return true;
218
+ }
219
+
220
  </script>
221
 
222
+ <div>
223
+ {#await promise_submit}
224
+ <CircularProgress style="height: 32px; width: 32px;" indeterminate />
225
+ {:then}
226
+ <SubmitReportDialog bind:open={submit_dialog_open} cur_user={cur_user} all_reports={all_reports}/>
227
+ {:catch error}
228
+ <p style="color: red">{error.message}</p>
229
+ {/await}
230
+ <div class="hypothesis_panel">
231
+ <div class="panel_header">
232
+ <div class="panel_header_content">
233
+ <div class="page_header">
234
+ <img src="/logo.png" style="height: 50px; padding: 0px 20px;" alt="IndieLabel" />
235
+ <Button class="user_button" color="secondary" style="margin: 12px 10px;" >
236
+ <Label>User: {cur_user}</Label>
237
+ </Button>
238
+ </div>
239
+ <div class="hypotheses_header">
240
+ <h5 style="float: left; margin: 0; padding: 5px 20px;">Your Audit Reports</h5>
241
+ <Button
242
+ on:click={() => (open = !open)}
243
+ color="primary"
244
+ disabled={model == null}
245
+ style="float: right; padding: 10px; margin-right: 10px;"
246
+ >
247
+ {#if open}
248
+ <Label>Close</Label>
249
+ {:else}
250
+ {#key unfinished_count}
251
+ <Label>Unfinished reports ({unfinished_count})</Label>
252
+ {/key}
253
+ {/if}
254
+ </Button>
255
+ </div>
256
  </div>
257
  </div>
 
258
 
259
+ {#if model == null}
260
+ <div class="panel_contents">
261
+ <p>You can start to author audit reports in this panel after you've trained your personalized model in the "Labeling" tab.</p>
262
+ </div>
263
+ {:else}
264
+ <div class="panel_contents">
265
+ <!-- Drawer -->
266
+ {#await promise}
267
+ <div class="app_loading_fullwidth">
268
+ <LinearProgress indeterminate />
269
+ </div>
270
+ {:then reports}
271
+ {#if reports}
272
+ <div class="drawer-container">
273
+ {#key open}
274
+ <Drawer variant="dismissible" bind:open>
275
+ <Header>
276
+ <Title>Your Reports</Title>
277
+ <Subtitle>Select a report to view.</Subtitle>
278
+ </Header>
279
+ <Content>
280
+ <List twoLine>
281
+ {#each reports as report}
282
+ <Item
283
+ href="javascript:void(0)"
284
+ on:click={() => setActive(report)}
285
+ activated={selected === report}
286
+ >
287
+ {#if report["complete_status"]}
288
+ <Graphic class="material-icons" aria-hidden="true">task_alt</Graphic>
 
 
 
289
  {:else}
290
+ <Graphic class="material-icons" aria-hidden="true">radio_button_unchecked</Graphic>
291
  {/if}
292
+ <Text>
293
+ <PrimaryText>
294
+ {report["title"]}
295
+ </PrimaryText>
296
+ <SecondaryText>
297
+ {report["error_type"]}
298
+ </SecondaryText>
299
+ </Text>
300
+ </Item>
301
+ {/each}
302
+ </List>
303
+ </Content>
304
+ </Drawer>
305
+ {/key}
306
+ <AppContent class="app-content">
307
+ <main class="main-content">
308
+ {#if selected}
309
+ <div class="head_6_highlight">
310
+ Current Report
311
  </div>
312
+ <div class="panel_contents2">
313
+ <!-- Title -->
314
+ <div class="spacing_vert">
315
+ <div class="edit_button_row">
316
+ {#if editTitle}
317
+ <div class="edit_button_row_input">
318
+ <Textfield
319
+ bind:value={selected["title"]}
320
+ label="Your report title"
321
+ input$rows={4}
322
+ textarea
323
+ variant="outlined"
324
+ style="width: 100%;"
325
+ helperLine$style="width: 100%;"
326
+ />
327
+ </div>
328
+ <div>
329
+ <IconButton class="material-icons grey_button" size="button" on:click={() => (editTitle = false)}>
330
+ check
331
+ </IconButton>
332
+ </div>
333
+ {:else}
334
+ {#if selected["title"] != ""}
335
+ <div class="head_5">
336
+ {selected["title"]}
337
+ </div>
338
+ {:else}
339
+ <div class="grey_text">Enter a report title</div>
340
+ {/if}
341
 
342
+ <div>
343
+ <IconButton class="material-icons grey_button" size="button" on:click={() => (editTitle = true)}>
344
+ create
345
+ </IconButton>
346
+ </div>
347
+ {/if}
348
+ </div>
349
  </div>
350
+
351
+ <!-- Error type -->
352
+ <div class="spacing_vert_40">
353
+ <div class="head_6">
354
+ <b>Error Type</b>
355
+ </div>
356
+ <div class="edit_button_row">
357
+ {#if editErrorType}
358
+ <div>
359
+ {#each error_type_options as e}
360
+ <div style="display: flex; align-items: center;">
361
  <FormField>
362
  <Radio bind:group={selected["error_type"]} value={e.opt} on:change={updateErrorType} color="secondary" />
363
  <span slot="label">
364
+ <b>{e.opt}</b> {e.descr}
 
365
  </span>
366
  </FormField>
367
+ </div>
368
+ {/each}
369
+ </div>
370
+ {:else}
371
+ {#if selected["error_type"] != ""}
372
+ <div>
373
+ <p>{selected["error_type"]}</p>
 
 
374
  </div>
375
+ {:else}
376
+ <div class="grey_text">Select an error type</div>
377
+ {/if}
378
+
 
 
 
 
 
379
  <div>
380
+ <IconButton class="material-icons grey_button" size="button" on:click={() => (editErrorType = true)}>
381
+ create
382
+ </IconButton>
383
  </div>
 
 
384
  {/if}
385
+ </div>
 
 
 
 
 
 
386
  </div>
387
+
388
+ <!-- Evidence -->
389
+ <div class="spacing_vert_40">
390
+ <div class="head_6">
391
+ <b>Evidence</b>
392
+ </div>
393
+ {#key cur_open_evidence}
394
+ <div>
395
+ {#if cur_open_evidence.length > 0}
396
+ <ClusterResults
397
+ cluster={cur_topic}
398
+ model={model}
399
+ data={{"cluster_comments": cur_open_evidence}}
400
+ show_vis={false}
401
+ show_checkboxes={false}
402
+ table_width_pct={100}
403
+ rowsPerPage={25}
404
+ table_id={"panel"}
405
+ />
406
+ {:else}
407
+ <p class="grey_text">
408
+ Add examples from the main panel to see them here!
409
+ </p>
410
+ {/if}
411
+ </div>
412
+ {/key}
413
  </div>
 
 
414
 
415
+ <div class="spacing_vert_60">
416
+ <div class="head_6">
417
+ <b>Summary/Suggestions</b>
418
+ </div>
419
+ <div class="spacing_vert">
420
+ <Textfield
421
+ style="width: 100%;"
422
+ helperLine$style="width: 100%;"
423
+ input$rows={8}
424
+ textarea
425
+ bind:value={selected["text_entry"]}
426
+ label="My current hunch is that..."
427
+ >
428
+ </Textfield>
429
+ </div>
430
+
431
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
432
 
433
+ <div class="spacing_vert_40">
434
+ <div class="head_6">
435
+ <b>Mark report as complete?</b>
436
+ <FormField>
437
+ <Checkbox checked={selected["complete_status"]} on:change={handleMarkComplete} />
438
+ </FormField>
439
+ </div>
440
+
441
  </div>
 
442
  </div>
443
+ {/if}
444
+ </main>
445
+ </AppContent>
446
+ </div>
447
+ {/if}
448
+ {:catch error}
449
+ <p style="color: red">{error.message}</p>
450
+ {/await}
451
+ </div>
452
+
453
+ <div class="panel_footer">
454
+ <div class="panel_footer_contents">
455
+ <Button
456
+ on:click={handleNewReport}
457
+ variant="outlined"
458
+ color="secondary"
459
+ style=""
460
+ >
461
+ <Label>New</Label>
462
+ </Button>
463
 
464
+ <!-- <Button
465
+ on:click={handleDeleteReport}
466
+ variant="outlined"
467
+ color="secondary"
468
+ style=""
469
+ >
470
+ <Label>Delete</Label>
471
+ </Button> -->
472
+
473
+ <Button
474
+ on:click={handleSaveReport}
475
+ variant="outlined"
476
+ color="secondary"
477
+ >
478
+ <Label>Save</Label>
479
+ </Button>
480
+
481
+ <Button
482
+ on:click={handleSubmitReport}
483
+ variant="outlined"
484
+ color="secondary"
485
+ >
486
+ <Label>Send Reports</Label>
487
+ </Button>
488
+
489
+ <div>
490
+ <span style="color: grey"><i>Last saved:
491
+ {#await promise_save}
492
+ <CircularProgress style="height: 32px; width: 32px;" indeterminate />
493
+ {:then result}
494
+ {#if result}
495
+ {new Date().toLocaleTimeString()}
496
+ {:else}
497
+
498
+ {/if}
499
+ {:catch error}
500
+ <p style="color: red">{error.message}</p>
501
+ {/await}
502
+ </i></span>
503
+ </div>
 
 
 
 
504
  </div>
505
  </div>
506
+ {/if}
507
  </div>
 
 
 
 
 
508
  </div>
509
 
510
  <style>
 
 
 
511
  .panel_contents {
512
  padding: 0 20px;
513
  overflow-y: auto;
 
546
  :global(.mdc-button.user_button) {
547
  float: right;
548
  margin-right: 20px;
549
+ max-width: 200px;
550
+ }
551
+
552
+ :global(.mdc-button.user_button span) {
553
+ text-overflow: ellipsis;
554
+ white-space: nowrap;
555
+ overflow: hidden;
556
  }
557
 
558
  .page_header {
indie_label_svelte/src/IterativeClustering.svelte DELETED
@@ -1,164 +0,0 @@
1
- <script>
2
- import Section from "./Section.svelte";
3
- import ClusterResults from "./ClusterResults.svelte";
4
- import Button, { Label } from "@smui/button";
5
- import Textfield from "@smui/textfield";
6
- import LayoutGrid, { Cell } from "@smui/layout-grid";
7
- import LinearProgress from "@smui/linear-progress";
8
- import Chip, { Set, Text } from '@smui/chips';
9
-
10
- export let clusters;
11
- export let personalized_model;
12
- export let evidence;
13
- export let width_pct = 80;
14
-
15
- let topic_df_ids = [];
16
- let promise_iter_cluster = Promise.resolve(null);
17
- let keyword = null;
18
- let n_neighbors = null;
19
- let cur_iter_cluster = null;
20
- let history = [];
21
-
22
- async function getIterCluster(search_type) {
23
- let req_params = {
24
- cluster: cur_iter_cluster,
25
- topic_df_ids: topic_df_ids,
26
- n_examples: 500, // TEMP
27
- pers_model: personalized_model,
28
- example_sort: "descending", // TEMP
29
- comparison_group: "status_quo", // TEMP
30
- search_type: search_type,
31
- keyword: keyword,
32
- n_neighbors: n_neighbors,
33
- };
34
- console.log("topic_df_ids", topic_df_ids);
35
- let params = new URLSearchParams(req_params).toString();
36
- const response = await fetch("./get_cluster_results?" + params);
37
- const text = await response.text();
38
- const data = JSON.parse(text);
39
- // if (data["cluster_comments"] == null) {
40
- // return false
41
- // }
42
- topic_df_ids = data["topic_df_ids"];
43
- return data;
44
- }
45
-
46
- function findCluster() {
47
- promise_iter_cluster = getIterCluster("cluster");
48
- history = history.concat("bulk-add cluster: " + cur_iter_cluster);
49
- }
50
-
51
- function findNeighbors() {
52
- promise_iter_cluster = getIterCluster("neighbors");
53
- history = history.concat("find " + n_neighbors + " neighbors");
54
- }
55
-
56
- function findKeywords() {
57
- promise_iter_cluster = getIterCluster("keyword");
58
- history = history.concat("keyword search: " + keyword);
59
- }
60
- </script>
61
-
62
- <div>
63
- <div>
64
- <!-- <h6>Hunch {ind} examples</h6> -->
65
- <div>
66
- <h6>Search Settings</h6>
67
- <!-- Start with cluster -->
68
- <!-- <div class="">
69
- <Section
70
- section_id="iter_cluster"
71
- section_title="Bulk-add cluster"
72
- section_opts={clusters}
73
- bind:value={cur_iter_cluster}
74
- width_pct={100}
75
- />
76
- <Button
77
- on:click={findCluster}
78
- variant="outlined"
79
- class="button_float_right"
80
- disabled={cur_iter_cluster == null}
81
- >
82
- <Label>Search</Label>
83
- </Button>
84
- </div> -->
85
-
86
- <!-- Manual keyword -->
87
- <div class="spacing_vert">
88
- <Textfield
89
- bind:value={keyword}
90
- label="Keyword search"
91
- variant="outlined"
92
- style="width: {width_pct}%"
93
- />
94
- <Button
95
- on:click={findKeywords}
96
- variant="outlined"
97
- class="button_float_right spacing_vert"
98
- disabled={keyword == null}
99
- >
100
- <Label>Search</Label>
101
- </Button>
102
- </div>
103
-
104
- <!-- Find neighbors of current set -->
105
- <div class="spacing_vert">
106
- <Textfield
107
- bind:value={n_neighbors}
108
- label="Number of neighbors to retrieve"
109
- type="number"
110
- min="1"
111
- max="50"
112
- variant="outlined"
113
- style="width: {width_pct}%"
114
- />
115
- <Button
116
- on:click={findNeighbors}
117
- variant="outlined"
118
- class="button_float_right spacing_vert"
119
- disabled={n_neighbors == null}
120
- >
121
- <Label>Search</Label>
122
- </Button>
123
- </div>
124
- </div>
125
- </div>
126
-
127
- {#await promise_iter_cluster}
128
- <div class="app_loading" style="width: {width_pct}%">
129
- <LinearProgress indeterminate />
130
- </div>
131
- {:then iter_cluster_results}
132
- {#if iter_cluster_results}
133
- {#if history.length > 0}
134
- <div class="bold" style="padding-top:40px;">Search History</div>
135
- <Set chips={history} let:chip choice>
136
- <Chip {chip}>
137
- <Text>{chip}</Text>
138
- </Chip>
139
- </Set>
140
- {/if}
141
- {#if iter_cluster_results.cluster_comments != null}
142
- <ClusterResults
143
- cluster={""}
144
- clusters={clusters}
145
- model={personalized_model}
146
- data={iter_cluster_results}
147
- show_vis={false}
148
- table_width_pct={80}
149
- bind:evidence={evidence}
150
- on:change
151
- />
152
- {:else}
153
- <div class="bold" style="padding-top:40px;">
154
- No results found
155
- </div>
156
- {/if}
157
- {/if}
158
- {:catch error}
159
- <p style="color: red">{error.message}</p>
160
- {/await}
161
- </div>
162
-
163
- <style>
164
- </style>
 
 
 
indie_label_svelte/src/KeywordSearch.svelte CHANGED
@@ -4,12 +4,11 @@
4
 
5
  import Button, { Label } from "@smui/button";
6
  import Textfield from "@smui/textfield";
7
- import LinearProgress from "@smui/linear-progress";
8
- import Chip, { Set, Text } from '@smui/chips';
9
-
10
 
11
  export let clusters;
12
  export let personalized_model;
 
13
  export let evidence;
14
  export let width_pct = 80;
15
  export let use_model = true;
@@ -17,7 +16,6 @@
17
  let topic_df_ids = [];
18
  let promise_iter_cluster = Promise.resolve(null);
19
  let keyword = null;
20
- let n_neighbors = null;
21
  let cur_iter_cluster = null;
22
  let history = [];
23
 
@@ -30,23 +28,18 @@
30
  let req_params = {
31
  cluster: cur_iter_cluster,
32
  topic_df_ids: topic_df_ids,
33
- n_examples: 500, // TEMP
34
  pers_model: personalized_model,
35
  example_sort: "descending", // TEMP
36
  comparison_group: "status_quo", // TEMP
37
  search_type: search_type,
38
  keyword: keyword,
39
- n_neighbors: n_neighbors,
40
  error_type: cur_error_type,
41
  };
42
- console.log("topic_df_ids", topic_df_ids);
43
  let params = new URLSearchParams(req_params).toString();
44
  const response = await fetch("./get_cluster_results?" + params);
45
  const text = await response.text();
46
  const data = JSON.parse(text);
47
- // if (data["cluster_comments"] == null) {
48
- // return false
49
- // }
50
  topic_df_ids = data["topic_df_ids"];
51
  return data;
52
  }
 
4
 
5
  import Button, { Label } from "@smui/button";
6
  import Textfield from "@smui/textfield";
7
+ import LinearProgress from "@smui/linear-progress";
 
 
8
 
9
  export let clusters;
10
  export let personalized_model;
11
+ export let cur_user;
12
  export let evidence;
13
  export let width_pct = 80;
14
  export let use_model = true;
 
16
  let topic_df_ids = [];
17
  let promise_iter_cluster = Promise.resolve(null);
18
  let keyword = null;
 
19
  let cur_iter_cluster = null;
20
  let history = [];
21
 
 
28
  let req_params = {
29
  cluster: cur_iter_cluster,
30
  topic_df_ids: topic_df_ids,
31
+ cur_user: cur_user,
32
  pers_model: personalized_model,
33
  example_sort: "descending", // TEMP
34
  comparison_group: "status_quo", // TEMP
35
  search_type: search_type,
36
  keyword: keyword,
 
37
  error_type: cur_error_type,
38
  };
 
39
  let params = new URLSearchParams(req_params).toString();
40
  const response = await fetch("./get_cluster_results?" + params);
41
  const text = await response.text();
42
  const data = JSON.parse(text);
 
 
 
43
  topic_df_ids = data["topic_df_ids"];
44
  return data;
45
  }
indie_label_svelte/src/Labeling.svelte CHANGED
@@ -8,7 +8,8 @@
8
  import Button, { Label } from "@smui/button";
9
  import LinearProgress from '@smui/linear-progress';
10
  import Svelecte from '../node_modules/svelecte/src/Svelecte.svelte';
11
- import { user } from './stores/cur_user_store.js';
 
12
 
13
  let model_name = "";
14
  let personalized_models = [];
@@ -16,24 +17,13 @@
16
  let label_modes = [
17
  "Create a new model",
18
  "Edit an existing model",
19
- "Tune your model for a topic area",
20
- "Set up a group-based model",
21
  ];
22
 
23
  let clusters_for_tuning = [];
24
  let topic;
25
 
26
- // Get current user
27
- let cur_user;
28
- user.subscribe(value => {
29
- if (value != cur_user) {
30
- cur_user = value;
31
- personalized_models = [];
32
- getLabeling();
33
- }
34
- cur_user = value;
35
- });
36
-
37
  // Handle routing
38
  let label_mode = label_modes[0];
39
  let searchParams = new URLSearchParams(window.location.search);
@@ -43,8 +33,10 @@
43
  } else if (req_label_mode == 1) {
44
  label_mode = label_modes[1];
45
  } else if (req_label_mode == 2) {
 
46
  label_mode = label_modes[2];
47
  } else if (req_label_mode == 3) {
 
48
  label_mode = label_modes[3];
49
  }
50
 
@@ -101,7 +93,6 @@
101
  const response = await fetch("./get_group_model?" + params);
102
  const text = await response.text();
103
  const data = JSON.parse(text);
104
- console.log("getGroupModel", data);
105
  return data
106
  }
107
 
@@ -172,7 +163,7 @@
172
  </li>
173
  </ul>
174
 
175
- <CommentTable mode={"train"} model_name={model_name}/>
176
  </div>
177
  {:else if label_mode == label_modes[1]}
178
  <!-- EXISTING MODEL -->
@@ -202,7 +193,7 @@
202
  </li>
203
  </ul>
204
  {#key existing_model_name}
205
- <CommentTable mode={"view"} model_name={existing_model_name}/>
206
  {/key}
207
  </div>
208
  {:else if label_mode == label_modes[2]}
@@ -239,7 +230,7 @@
239
  </li>
240
  </ul>
241
  {#key topic}
242
- <TopicTraining topic={topic} model_name={model_name} />
243
  {/key}
244
  </div>
245
 
 
8
  import Button, { Label } from "@smui/button";
9
  import LinearProgress from '@smui/linear-progress';
10
  import Svelecte from '../node_modules/svelecte/src/Svelecte.svelte';
11
+
12
+ export let cur_user;
13
 
14
  let model_name = "";
15
  let personalized_models = [];
 
17
  let label_modes = [
18
  "Create a new model",
19
  "Edit an existing model",
20
+ // "Tune your model for a topic area",
21
+ // "Set up a group-based model",
22
  ];
23
 
24
  let clusters_for_tuning = [];
25
  let topic;
26
 
 
 
 
 
 
 
 
 
 
 
 
27
  // Handle routing
28
  let label_mode = label_modes[0];
29
  let searchParams = new URLSearchParams(window.location.search);
 
33
  } else if (req_label_mode == 1) {
34
  label_mode = label_modes[1];
35
  } else if (req_label_mode == 2) {
36
+ // Unused; previous topic-based mode
37
  label_mode = label_modes[2];
38
  } else if (req_label_mode == 3) {
39
+ // Unused; previous group-based mode
40
  label_mode = label_modes[3];
41
  }
42
 
 
93
  const response = await fetch("./get_group_model?" + params);
94
  const text = await response.text();
95
  const data = JSON.parse(text);
 
96
  return data
97
  }
98
 
 
163
  </li>
164
  </ul>
165
 
166
+ <CommentTable mode={"train"} model_name={model_name} cur_user={cur_user}/>
167
  </div>
168
  {:else if label_mode == label_modes[1]}
169
  <!-- EXISTING MODEL -->
 
193
  </li>
194
  </ul>
195
  {#key existing_model_name}
196
+ <CommentTable mode={"view"} model_name={existing_model_name} cur_user={cur_user}/>
197
  {/key}
198
  </div>
199
  {:else if label_mode == label_modes[2]}
 
230
  </li>
231
  </ul>
232
  {#key topic}
233
+ <TopicTraining topic={topic} model_name={model_name} cur_user={cur_user}/>
234
  {/key}
235
  </div>
236
 
indie_label_svelte/src/MainPanel.svelte CHANGED
@@ -1,24 +1,20 @@
1
  <script lang="ts">
2
  import Labeling from "./Labeling.svelte";
3
  import Auditing from "./Auditing.svelte";
4
- import AppOld from "./AppOld.svelte";
5
 
6
  import Tab, { Label } from "@smui/tab";
7
  import TabBar from "@smui/tab-bar";
8
 
9
  export let model;
10
- // export let topic;
11
  export let error_type;
12
-
13
- let app_versions = ["old", "new"];
14
- let app_version = "new";
15
 
16
  // Handle routing
17
- let active = "auditing";
18
  let searchParams = new URLSearchParams(window.location.search);
19
  let tab = searchParams.get("tab");
20
- if (tab == "labeling") {
21
- active = "labeling";
22
  }
23
 
24
  </script>
@@ -37,37 +33,16 @@
37
  </div>
38
 
39
  <div class="panel_contents">
40
- <!-- VERSION SELECTION -->
41
- <!-- <div>
42
- <Section
43
- section_id="app_version"
44
- section_title="What app version do you want to use?"
45
- section_opts={app_versions}
46
- width_pct={40}
47
- bind:value={app_version}
48
- />
49
- </div> -->
50
-
51
- {#if app_version == app_versions[0]}
52
- <!-- OLD VERSION -->
53
- <AppOld />
54
- {:else if app_version == app_versions[1]}
55
- <!-- NEW VERSION -->
56
- <div>
57
- <div id="labeling" hidden={active == "auditing"} >
58
- <Labeling/>
59
- </div>
60
 
61
- <div id="auditing" hidden={active == "labeling"} >
62
- <Auditing bind:personalized_model={model} bind:cur_error_type={error_type} on:change/>
63
- </div>
64
  </div>
65
- {/if}
66
 
67
- <!-- TEMP -->
68
- <!-- {#key model}
69
- <div>Model: {model}</div>
70
- {/key} -->
71
  </div>
72
  </div>
73
 
 
1
  <script lang="ts">
2
  import Labeling from "./Labeling.svelte";
3
  import Auditing from "./Auditing.svelte";
 
4
 
5
  import Tab, { Label } from "@smui/tab";
6
  import TabBar from "@smui/tab-bar";
7
 
8
  export let model;
 
9
  export let error_type;
10
+ export let cur_user;
 
 
11
 
12
  // Handle routing
13
+ let active = "labeling";
14
  let searchParams = new URLSearchParams(window.location.search);
15
  let tab = searchParams.get("tab");
16
+ if (tab == "auditing") {
17
+ active = "auditing";
18
  }
19
 
20
  </script>
 
33
  </div>
34
 
35
  <div class="panel_contents">
36
+ <div>
37
+ <div id="labeling" hidden={active == "auditing"} >
38
+ <Labeling cur_user={cur_user}/>
39
+ </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
+ <div id="auditing" hidden={active == "labeling"} >
42
+ <Auditing bind:personalized_model={model} bind:cur_error_type={error_type} cur_user={cur_user} on:change/>
 
43
  </div>
44
+ </div>
45
 
 
 
 
 
46
  </div>
47
  </div>
48
 
indie_label_svelte/src/ModelPerf.svelte CHANGED
@@ -1,9 +1,7 @@
1
  <script lang="ts">
2
  import { VegaLite } from "svelte-vega";
3
  import type { View } from "svelte-vega";
4
-
5
  import LayoutGrid, { Cell } from "@smui/layout-grid";
6
- import Card, { Content } from '@smui/card';
7
 
8
  export let data;
9
 
@@ -13,64 +11,25 @@
13
  ];
14
  let perf_plot_view: View;
15
 
16
- // let perf_plot2_spec = data["perf_plot2_json"];
17
- // let perf_plot2_data = perf_plot2_spec["datasets"][perf_plot2_spec["data"]["name"]];
18
- // let perf_plot2_view: View;
19
  </script>
20
 
21
  <div>
22
  <h6>Your Model Performance</h6>
23
- <LayoutGrid>
24
- <Cell span={8}>
25
- <div class="card-container">
26
- <Card variant="outlined" padded>
27
- <p class="mdc-typography--button"><b>Interpreting your model performance</b></p>
28
- <ul>
29
- <li>
30
- The <b>Mean Absolute Error (MAE)</b> metric indicates the average absolute difference between your model's rating and your actual rating on a held-out set of comments.
31
- </li>
32
- <li>
33
- You want your model to have a <b>lower</b> MAE (indicating <b>less error</b>).
34
- </li>
35
- <li>
36
- <b>Your current MAE: {data["mae"]}</b>
37
- <ul>
38
- <li>{@html data["mae_status"]}</li>
39
- <!-- <li>
40
- This is <b>better</b> (lower) than the average MAE for other users, so your model appears to <b>better capture</b> your views than the typical user model.
41
- </li> -->
42
- </ul>
43
- </li>
44
- </ul>
45
- </Card>
46
- </div>
47
- </Cell>
48
- </LayoutGrid>
49
  <div>
50
- <!-- Overall -->
51
- <!-- <table>
52
- <tbody>
53
- <tr>
54
- <td>
55
- <span class="bold">Mean Absolute Error (MAE)</span><br>
56
-
57
- </td>
58
- <td>
59
- <span class="bold-large">{data["mae"]}</span>
60
- </td>
61
- </tr>
62
- <tr>
63
- <td>
64
- <span class="bold">Average rating difference</span><br>
65
- This metric indicates the average difference between your model's rating and your actual rating on a held-out set of comments.
66
- </td>
67
- <td>
68
- <span class="bold-large">{data["avg_diff"]}</span>
69
- </td>
70
- </tr>
71
- </tbody>
72
- </table> -->
73
-
74
  <!-- Performance visualization -->
75
  <div>
76
  <VegaLite {perf_plot_data} spec={perf_plot_spec} bind:view={perf_plot_view}/>
 
1
  <script lang="ts">
2
  import { VegaLite } from "svelte-vega";
3
  import type { View } from "svelte-vega";
 
4
  import LayoutGrid, { Cell } from "@smui/layout-grid";
 
5
 
6
  export let data;
7
 
 
11
  ];
12
  let perf_plot_view: View;
13
 
 
 
 
14
  </script>
15
 
16
  <div>
17
  <h6>Your Model Performance</h6>
18
+ <ul>
19
+ <li>
20
+ The <b>Mean Absolute Error (MAE)</b> metric indicates the average absolute difference <br>between your model's rating and your actual rating on a held-out set of comments.
21
+ </li>
22
+ <li>
23
+ You want your model to have a <b>lower</b> MAE (indicating <b>less error</b>).
24
+ </li>
25
+ <li>
26
+ <b>Your current MAE: {data["mae"]}</b>
27
+ <ul>
28
+ <li>{@html data["mae_status"]}</li>
29
+ </ul>
30
+ </li>
31
+ </ul>
 
 
 
 
 
 
 
 
 
 
 
 
32
  <div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  <!-- Performance visualization -->
34
  <div>
35
  <VegaLite {perf_plot_data} spec={perf_plot_spec} bind:view={perf_plot_view}/>
indie_label_svelte/src/OverallResults.svelte CHANGED
@@ -19,18 +19,6 @@
19
  let topic_overview_spec = topic_overview_json;
20
  let topic_overview_view: View;
21
 
22
- // // Overall Histogram
23
- // let overall_hist_json = data["overall_perf"]["overall_hist_json"];
24
- // let overall_hist_data = overall_hist_json["datasets"][overall_hist_json["data"]["name"]];
25
- // let overall_hist_spec = overall_hist_json;
26
- // let overall_hist_view: View;
27
-
28
- // // Class-conditional Histogram
29
- // let class_cond_plot_json = data["overall_perf"]["class_cond_plot_json"];
30
- // let class_cond_plot_data = class_cond_plot_json["datasets"][class_cond_plot_json["data"]["name"]];
31
- // let class_cond_plot_spec = class_cond_plot_json;
32
- // let class_cond_plot_view: View;
33
-
34
  </script>
35
 
36
  <div>
@@ -84,73 +72,6 @@
84
  </div>
85
  </div>
86
 
87
- <!-- Old visualizations -->
88
- <!-- <div style="margin-top: 500px">
89
- <h6>Overall Performance</h6>
90
- <div class="row">
91
- <div class="col s12">
92
- <div id="overall_perf">
93
- <table>
94
- <tbody>
95
- <tr class="custom-blue">
96
- <td class="bold"
97
- >System {data[
98
- "overall_perf"
99
- ]["metric"]} with YOUR labels</td
100
- >
101
- <td>
102
- <span class="bold-large"
103
- >{data[
104
- "overall_perf"
105
- ]["user_metric"]}</span
106
- >
107
- (Percentile: {data[
108
- "overall_perf"
109
- ]["user_percentile"]})
110
- </td>
111
- </tr>
112
- <tr>
113
- <td class="bold"
114
- >System {data[
115
- "overall_perf"
116
- ]["metric"]} with OTHER USERS' labels</td
117
- >
118
- <td>
119
- <span class="bold-large"
120
- >{data[
121
- "overall_perf"
122
- ]["other_metric"]}</span
123
- >
124
- (95% CI: [{data[
125
- "overall_perf"
126
- ]["other_ci_low"]}, {data[
127
- "overall_perf"
128
- ]["other_ci_high"]}])
129
- </td>
130
- </tr>
131
- </tbody>
132
- </table>
133
- </div>
134
- </div>
135
- </div>
136
- <div class="row">
137
- <div class="col s8">
138
- <VegaLite {overall_hist_data} spec={overall_hist_spec} bind:view={overall_hist_view}/>
139
- </div>
140
- </div>
141
-
142
- <h6>Performance Breakdown</h6>
143
- <div class="row">
144
- <div class="col s12">
145
- <div class="row">
146
- <div class="col s12">
147
- <VegaLite {class_cond_plot_data} spec={class_cond_plot_spec} bind:view={class_cond_plot_view} />
148
- </div>
149
- </div>
150
- </div>
151
- </div>
152
- </div> -->
153
-
154
  </div>
155
  <style>
156
  </style>
 
19
  let topic_overview_spec = topic_overview_json;
20
  let topic_overview_view: View;
21
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  </script>
23
 
24
  <div>
 
72
  </div>
73
  </div>
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  </div>
76
  <style>
77
  </style>
indie_label_svelte/src/Results.svelte DELETED
@@ -1,206 +0,0 @@
1
- <script lang="ts">
2
- import { onMount } from "svelte";
3
- import ClusterResults from "./ClusterResults.svelte";
4
-
5
- import Button, { Label } from "@smui/button";
6
- import LinearProgress from "@smui/linear-progress";
7
- import Checkbox from '@smui/checkbox';
8
- import DataTable, {
9
- Head,
10
- Body,
11
- Row,
12
- Cell,
13
- Label,
14
- SortValue,
15
- } from "@smui/data-table";
16
- import FormField from "@smui/form-field";
17
-
18
- let cur_examples = [];
19
- let promise = Promise.resolve(null);
20
-
21
- let scaffold_methods = ["personal", "personal_group", "prompts"];
22
-
23
- let all_users = [];
24
- async function getUsers() {
25
- const response = await fetch("./get_users");
26
- const text = await response.text();
27
- const data = JSON.parse(text);
28
- all_users = data["users"];
29
- promise = getResults();
30
- }
31
-
32
- onMount(async () => {
33
- getUsers()
34
- });
35
-
36
- async function getResults() {
37
- let req_params = {
38
- users: all_users
39
- };
40
- let params = new URLSearchParams(req_params).toString();
41
- const response = await fetch("./get_results?" + params);
42
- const text = await response.text();
43
- const data = JSON.parse(text);
44
-
45
- let results = data["results"];
46
- return results;
47
- }
48
-
49
- function get_complete_ratio(reports) {
50
- let total = reports.length;
51
- let complete = reports.filter(item => item.complete_status).length;
52
- return "" + complete + "/" + total + " complete";
53
- }
54
-
55
- function get_complete_count(reports) {
56
- return reports.filter(item => item.complete_status).length;
57
- }
58
-
59
- function get_summary(reports) {
60
- let summary = "";
61
- let total_audits = 0
62
- for (const scaffold_method of scaffold_methods) {
63
- if (reports[scaffold_method]) {
64
- let cur_reports = reports[scaffold_method];
65
- let cur_ratio = get_complete_ratio(cur_reports);
66
- let cur_result = "<li><b>" + scaffold_method + "</b>: " + cur_ratio + "</li>";
67
- summary += cur_result;
68
- let cur_complete = get_complete_count(cur_reports);
69
- total_audits += cur_complete;
70
- }
71
- }
72
-
73
- let top_summary = "<li><b>Total audits</b>: " + total_audits + "</li>";
74
- summary = "<ul>" + top_summary + summary + "</ul>";
75
- return summary;
76
- }
77
-
78
- function get_url(user, scaffold_method) {
79
- return "http://localhost:5001/?user=" + user + "&scaffold=" + scaffold_method;
80
- }
81
- </script>
82
-
83
- <svelte:head>
84
- <title>Results</title>
85
- </svelte:head>
86
-
87
- <div class="panel">
88
- <div class="panel_contents">
89
- <div>
90
- <h3>Results</h3>
91
- </div>
92
-
93
- <div style="padding-top:50px">
94
- {#await promise}
95
- <div class="app_loading">
96
- <LinearProgress indeterminate />
97
- </div>
98
- {:then results}
99
- {#if results}
100
- {#each results as user_report}
101
- <div class="head_3">{user_report["user"]}</div>
102
- <div class="section_indent">
103
- <div class="head_5">Summary</div>
104
- <div>{@html get_summary(user_report)}</div>
105
- <ul>
106
- <li>Labeling pages
107
- <ul>
108
- <li>
109
- <a href="http://localhost:5001/?user={user_report["user"]}&tab=labeling&label_mode=3" target="_blank">Group-based model</a>
110
- </li>
111
- <li>
112
- <a href="http://localhost:5001/?user={user_report["user"]}&tab=labeling&label_mode=0" target="_blank">Personalized model</a>
113
- </li>
114
- </ul>
115
- </li>
116
- <li>Auditing pages
117
- <ul>
118
- <li>
119
- <a href="http://localhost:5001/?user={user_report["user"]}&scaffold=personal_group" target="_blank">Group-based audit - personal scaffold</a>
120
- </li>
121
- <li>
122
- <a href="http://localhost:5001/?user={user_report["user"]}&scaffold=personal" target="_blank">Individual audit - personal scaffold</a>
123
- </li>
124
- <li>
125
- <a href="http://localhost:5001/?user={user_report["user"]}&scaffold=prompts" target="_blank">Individual audit - prompt scaffold</a>
126
- </li>
127
- </ul>
128
- </li>
129
- </ul>
130
- </div>
131
- {#each scaffold_methods as scaffold_method}
132
- {#if user_report[scaffold_method]}
133
- <div class="spacing_vert_60 section_indent">
134
- <div class="head_5">
135
- {scaffold_method} ({get_complete_ratio(user_report[scaffold_method])})
136
- [<a href={get_url(user_report["user"], scaffold_method)} target="_blank">link</a>]
137
- </div>
138
- {#each user_report[scaffold_method] as report}
139
- <div class="spacing_vert_40 section_indent">
140
- <div class="head_6_non_cap">
141
- {report["title"]}
142
- </div>
143
-
144
- <div class="spacing_vert_20">
145
- <div class="">
146
- <b>Error type</b>
147
- </div>
148
- {report["error_type"]}
149
- </div>
150
-
151
- <div class="spacing_vert_20">
152
- <div class="">
153
- <b>Evidence</b>
154
- </div>
155
- {#if report["evidence"].length > 0}
156
- <ClusterResults
157
- cluster={null}
158
- model={null}
159
- data={{"cluster_comments": report["evidence"]}}
160
- show_vis={false}
161
- show_checkboxes={false}
162
- table_width_pct={100}
163
- rowsPerPage={10}
164
- table_id={"panel"}
165
- />
166
- {:else}
167
- <p class="grey_text">
168
- No examples added
169
- </p>
170
- {/if}
171
- </div>
172
-
173
- <div class="spacing_vert_20">
174
- <div class="">
175
- <b>Summary/Suggestions</b>
176
- </div>
177
- {report["text_entry"]}
178
- </div>
179
-
180
- <div class="spacing_vert_20">
181
- <b>Completed</b>
182
- <FormField>
183
- <Checkbox checked={report["complete_status"]} disabled/>
184
- </FormField>
185
- </div>
186
-
187
- </div>
188
- {/each}
189
- </div>
190
- {/if}
191
- {/each}
192
- {/each}
193
- {/if}
194
- {:catch error}
195
- <p style="color: red">{error.message}</p>
196
- {/await}
197
- </div>
198
- </div>
199
- </div>
200
-
201
- <style>
202
- .panel {
203
- width: 80%;
204
- padding: 50px;
205
- }
206
- </style>
 
 
 
indie_label_svelte/src/SelectUserDialog.svelte DELETED
@@ -1,66 +0,0 @@
1
- <script lang="ts">
2
- import Dialog, { Title, Content, Actions } from "@smui/dialog";
3
- import Button, { Label } from "@smui/button";
4
- import Textfield from "@smui/textfield";
5
- import Select, { Option } from "@smui/select";
6
- import { user } from "./stores/cur_user_store.js";
7
- import { users } from "./stores/all_users_store.js";
8
-
9
- export let open;
10
- export let cur_user;
11
- let cur_user_tf = cur_user;
12
- let cur_user_sel = cur_user;
13
-
14
- let all_users;
15
- users.subscribe((value) => {
16
- all_users = value;
17
- });
18
-
19
- function updateUserTextField() {
20
- user.update((value) => cur_user_tf);
21
- if (!all_users.includes(user)) {
22
- all_users = all_users.concat(cur_user_tf);
23
- users.update(all_users);
24
- }
25
- open = false;
26
- }
27
-
28
- function updateUserSel() {
29
- user.update((value) => cur_user_sel);
30
- open = false;
31
- }
32
- </script>
33
-
34
- <div>
35
- <Dialog
36
- bind:open
37
- aria-labelledby="simple-title"
38
- aria-describedby="simple-content"
39
- >
40
- <!-- Title cannot contain leading whitespace due to mdc-typography-baseline-top() -->
41
- <Title id="simple-title">Select Current User</Title>
42
- <Content id="simple-content">
43
- <Textfield bind:value={cur_user_tf} label="Enter user's name" />
44
-
45
- <Select bind:value={cur_user_sel} label="Select Menu">
46
- {#each all_users as u}
47
- <Option value={u}>{u}</Option>
48
- {/each}
49
- </Select>
50
- </Content>
51
- <Actions>
52
- <Button on:click={updateUserTextField}>
53
- <Label>Update from TextField</Label>
54
- </Button>
55
- <Button on:click={updateUserSel}>
56
- <Label>Update from Select</Label>
57
- </Button>
58
- </Actions>
59
- </Dialog>
60
- </div>
61
-
62
- <style>
63
- :global(.mdc-dialog__surface) {
64
- height: 300px;
65
- }
66
- </style>
 
 
 
indie_label_svelte/src/StudyLinks.svelte DELETED
@@ -1,59 +0,0 @@
1
- <script lang="ts">
2
- import { user } from "./stores/cur_user_store.js";
3
-
4
- let cur_user;
5
- user.subscribe((value) => {
6
- cur_user = value;
7
- });
8
-
9
- </script>
10
-
11
- <svelte:head>
12
- <title>Study Links</title>
13
- </svelte:head>
14
-
15
- <div class="panel">
16
- <div class="panel_contents">
17
- <div>
18
- <h3>Study Links</h3>
19
- </div>
20
-
21
- <div>
22
- <!-- <div class="head_5">{cur_user}</div> -->
23
- <div class="section_indent">
24
- <ul>
25
- <li>Labeling pages
26
- <ul>
27
- <li>
28
- <a href="http://localhost:5001/?user={cur_user}&tab=labeling&label_mode=3" target="_blank">Group-based model</a>
29
- </li>
30
- <li>
31
- <a href="http://localhost:5001/?user={cur_user}&tab=labeling&label_mode=0" target="_blank">Personalized model</a>
32
- </li>
33
- </ul>
34
- </li>
35
- <li>Auditing pages
36
- <ul>
37
- <li>
38
- <a href="http://localhost:5001/?user={cur_user}&scaffold=personal_group" target="_blank">Group-based audit - personal scaffold</a>
39
- </li>
40
- <li>
41
- <a href="http://localhost:5001/?user={cur_user}&scaffold=personal" target="_blank">Individual audit - personal scaffold</a>
42
- </li>
43
- <li>
44
- <a href="http://localhost:5001/?user={cur_user}&scaffold=prompts" target="_blank">Individual audit - prompt scaffold</a>
45
- </li>
46
- </ul>
47
- </li>
48
- </ul>
49
- </div>
50
- </div>
51
- </div>
52
- </div>
53
-
54
- <style>
55
- .panel {
56
- width: 80%;
57
- padding: 50px;
58
- }
59
- </style>
 
 
 
indie_label_svelte/src/SubmitReportDialog.svelte ADDED
@@ -0,0 +1,120 @@
 
 
 
1
+ <script lang="ts">
2
+ import Dialog, { Title, Content, Actions } from "@smui/dialog";
3
+ import Button, { Label } from "@smui/button";
4
+ import Textfield from "@smui/textfield";
5
+ import Select, { Option } from "@smui/select";
6
+ import CircularProgress from '@smui/circular-progress';
7
+
8
+ export let open;
9
+ export let cur_user;
10
+ export let all_reports;
11
+ let email = "";
12
+ let all_sep_options = [
13
+ "Accuracy",
14
+ "Bias/Discrimination",
15
+ "Adversarial Example",
16
+ "Other",
17
+ ];
18
+ let sep_selection = "";
19
+
20
+ let promise_submit = Promise.resolve(null);
21
+ function handleSubmitReport() {
22
+ promise_submit = submitReport();
23
+ }
24
+
25
+ async function submitReport() {
26
+ let req_params = {
27
+ cur_user: cur_user,
28
+ reports: JSON.stringify(all_reports),
29
+ email: email,
30
+ sep_selection: sep_selection,
31
+ };
32
+
33
+ let params = new URLSearchParams(req_params).toString();
34
+ const response = await fetch("./submit_avid_report?" + params);
35
+ const text = await response.text();
36
+ const data = JSON.parse(text);
37
+ return data;
38
+ }
39
+
40
+ </script>
41
+
42
+ <div>
43
+ <Dialog
44
+ bind:open
45
+ aria-labelledby="simple-title"
46
+ aria-describedby="simple-content"
47
+ >
48
+ <!-- Title cannot contain leading whitespace due to mdc-typography-baseline-top() -->
49
+ <Title id="simple-title">Send All Audit Reports</Title>
50
+ <Content id="simple-content">
51
+ <!-- Description -->
52
+ <div>
53
+ <b>When you are ready to send all of your audit reports to the <a href="https://avidml.org/" target="_blank">AI Vulnerability Database</a> (AVID), please fill out the following information.</b>
54
+ Only your submitted reports will be stored in the database for further analysis. While you can submit reports anonymously, we encourage you to provide your email so that we can contact you if we have any questions.
55
+ </div>
56
+
57
+ <!-- Summary of complete reports -->
58
+ <div>
59
+ <p><b>Summary of Reports to Send</b> (Reports that include evidence and are marked as complete)</p>
60
+ <ul>
61
+ {#each all_reports as report}
62
+ {#if report["complete_status"] && (report["evidence"].length > 0)}
63
+ <li>{report["title"]}</li>
64
+ <ul>
65
+ <li>Error Type: {report["error_type"]}</li>
66
+ <li>Evidence: Includes {report["evidence"].length} example{(report["evidence"].length > 1) ? 's' : ''}</li>
67
+ <li>Summary/Suggestions: {report["text_entry"]}</li>
68
+ </ul>
69
+ {/if}
70
+ {/each}
71
+ </ul>
72
+ </div>
73
+
74
+ <!-- Form fields -->
75
+ <div>
76
+ <Select bind:value={sep_selection} label="Audit category" style="width: 90%">
77
+ {#each all_sep_options as opt}
78
+ <Option value={opt}>{opt}</Option>
79
+ {/each}
80
+ </Select>
81
+ </div>
82
+ <div>
83
+ <Textfield bind:value={email} label="(Optional) Contact email" style="width: 90%" />
84
+ </div>
85
+
86
+ <!-- Submission and status message -->
87
+ <div class="dialog_footer">
88
+ <Button on:click={handleSubmitReport} variant="outlined">
89
+ <Label>Submit Report to AVID</Label>
90
+ </Button>
91
+
92
+ <div>
93
+ <span style="color: grey"><i>
94
+ {#await promise_submit}
95
+ <CircularProgress style="height: 32px; width: 32px;" indeterminate />
96
+ {:then result}
97
+ {#if result}
98
+ Successfully sent reports! You may close this window.
99
+ {/if}
100
+ {:catch error}
101
+ <p style="color: red">{error.message}</p>
102
+ {/await}
103
+ </i></span>
104
+ </div>
105
+ </div>
106
+ </Content>
107
+ </Dialog>
108
+ </div>
109
+
110
+ <style>
111
+ :global(.mdc-dialog__surface) {
112
+ min-width: 50%;
113
+ min-height: 50%;
114
+ margin-left: 30%;
115
+ }
116
+
117
+ .dialog_footer {
118
+ padding: 20px 0px;
119
+ }
120
+ </style>
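Editor's note (not part of this commit): the dialog above submits via a plain GET with query parameters, mirroring the URLSearchParams call in submitReport(). A minimal Python sketch of the same request contract, assuming the Flask server from server.py is running locally on port 5001 (the port used in the StudyLinks URLs) and that AVID_API_URL / AVID_API_KEY are set in its environment:

import json
import requests

# Report fields mirror what the dialog collects; the values here are made up.
reports = [
    {
        "title": "Example report",
        "error_type": "System is under-sensitive",
        "evidence": [{"item_id": 1, "comment": "example comment"}],
        "text_entry": "Short summary or suggestion",
        "complete_status": True,   # only complete reports with evidence are forwarded to AVID
    }
]

params = {
    "cur_user": "DemoUser",
    "reports": json.dumps(reports),   # the Svelte code JSON-stringifies all_reports the same way
    "email": "",                      # optional contact email
    "sep_selection": "Accuracy",      # one of the categories offered in the Select above
}
resp = requests.get("http://localhost:5001/submit_avid_report", params=params)
print(resp.json())                    # {"status": "success"} on success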
indie_label_svelte/src/TopicTraining.svelte CHANGED
@@ -4,21 +4,15 @@
4
  import Button, { Label } from "@smui/button";
5
  import DataTable, { Head, Body, Row, Cell } from "@smui/data-table";
6
  import LinearProgress from '@smui/linear-progress';
7
- import { user } from './stores/cur_user_store.js';
8
  import { model_chosen } from './stores/cur_model_store.js';
9
 
10
  export let topic;
11
  export let model_name = null;
 
12
 
13
  let to_label = {};
14
  let promise = Promise.resolve(null);
15
 
16
- // Get current user
17
- let cur_user;
18
- user.subscribe(value => {
19
- cur_user = value;
20
- });
21
-
22
  // Get current model
23
  if (model_name == null) {
24
  model_chosen.subscribe(value => {
@@ -81,7 +75,6 @@
81
  topic: topic,
82
  };
83
 
84
- console.log("topic training model name", model_name);
85
  let params = new URLSearchParams(req_params).toString();
86
  const response = await fetch("./get_personalized_model_topic?" + params); // TODO
87
  const text = await response.text();
@@ -90,7 +83,6 @@
90
  model_name = data["new_model_name"];
91
  model_chosen.update((value) => model_name);
92
 
93
- console.log("topicTraining", data);
94
  return data;
95
  }
96
  </script>
 
4
  import Button, { Label } from "@smui/button";
5
  import DataTable, { Head, Body, Row, Cell } from "@smui/data-table";
6
  import LinearProgress from '@smui/linear-progress';
 
7
  import { model_chosen } from './stores/cur_model_store.js';
8
 
9
  export let topic;
10
  export let model_name = null;
11
+ export let cur_user;
12
 
13
  let to_label = {};
14
  let promise = Promise.resolve(null);
15
 
 
 
 
 
 
 
16
  // Get current model
17
  if (model_name == null) {
18
  model_chosen.subscribe(value => {
 
75
  topic: topic,
76
  };
77
 
 
78
  let params = new URLSearchParams(req_params).toString();
79
  const response = await fetch("./get_personalized_model_topic?" + params); // TODO
80
  const text = await response.text();
 
83
  model_name = data["new_model_name"];
84
  model_chosen.update((value) => model_name);
85
 
 
86
  return data;
87
  }
88
  </script>
indie_label_svelte/src/stores/all_users_store.js DELETED
@@ -1,6 +0,0 @@
1
- import { writable } from 'svelte/store';
2
-
3
- // Fallback if request doesn't work
4
- let all_users = ["DemoUser"];
5
-
6
- export const users = writable(all_users);
 
 
 
 
 
 
 
indie_label_svelte/src/stores/cur_user_store.js DELETED
@@ -1,3 +0,0 @@
1
- import { writable } from 'svelte/store';
2
-
3
- export const user = writable("DemoUser");
 
 
 
 
server.py CHANGED
@@ -17,10 +17,15 @@ import math
17
  import altair as alt
18
  import matplotlib.pyplot as plt
19
  import time
 
20
 
21
  import audit_utils as utils
22
 
 
 
 
23
  app = Flask(__name__)
 
24
 
25
  # Path for our main Svelte page
26
  @app.route("/")
@@ -35,15 +40,19 @@ def home(path):
35
 
36
  ########################################
37
  # ROUTE: /AUDIT_SETTINGS
38
- comments_grouped_full_topic_cat = pd.read_pickle("data/comments_grouped_full_topic_cat2_persp.pkl")
39
 
40
  @app.route("/audit_settings")
41
- def audit_settings():
42
  # Fetch page content
43
  user = request.args.get("user")
44
  scaffold_method = request.args.get("scaffold_method")
45
 
46
- user_models = utils.get_all_model_names(user)
 
 
 
 
 
47
  grp_models = [m for m in user_models if m.startswith(f"model_{user}_group_")]
48
 
49
  clusters = utils.get_unique_topics()
@@ -66,19 +75,6 @@ def audit_settings():
66
  "options": [{"value": i, "text": cluster} for i, cluster in enumerate(clusters)],
67
  },]
68
 
69
- if scaffold_method == "personal_cluster":
70
- cluster_model = user_models[0]
71
- personal_cluster_file = f"./data/personal_cluster_dfs/{cluster_model}.pkl"
72
- if os.path.isfile(personal_cluster_file) and cluster_model != "":
73
- print("audit_settings", personal_cluster_file, cluster_model)
74
- topics_under_top, topics_over_top = utils.get_personal_clusters(cluster_model)
75
- pers_cluster = topics_under_top + topics_over_top
76
- pers_cluster_options = {
77
- "label": "Personalized clusters",
78
- "options": [{"value": i, "text": cluster} for i, cluster in enumerate(pers_cluster)],
79
- }
80
- clusters_options.insert(0, pers_cluster_options)
81
-
82
  clusters_for_tuning = utils.get_large_clusters(min_n=150)
83
  clusters_for_tuning_options = [{"value": i, "text": cluster} for i, cluster in enumerate(clusters_for_tuning)] # Format for Svelecte UI element
84
 
@@ -86,54 +82,33 @@ def audit_settings():
86
  "personalized_models": user_models,
87
  "personalized_model_grp": grp_models,
88
  "perf_metrics": ["Average rating difference", "Mean Absolute Error (MAE)", "Root Mean Squared Error (RMSE)", "Mean Squared Error (MSE)"],
89
- "breakdown_categories": ['Topic', 'Toxicity Category', 'Toxicity Severity'],
90
  "clusters": clusters_options,
91
  "clusters_for_tuning": clusters_for_tuning_options,
 
92
  }
93
  return json.dumps(context)
94
 
95
- ########################################
96
- # ROUTE: /GET_USERS
97
- @app.route("/get_users")
98
- def get_users():
99
- # Fetch page content
100
- with open(f"./data/users_to_models.pkl", "rb") as f:
101
- users_to_models = pickle.load(f)
102
- users = list(users_to_models.keys())
103
- context = {
104
- "users": users,
105
- }
106
- return json.dumps(context)
107
 
108
  ########################################
109
  # ROUTE: /GET_AUDIT
110
  @app.route("/get_audit")
111
  def get_audit():
112
  pers_model = request.args.get("pers_model")
113
- perf_metric = request.args.get("perf_metric")
114
- breakdown_axis = request.args.get("breakdown_axis")
115
- breakdown_sort = request.args.get("breakdown_sort")
116
- n_topics = int(request.args.get("n_topics"))
117
  error_type = request.args.get("error_type")
118
  cur_user = request.args.get("cur_user")
119
  topic_vis_method = request.args.get("topic_vis_method")
120
  if topic_vis_method == "null":
121
  topic_vis_method = "median"
122
 
123
- if breakdown_sort == "difference":
124
- sort_class_plot = True
125
- elif breakdown_sort == "default":
126
- sort_class_plot = False
127
  else:
128
- raise Exception("Invalid breakdown_sort value")
129
-
130
- overall_perf = utils.show_overall_perf(
131
- variant=pers_model,
132
- error_type=error_type,
133
- cur_user=cur_user,
134
- breakdown_axis=breakdown_axis,
135
- topic_vis_method=topic_vis_method,
136
- )
137
 
138
  results = {
139
  "overall_perf": overall_perf,
@@ -143,60 +118,34 @@ def get_audit():
143
  ########################################
144
  # ROUTE: /GET_CLUSTER_RESULTS
145
  @app.route("/get_cluster_results")
146
- def get_cluster_results():
147
  pers_model = request.args.get("pers_model")
148
- n_examples = int(request.args.get("n_examples"))
149
  cluster = request.args.get("cluster")
150
- example_sort = request.args.get("example_sort")
151
- comparison_group = request.args.get("comparison_group")
152
  topic_df_ids = request.args.getlist("topic_df_ids")
153
  topic_df_ids = [int(val) for val in topic_df_ids[0].split(",") if val != ""]
154
  search_type = request.args.get("search_type")
155
  keyword = request.args.get("keyword")
156
- n_neighbors = request.args.get("n_neighbors")
157
- if n_neighbors != "null":
158
- n_neighbors = int(n_neighbors)
159
- neighbor_threshold = 0.6
160
  error_type = request.args.get("error_type")
161
  use_model = request.args.get("use_model") == "true"
162
- scaffold_method = request.args.get("scaffold_method")
163
-
164
 
165
- # If user has a tuned model for this cluster, use that
166
- cluster_model_file = f"./data/trained_models/{pers_model}_{cluster}.pkl"
167
- if os.path.isfile(cluster_model_file):
168
- pers_model = f"{pers_model}_{cluster}"
169
-
170
- print(f"get_cluster_results using model {pers_model}")
171
-
172
- other_ids = []
173
- perf_metric = "avg_diff"
174
- sort_ascending = True if example_sort == "ascending" else False
175
 
 
176
  topic_df = None
177
-
178
- personal_cluster_file = f"./data/personal_cluster_dfs/{pers_model}.pkl"
179
- if (scaffold_method == "personal_cluster") and (os.path.isfile(personal_cluster_file)):
180
- # Handle personal clusters
181
- with open(personal_cluster_file, "rb") as f:
182
- topic_df = pickle.load(f)
183
- topic_df = topic_df[(topic_df["topic"] == cluster)]
184
- else:
185
- # Regular handling
186
- with open(f"data/preds_dfs/{pers_model}.pkl", "rb") as f:
187
- topic_df = pickle.load(f)
188
- if search_type == "cluster":
189
- # Display examples with comment, your pred, and other users' pred
190
- topic_df = topic_df[(topic_df["topic"] == cluster) | (topic_df["item_id"].isin(topic_df_ids))]
191
-
192
- elif search_type == "neighbors":
193
- neighbor_ids = utils.get_match(topic_df_ids, K=n_neighbors, threshold=neighbor_threshold, debug=False)
194
- topic_df = topic_df[(topic_df["item_id"].isin(neighbor_ids)) | (topic_df["item_id"].isin(topic_df_ids))]
195
- elif search_type == "keyword":
196
- topic_df = topic_df[(topic_df["comment"].str.contains(keyword, case=False, regex=False)) | (topic_df["item_id"].isin(topic_df_ids))]
197
-
198
  topic_df = topic_df.drop_duplicates()
199
- print("len topic_df", len(topic_df))
 
200
 
201
  # Handle empty results
202
  if len(topic_df) == 0:
@@ -217,18 +166,20 @@ def get_cluster_results():
217
 
218
  topic_df_ids = topic_df["item_id"].unique().tolist()
219
 
220
- if (scaffold_method == "personal_cluster") and (os.path.isfile(personal_cluster_file)):
221
- cluster_overview_plot_json, sampled_df = utils.plot_overall_vis_cluster(topic_df, error_type=error_type, n_comments=500)
 
 
222
  else:
223
- # Regular
224
- cluster_overview_plot_json, sampled_df = utils.get_cluster_overview_plot(topic_df, error_type=error_type, use_model=use_model)
225
 
226
- cluster_comments = utils.get_cluster_comments(sampled_df,error_type=error_type, num_examples=n_examples, use_model=use_model) # New version of cluster comment table
227
 
228
  results = {
229
  "topic_df_ids": topic_df_ids,
230
  "cluster_overview_plot_json": json.loads(cluster_overview_plot_json),
231
- "cluster_comments": cluster_comments,
232
  }
233
  return json.dumps(results)
234
 
@@ -255,7 +206,7 @@ def get_group_size():
255
  ########################################
256
  # ROUTE: /GET_GROUP_MODEL
257
  @app.route("/get_group_model")
258
- def get_group_model():
259
  # Fetch info for initial labeling component
260
  model_name = request.args.get("model_name")
261
  user = request.args.get("user")
@@ -275,28 +226,21 @@ def get_group_model():
275
  grp_ids = grp_df["worker_id"].tolist()
276
 
277
  ratings_grp = utils.get_grp_model_labels(
278
- comments_df=comments_grouped_full_topic_cat,
279
  n_label_per_bin=BIN_DISTRIB,
280
  score_bins=SCORE_BINS,
281
  grp_ids=grp_ids,
282
  )
283
 
284
- # print("ratings_grp", ratings_grp)
285
-
286
  # Modify model name
287
  model_name = f"{model_name}_group_gender{sel_gender}_relig{sel_relig}_pol{sel_pol}_race{sel_race_orig}_lgbtq_{sel_lgbtq}"
288
-
289
- label_dir = f"./data/labels/{model_name}"
290
- # Create directory for labels if it doesn't yet exist
291
- if not os.path.isdir(label_dir):
292
- os.mkdir(label_dir)
293
- last_label_i = len([name for name in os.listdir(label_dir) if (os.path.isfile(os.path.join(label_dir, name)) and name.endswith('.pkl'))])
294
 
295
  # Train group model
296
- mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, last_label_i, ratings_grp, user)
297
 
298
  duration = time.time() - start
299
- print("Time to train/cache:", duration)
 
300
 
301
  context = {
302
  "group_size": group_size,
@@ -314,11 +258,10 @@ def get_labeling():
314
  clusters_for_tuning = utils.get_large_clusters(min_n=150)
315
  clusters_for_tuning_options = [{"value": i, "text": cluster} for i, cluster in enumerate(clusters_for_tuning)] # Format for Svelecte UI element
316
 
317
- # model_name_suggestion = f"model_{int(time.time())}"
318
- model_name_suggestion = f"model_{user}"
319
 
320
  context = {
321
- "personalized_models": utils.get_all_model_names(user),
322
  "model_name_suggestion": model_name_suggestion,
323
  "clusters_for_tuning": clusters_for_tuning_options,
324
  }
@@ -326,15 +269,16 @@ def get_labeling():
326
 
327
  ########################################
328
  # ROUTE: /GET_COMMENTS_TO_LABEL
329
- N_LABEL_PER_BIN = 8 # 8 * 5 = 40 comments
330
- BIN_DISTRIB = [4, 8, 16, 8, 4]
 
 
331
  SCORE_BINS = [(0.0, 0.5), (0.5, 1.5), (1.5, 2.5), (2.5, 3.5), (3.5, 4.01)]
332
  @app.route("/get_comments_to_label")
333
  def get_comments_to_label():
334
  n = int(request.args.get("n"))
335
  # Fetch examples to label
336
  to_label_ids = utils.create_example_sets(
337
- comments_df=comments_grouped_full_topic_cat,
338
  n_label_per_bin=BIN_DISTRIB,
339
  score_bins=SCORE_BINS,
340
  keyword=None
@@ -351,14 +295,11 @@ def get_comments_to_label():
351
 
352
  ########################################
353
  # ROUTE: /GET_COMMENTS_TO_LABEL_TOPIC
354
- N_LABEL_PER_BIN_TOPIC = 2 # 2 * 5 = 10 comments
355
  @app.route("/get_comments_to_label_topic")
356
  def get_comments_to_label_topic():
357
  # Fetch examples to label
358
  topic = request.args.get("topic")
359
  to_label_ids = utils.create_example_sets(
360
- comments_df=comments_grouped_full_topic_cat,
361
- # n_label_per_bin=N_LABEL_PER_BIN_TOPIC,
362
  n_label_per_bin=BIN_DISTRIB,
363
  score_bins=SCORE_BINS,
364
  keyword=None,
@@ -375,38 +316,33 @@ def get_comments_to_label_topic():
375
  ########################################
376
  # ROUTE: /GET_PERSONALIZED_MODEL
377
  @app.route("/get_personalized_model")
378
- def get_personalized_model():
379
  model_name = request.args.get("model_name")
380
  ratings_json = request.args.get("ratings")
381
  mode = request.args.get("mode")
382
  user = request.args.get("user")
383
  ratings = json.loads(ratings_json)
384
- print(ratings)
385
- start = time.time()
 
386
 
387
- label_dir = f"./data/labels/{model_name}"
388
- # Create directory for labels if it doesn't yet exist
389
- if not os.path.isdir(label_dir):
390
- os.mkdir(label_dir)
391
- last_label_i = len([name for name in os.listdir(label_dir) if (os.path.isfile(os.path.join(label_dir, name)) and name.endswith('.pkl'))])
392
 
393
  # Handle existing or new model cases
394
  if mode == "view":
395
  # Fetch prior model performance
396
- if model_name not in utils.get_all_model_names():
397
- raise Exception(f"Model {model_name} does not exist")
398
- else:
399
- mae, mse, rmse, avg_diff, ratings_prev = utils.fetch_existing_data(model_name, last_label_i)
400
 
401
  elif mode == "train":
402
  # Train model and cache predictions using new labels
403
  print("get_personalized_model train")
404
- mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, last_label_i, ratings, user)
405
-
406
- duration = time.time() - start
407
- print("Time to train/cache:", duration)
 
408
 
409
- perf_plot, mae_status = utils.plot_train_perf_results(model_name, mae)
410
  perf_plot_json = perf_plot.to_json()
411
 
412
  def round_metric(x):
@@ -419,7 +355,6 @@ def get_personalized_model():
419
  "mse": round_metric(mse),
420
  "rmse": round_metric(rmse),
421
  "avg_diff": round_metric(avg_diff),
422
- "duration": duration,
423
  "ratings_prev": ratings_prev,
424
  "perf_plot_json": json.loads(perf_plot_json),
425
  }
@@ -429,34 +364,29 @@ def get_personalized_model():
429
  ########################################
430
  # ROUTE: /GET_PERSONALIZED_MODEL_TOPIC
431
  @app.route("/get_personalized_model_topic")
432
- def get_personalized_model_topic():
433
  model_name = request.args.get("model_name")
434
  ratings_json = request.args.get("ratings")
435
  user = request.args.get("user")
436
  ratings = json.loads(ratings_json)
437
  topic = request.args.get("topic")
438
- print(ratings)
 
439
  start = time.time()
440
 
441
  # Modify model name
442
  model_name = f"{model_name}_{topic}"
443
-
444
- label_dir = f"./data/labels/{model_name}"
445
- # Create directory for labels if it doesn't yet exist
446
- if not os.path.isdir(label_dir):
447
- os.mkdir(label_dir)
448
- last_label_i = len([name for name in os.listdir(label_dir) if (os.path.isfile(os.path.join(label_dir, name)) and name.endswith('.pkl'))])
449
 
450
  # Handle existing or new model cases
451
  # Train model and cache predictions using new labels
452
- print("get_personalized_model_topic train")
453
- mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, last_label_i, ratings, user, topic=topic)
 
454
 
455
- duration = time.time() - start
456
- print("Time to train/cache:", duration)
457
-
458
- def round_metric(x):
459
- return np.round(abs(x), 3)
460
 
461
  results = {
462
  "success": "success",
@@ -477,17 +407,13 @@ def get_reports():
477
  if topic_vis_method == "null":
478
  topic_vis_method = "fp_fn"
479
 
480
- # Load reports for current user from stored files
481
- report_dir = f"./data/user_reports"
482
- user_file = os.path.join(report_dir, f"{cur_user}_{scaffold_method}.pkl")
483
-
484
- if not os.path.isfile(user_file):
485
  if scaffold_method == "fixed":
486
  reports = get_fixed_scaffold()
487
  elif (scaffold_method == "personal" or scaffold_method == "personal_group" or scaffold_method == "personal_test"):
488
- reports = get_personal_scaffold(model, topic_vis_method)
489
- elif (scaffold_method == "personal_cluster"):
490
- reports = get_personal_cluster_scaffold(model)
491
  elif scaffold_method == "prompts":
492
  reports = get_prompts_scaffold()
493
  elif scaffold_method == "tutorial":
@@ -505,8 +431,8 @@ def get_reports():
505
  ]
506
  else:
507
  # Load from pickle file
508
- with open(user_file, "rb") as f:
509
- reports = pickle.load(f)
510
 
511
  results = {
512
  "reports": reports,
@@ -572,23 +498,13 @@ def get_tutorial_scaffold():
572
  },
573
  ]
574
 
575
- def get_personal_cluster_scaffold(model):
576
- topics_under_top, topics_over_top = utils.get_personal_clusters(model)
577
-
578
- report_under = [get_empty_report(topic, "System is under-sensitive") for topic in topics_under_top]
579
-
580
- report_over = [get_empty_report(topic, "System is over-sensitive") for topic in topics_over_top]
581
- reports = (report_under + report_over)
582
- random.shuffle(reports)
583
- return reports
584
-
585
  def get_topic_errors(df, topic_vis_method, threshold=2):
586
- topics = df["topic_"].unique().tolist()
587
  topic_errors = {}
588
  for topic in topics:
589
- t_df = df[df["topic_"] == topic]
590
- y_true = t_df["pred"].to_numpy()
591
- y_pred = t_df["rating"].to_numpy()
592
  if topic_vis_method == "mae":
593
  t_err = mean_absolute_error(y_true, y_pred)
594
  elif topic_vis_method == "mse":
@@ -596,8 +512,8 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
596
  elif topic_vis_method == "avg_diff":
597
  t_err = np.mean(y_true - y_pred)
598
  elif topic_vis_method == "fp_proportion":
599
- y_true = [0 if rating < threshold else 1 for rating in t_df["pred"].tolist()]
600
- y_pred = [0 if rating < threshold else 1 for rating in t_df["rating"].tolist()]
601
  try:
602
  tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
603
  except:
@@ -605,8 +521,8 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
605
  total = float(len(y_true))
606
  t_err = fp / total
607
  elif topic_vis_method == "fn_proportion":
608
- y_true = [0 if rating < threshold else 1 for rating in t_df["pred"].tolist()]
609
- y_pred = [0 if rating < threshold else 1 for rating in t_df["rating"].tolist()]
610
  try:
611
  tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
612
  except:
@@ -617,65 +533,69 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
617
 
618
  return topic_errors
619
 
620
- def get_personal_scaffold(model, topic_vis_method, n_topics=200, n=5):
621
  threshold = utils.get_toxic_threshold()
622
 
623
  # Get topics with greatest amount of error
624
- with open(f"./data/preds_dfs/{model}.pkl", "rb") as f:
 
625
  preds_df = pickle.load(f)
626
- preds_df_mod = preds_df.merge(utils.get_comments_grouped_full_topic_cat(), on="item_id", how="left", suffixes=('_', '_avg'))
627
- preds_df_mod = preds_df_mod[preds_df_mod["user_id"] == "A"].sort_values(by=["item_id"]).reset_index()
628
- preds_df_mod = preds_df_mod[preds_df_mod["topic_id_"] < n_topics]
629
 
630
  if topic_vis_method == "median":
631
- df = preds_df_mod.groupby(["topic_", "user_id"]).median().reset_index()
632
  elif topic_vis_method == "mean":
633
- df = preds_df_mod.groupby(["topic_", "user_id"]).mean().reset_index()
634
  elif topic_vis_method == "fp_fn":
635
  for error_type in ["fn_proportion", "fp_proportion"]:
636
  topic_errors = get_topic_errors(preds_df_mod, error_type)
637
- preds_df_mod[error_type] = [topic_errors[topic] for topic in preds_df_mod["topic_"].tolist()]
638
- df = preds_df_mod.groupby(["topic_", "user_id"]).mean().reset_index()
639
  else:
640
  # Get error for each topic
641
  topic_errors = get_topic_errors(preds_df_mod, topic_vis_method)
642
- preds_df_mod[topic_vis_method] = [topic_errors[topic] for topic in preds_df_mod["topic_"].tolist()]
643
- df = preds_df_mod.groupby(["topic_", "user_id"]).mean().reset_index()
644
 
645
  # Get system error
646
- df = df[(df["topic_"] != "53_maiareficco_kallystas_dyisisitmanila_tractorsazi") & (df["topic_"] != "79_idiot_dumb_stupid_dumber")]
 
647
 
648
  if topic_vis_method == "median" or topic_vis_method == "mean":
649
- df["error_magnitude"] = [utils.get_error_magnitude(sys, user, threshold) for sys, user in zip(df["rating"].tolist(), df["pred"].tolist())]
650
- df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["rating"].tolist(), df["pred"].tolist())]
651
 
652
  df_under = df[df["error_type"] == "System is under-sensitive"]
653
  df_under = df_under.sort_values(by=["error_magnitude"], ascending=False).head(n) # surface largest errors first
654
- report_under = [get_empty_report(row["topic_"], row["error_type"]) for _, row in df_under.iterrows()]
655
 
656
  df_over = df[df["error_type"] == "System is over-sensitive"]
657
  df_over = df_over.sort_values(by=["error_magnitude"], ascending=False).head(n) # surface largest errors first
658
- report_over = [get_empty_report(row["topic_"], row["error_type"]) for _, row in df_over.iterrows()]
659
 
660
  # Set up reports
661
- # return [get_empty_report(row["topic_"], row["error_type"]) for index, row in df.iterrows()]
662
  reports = (report_under + report_over)
663
  random.shuffle(reports)
664
  elif topic_vis_method == "fp_fn":
665
  df_under = df.sort_values(by=["fn_proportion"], ascending=False).head(n)
666
  df_under = df_under[df_under["fn_proportion"] > 0]
667
- report_under = [get_empty_report(row["topic_"], "System is under-sensitive") for _, row in df_under.iterrows()]
 
 
668
 
669
  df_over = df.sort_values(by=["fp_proportion"], ascending=False).head(n)
670
  df_over = df_over[df_over["fp_proportion"] > 0]
671
- report_over = [get_empty_report(row["topic_"], "System is over-sensitive") for _, row in df_over.iterrows()]
 
 
672
 
673
  reports = (report_under + report_over)
674
  random.shuffle(reports)
675
  else:
676
  df = df.sort_values(by=[topic_vis_method], ascending=False).head(n * 2)
677
- df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["rating"].tolist(), df["pred"].tolist())]
678
- reports = [get_empty_report(row["topic_"], row["error_type"]) for _, row in df.iterrows()]
679
 
680
  return reports
681
 
@@ -718,78 +638,88 @@ def get_prompts_scaffold():
718
  },
719
  ]
720
721
  ########################################
722
  # ROUTE: /SAVE_REPORTS
723
  @app.route("/save_reports")
724
- def save_reports():
725
  cur_user = request.args.get("cur_user")
726
  reports_json = request.args.get("reports")
727
  reports = json.loads(reports_json)
728
- scaffold_method = request.args.get("scaffold_method")
729
 
730
- # Save reports for current user to stored files
731
- report_dir = f"./data/user_reports"
732
- # Save to pickle file
733
- with open(os.path.join(report_dir, f"{cur_user}_{scaffold_method}.pkl"), "wb") as f:
734
- pickle.dump(reports, f)
735
 
736
  results = {
737
  "status": "success",
738
  }
 
 
739
  return json.dumps(results)
740
 
741
  ########################################
742
- # ROUTE: /GET_EXPLORE_EXAMPLES
743
- @app.route("/get_explore_examples")
744
- def get_explore_examples():
745
- threshold = utils.get_toxic_threshold()
746
- n_examples = int(request.args.get("n_examples"))
747
-
748
- # Get sample of examples
749
- df = utils.get_comments_grouped_full_topic_cat().sample(n=n_examples)
750
 
751
- df["system_decision"] = [utils.get_decision(rating, threshold) for rating in df["rating"].tolist()]
752
- df["system_color"] = [utils.get_user_color(sys, threshold) for sys in df["rating"].tolist()] # get cell colors
753
 
754
- ex_json = df.to_json(orient="records")
 
755
 
756
  results = {
757
- "examples": ex_json,
758
  }
759
  return json.dumps(results)
760
 
761
  ########################################
762
- # ROUTE: /GET_RESULTS
763
- @app.route("/get_results")
764
- def get_results():
765
- users = request.args.get("users")
766
- if users != "":
767
- users = users.split(",")
768
- # print("users", users)
769
-
770
- IGNORE_LIST = ["DemoUser"]
771
- report_dir = f"./data/user_reports"
772
-
773
 
774
- # For each user, get personal and prompt results
775
- # Get links to label pages and audit pages
776
- results = []
777
- for user in users:
778
- if user not in IGNORE_LIST:
779
- user_results = {}
780
- user_results["user"] = user
781
- for scaffold_method in ["personal", "personal_group", "prompts"]:
782
- # Get results
783
- user_file = os.path.join(report_dir, f"{user}_{scaffold_method}.pkl")
784
- if os.path.isfile(user_file):
785
- with open(user_file, "rb") as f:
786
- user_results[scaffold_method] = pickle.load(f)
787
- results.append(user_results)
788
-
789
- # print("results", results)
790
 
791
  results = {
792
- "results": results,
793
  }
794
  return json.dumps(results)
795
 
 
17
  import altair as alt
18
  import matplotlib.pyplot as plt
19
  import time
20
+ import friendlywords as fw
21
 
22
  import audit_utils as utils
23
 
24
+ import requests
25
+
26
+
27
  app = Flask(__name__)
28
+ DEBUG = False # Debug flag for development; set to False for production
29
 
30
  # Path for our main Svelte page
31
  @app.route("/")
 
40
 
41
  ########################################
42
  # ROUTE: /AUDIT_SETTINGS
 
43
 
44
  @app.route("/audit_settings")
45
+ def audit_settings(debug=DEBUG):
46
  # Fetch page content
47
  user = request.args.get("user")
48
  scaffold_method = request.args.get("scaffold_method")
49
 
50
+ # Assign user ID if none is provided (default case)
51
+ if user == "null":
52
+ # Generate random two-word user ID
53
+ user = fw.generate(2, separator="_")
54
+
55
+ user_models = utils.get_user_model_names(user)
56
  grp_models = [m for m in user_models if m.startswith(f"model_{user}_group_")]
57
 
58
  clusters = utils.get_unique_topics()
 
75
  "options": [{"value": i, "text": cluster} for i, cluster in enumerate(clusters)],
76
  },]
77
 
 
78
  clusters_for_tuning = utils.get_large_clusters(min_n=150)
79
  clusters_for_tuning_options = [{"value": i, "text": cluster} for i, cluster in enumerate(clusters_for_tuning)] # Format for Svelecte UI element
80
 
 
82
  "personalized_models": user_models,
83
  "personalized_model_grp": grp_models,
84
  "perf_metrics": ["Average rating difference", "Mean Absolute Error (MAE)", "Root Mean Squared Error (RMSE)", "Mean Squared Error (MSE)"],
 
85
  "clusters": clusters_options,
86
  "clusters_for_tuning": clusters_for_tuning_options,
87
+ "user": user,
88
  }
89
  return json.dumps(context)
90
91
 
92
  ########################################
93
  # ROUTE: /GET_AUDIT
94
  @app.route("/get_audit")
95
  def get_audit():
96
  pers_model = request.args.get("pers_model")
 
 
 
 
97
  error_type = request.args.get("error_type")
98
  cur_user = request.args.get("cur_user")
99
  topic_vis_method = request.args.get("topic_vis_method")
100
  if topic_vis_method == "null":
101
  topic_vis_method = "median"
102
 
103
+ if pers_model == "" or pers_model == "null" or pers_model == "undefined":
104
+ overall_perf = None
 
 
105
  else:
106
+ overall_perf = utils.show_overall_perf(
107
+ cur_model=pers_model,
108
+ error_type=error_type,
109
+ cur_user=cur_user,
110
+ topic_vis_method=topic_vis_method,
111
+ )
 
 
 
112
 
113
  results = {
114
  "overall_perf": overall_perf,
 
118
  ########################################
119
  # ROUTE: /GET_CLUSTER_RESULTS
120
  @app.route("/get_cluster_results")
121
+ def get_cluster_results(debug=DEBUG):
122
  pers_model = request.args.get("pers_model")
123
+ cur_user = request.args.get("cur_user")
124
  cluster = request.args.get("cluster")
 
 
125
  topic_df_ids = request.args.getlist("topic_df_ids")
126
  topic_df_ids = [int(val) for val in topic_df_ids[0].split(",") if val != ""]
127
  search_type = request.args.get("search_type")
128
  keyword = request.args.get("keyword")
 
 
 
 
129
  error_type = request.args.get("error_type")
130
  use_model = request.args.get("use_model") == "true"
 
 
131
 
132
+ if debug:
133
+ print(f"get_cluster_results using model {pers_model}")
 
 
 
 
 
 
 
 
134
 
135
+ # Prepare cluster df (topic_df)
136
  topic_df = None
137
+ preds_file = utils.get_preds_file(cur_user, pers_model)
138
+ with open(preds_file, "rb") as f:
139
+ topic_df = pickle.load(f)
140
+ if search_type == "cluster":
141
+ # Display examples with comment, your pred, and other users' pred
142
+ topic_df = topic_df[(topic_df["topic"] == cluster) | (topic_df["item_id"].isin(topic_df_ids))]
143
+ elif search_type == "keyword":
144
+ topic_df = topic_df[(topic_df["comment"].str.contains(keyword, case=False, regex=False)) | (topic_df["item_id"].isin(topic_df_ids))]
145
+
146
  topic_df = topic_df.drop_duplicates()
147
+ if debug:
148
+ print("len topic_df", len(topic_df))
149
 
150
  # Handle empty results
151
  if len(topic_df) == 0:
 
166
 
167
  topic_df_ids = topic_df["item_id"].unique().tolist()
168
 
169
+ # Prepare overview plot for the cluster
170
+ if use_model:
171
+ # Display results with the model as a reference point
172
+ cluster_overview_plot_json, sampled_df = utils.plot_overall_vis_cluster(cur_user, topic_df, error_type=error_type, n_comments=500)
173
  else:
174
+ # Display results without a model
175
+ cluster_overview_plot_json, sampled_df = utils.plot_overall_vis_cluster_no_model(cur_user, topic_df, n_comments=500)
176
 
177
+ cluster_comments = utils.get_cluster_comments(sampled_df,error_type=error_type, use_model=use_model) # New version of cluster comment table
178
 
179
  results = {
180
  "topic_df_ids": topic_df_ids,
181
  "cluster_overview_plot_json": json.loads(cluster_overview_plot_json),
182
+ "cluster_comments": cluster_comments.to_json(orient="records"),
183
  }
184
  return json.dumps(results)
185
 
 
206
  ########################################
207
  # ROUTE: /GET_GROUP_MODEL
208
  @app.route("/get_group_model")
209
+ def get_group_model(debug=DEBUG):
210
  # Fetch info for initial labeling component
211
  model_name = request.args.get("model_name")
212
  user = request.args.get("user")
 
226
  grp_ids = grp_df["worker_id"].tolist()
227
 
228
  ratings_grp = utils.get_grp_model_labels(
 
229
  n_label_per_bin=BIN_DISTRIB,
230
  score_bins=SCORE_BINS,
231
  grp_ids=grp_ids,
232
  )
233
 
 
 
234
  # Modify model name
235
  model_name = f"{model_name}_group_gender{sel_gender}_relig{sel_relig}_pol{sel_pol}_race{sel_race_orig}_lgbtq_{sel_lgbtq}"
236
+ utils.setup_user_model_dirs(user, model_name)
 
 
 
 
 
237
 
238
  # Train group model
239
+ mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, ratings_grp, user)
240
 
241
  duration = time.time() - start
242
+ if debug:
243
+ print("Time to train/cache:", duration)
244
 
245
  context = {
246
  "group_size": group_size,
 
258
  clusters_for_tuning = utils.get_large_clusters(min_n=150)
259
  clusters_for_tuning_options = [{"value": i, "text": cluster} for i, cluster in enumerate(clusters_for_tuning)] # Format for Svelecte UI element
260
 
261
+ model_name_suggestion = f"my_model"
 
262
 
263
  context = {
264
+ "personalized_models": utils.get_user_model_names(user),
265
  "model_name_suggestion": model_name_suggestion,
266
  "clusters_for_tuning": clusters_for_tuning_options,
267
  }
 
269
 
270
  ########################################
271
  # ROUTE: /GET_COMMENTS_TO_LABEL
272
+ if DEBUG:
273
+ BIN_DISTRIB = [1, 2, 4, 2, 1] # 10 comments
274
+ else:
275
+ BIN_DISTRIB = [2, 4, 8, 4, 2] # 20 comments
276
  SCORE_BINS = [(0.0, 0.5), (0.5, 1.5), (1.5, 2.5), (2.5, 3.5), (3.5, 4.01)]
277
  @app.route("/get_comments_to_label")
278
  def get_comments_to_label():
279
  n = int(request.args.get("n"))
280
  # Fetch examples to label
281
  to_label_ids = utils.create_example_sets(
 
282
  n_label_per_bin=BIN_DISTRIB,
283
  score_bins=SCORE_BINS,
284
  keyword=None
 
295
 
296
  ########################################
297
  # ROUTE: /GET_COMMENTS_TO_LABEL_TOPIC
 
298
  @app.route("/get_comments_to_label_topic")
299
  def get_comments_to_label_topic():
300
  # Fetch examples to label
301
  topic = request.args.get("topic")
302
  to_label_ids = utils.create_example_sets(
 
 
303
  n_label_per_bin=BIN_DISTRIB,
304
  score_bins=SCORE_BINS,
305
  keyword=None,
 
316
  ########################################
317
  # ROUTE: /GET_PERSONALIZED_MODEL
318
  @app.route("/get_personalized_model")
319
+ def get_personalized_model(debug=DEBUG):
320
  model_name = request.args.get("model_name")
321
  ratings_json = request.args.get("ratings")
322
  mode = request.args.get("mode")
323
  user = request.args.get("user")
324
  ratings = json.loads(ratings_json)
325
+ if debug:
326
+ print(ratings)
327
+ start = time.time()
328
 
329
+ utils.setup_user_model_dirs(user, model_name)
 
 
 
 
330
 
331
  # Handle existing or new model cases
332
  if mode == "view":
333
  # Fetch prior model performance
334
+ mae, mse, rmse, avg_diff, ratings_prev = utils.fetch_existing_data(user, model_name)
 
 
 
335
 
336
  elif mode == "train":
337
  # Train model and cache predictions using new labels
338
  print("get_personalized_model train")
339
+ mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, ratings, user)
340
+
341
+ if debug:
342
+ duration = time.time() - start
343
+ print("Time to train/cache:", duration)
344
 
345
+ perf_plot, mae_status = utils.plot_train_perf_results(user, model_name, mae)
346
  perf_plot_json = perf_plot.to_json()
347
 
348
  def round_metric(x):
 
355
  "mse": round_metric(mse),
356
  "rmse": round_metric(rmse),
357
  "avg_diff": round_metric(avg_diff),
 
358
  "ratings_prev": ratings_prev,
359
  "perf_plot_json": json.loads(perf_plot_json),
360
  }
 
364
  ########################################
365
  # ROUTE: /GET_PERSONALIZED_MODEL_TOPIC
366
  @app.route("/get_personalized_model_topic")
367
+ def get_personalized_model_topic(debug=DEBUG):
368
  model_name = request.args.get("model_name")
369
  ratings_json = request.args.get("ratings")
370
  user = request.args.get("user")
371
  ratings = json.loads(ratings_json)
372
  topic = request.args.get("topic")
373
+ if debug:
374
+ print(ratings)
375
  start = time.time()
376
 
377
  # Modify model name
378
  model_name = f"{model_name}_{topic}"
379
+ utils.setup_user_model_dirs(user, model_name)
 
 
 
 
 
380
 
381
  # Handle existing or new model cases
382
  # Train model and cache predictions using new labels
383
+ if debug:
384
+ print("get_personalized_model_topic train")
385
+ mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, ratings, user, topic=topic)
386
 
387
+ if debug:
388
+ duration = time.time() - start
389
+ print("Time to train/cache:", duration)
 
 
390
 
391
  results = {
392
  "success": "success",
 
407
  if topic_vis_method == "null":
408
  topic_vis_method = "fp_fn"
409
 
410
+ # Load reports for current user from stored file
411
+ reports_file = utils.get_reports_file(cur_user, model)
412
+ if not os.path.isfile(reports_file):
 
 
413
  if scaffold_method == "fixed":
414
  reports = get_fixed_scaffold()
415
  elif (scaffold_method == "personal" or scaffold_method == "personal_group" or scaffold_method == "personal_test"):
416
+ reports = get_personal_scaffold(cur_user, model, topic_vis_method)
 
 
417
  elif scaffold_method == "prompts":
418
  reports = get_prompts_scaffold()
419
  elif scaffold_method == "tutorial":
 
431
  ]
432
  else:
433
  # Load from pickle file
434
+ with open(reports_file, "rb") as f:
435
+ reports = json.load(f)
436
 
437
  results = {
438
  "reports": reports,
 
498
  },
499
  ]
500
 
 
 
 
 
 
 
 
 
 
 
501
  def get_topic_errors(df, topic_vis_method, threshold=2):
502
+ topics = df["topic"].unique().tolist()
503
  topic_errors = {}
504
  for topic in topics:
505
+ t_df = df[df["topic"] == topic]
506
+ y_true = t_df["pred"].to_numpy() # Predicted user rating (treated as ground truth)
507
+ y_pred = t_df["rating_sys"].to_numpy() # System rating (which we're auditing)
508
  if topic_vis_method == "mae":
509
  t_err = mean_absolute_error(y_true, y_pred)
510
  elif topic_vis_method == "mse":
 
512
  elif topic_vis_method == "avg_diff":
513
  t_err = np.mean(y_true - y_pred)
514
  elif topic_vis_method == "fp_proportion":
515
+ y_true = [0 if rating < threshold else 1 for rating in y_true]
516
+ y_pred = [0 if rating < threshold else 1 for rating in y_pred]
517
  try:
518
  tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
519
  except:
 
521
  total = float(len(y_true))
522
  t_err = fp / total
523
  elif topic_vis_method == "fn_proportion":
524
+ y_true = [0 if rating < threshold else 1 for rating in y_true]
525
+ y_pred = [0 if rating < threshold else 1 for rating in y_pred]
526
  try:
527
  tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
528
  except:
 
533
 
534
  return topic_errors
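Editor's sketch (not part of this commit): the fp_proportion / fn_proportion branches above binarize both rating series at the toxicity threshold before computing the confusion matrix. On toy data:

import numpy as np
from sklearn.metrics import confusion_matrix

pred = np.array([0.5, 2.5, 3.0, 1.0])        # personalized-model ratings, treated as ground truth
rating_sys = np.array([2.5, 1.0, 3.5, 0.5])  # system ratings being audited
threshold = 2                                # default threshold in get_topic_errors

y_true = [0 if r < threshold else 1 for r in pred]
y_pred = [0 if r < threshold else 1 for r in rating_sys]
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
total = float(len(y_true))
print("fp_proportion:", fp / total)  # 0.25 -- system flags a comment this user would not (over-sensitive)
print("fn_proportion:", fn / total)  # 0.25 -- system misses a comment this user would flag (under-sensitive)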
535
 
536
+ def get_personal_scaffold(cur_user, model, topic_vis_method, n_topics=200, n=5, debug=DEBUG):
537
  threshold = utils.get_toxic_threshold()
538
 
539
  # Get topics with greatest amount of error
540
+ preds_file = utils.get_preds_file(cur_user, model)
541
+ with open(preds_file, "rb") as f:
542
  preds_df = pickle.load(f)
543
+ preds_df_mod = preds_df[preds_df["user_id"] == cur_user].sort_values(by=["item_id"]).reset_index()
544
+ preds_df_mod = preds_df_mod[preds_df_mod["topic_id"] < n_topics]
 
545
 
546
  if topic_vis_method == "median":
547
+ df = preds_df_mod.groupby(["topic", "user_id"]).median().reset_index()
548
  elif topic_vis_method == "mean":
549
+ df = preds_df_mod.groupby(["topic", "user_id"]).mean().reset_index()
550
  elif topic_vis_method == "fp_fn":
551
  for error_type in ["fn_proportion", "fp_proportion"]:
552
  topic_errors = get_topic_errors(preds_df_mod, error_type)
553
+ preds_df_mod[error_type] = [topic_errors[topic] for topic in preds_df_mod["topic"].tolist()]
554
+ df = preds_df_mod.groupby(["topic", "user_id"]).mean().reset_index()
555
  else:
556
  # Get error for each topic
557
  topic_errors = get_topic_errors(preds_df_mod, topic_vis_method)
558
+ preds_df_mod[topic_vis_method] = [topic_errors[topic] for topic in preds_df_mod["topic"].tolist()]
559
+ df = preds_df_mod.groupby(["topic", "user_id"]).mean().reset_index()
560
 
561
  # Get system error
562
+ junk_topics = ["53_maiareficco_kallystas_dyisisitmanila_tractorsazi", "-1_dude_bullshit_fight_ain"]
563
+ df = df[~df["topic"].isin(junk_topics)] # Exclude known "junk topics"
564
 
565
  if topic_vis_method == "median" or topic_vis_method == "mean":
566
+ df["error_magnitude"] = [utils.get_error_magnitude(sys, user, threshold) for sys, user in zip(df["rating_sys"].tolist(), df["pred"].tolist())]
567
+ df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["rating_sys"].tolist(), df["pred"].tolist())]
568
 
569
  df_under = df[df["error_type"] == "System is under-sensitive"]
570
  df_under = df_under.sort_values(by=["error_magnitude"], ascending=False).head(n) # surface largest errors first
571
+ report_under = [get_empty_report(row["topic"], row["error_type"]) for _, row in df_under.iterrows()]
572
 
573
  df_over = df[df["error_type"] == "System is over-sensitive"]
574
  df_over = df_over.sort_values(by=["error_magnitude"], ascending=False).head(n) # surface largest errors first
575
+ report_over = [get_empty_report(row["topic"], row["error_type"]) for _, row in df_over.iterrows()]
576
 
577
  # Set up reports
 
578
  reports = (report_under + report_over)
579
  random.shuffle(reports)
580
  elif topic_vis_method == "fp_fn":
581
  df_under = df.sort_values(by=["fn_proportion"], ascending=False).head(n)
582
  df_under = df_under[df_under["fn_proportion"] > 0]
583
+ if debug:
584
+ print(df_under[["topic", "fn_proportion"]])
585
+ report_under = [get_empty_report(row["topic"], "System is under-sensitive") for _, row in df_under.iterrows()]
586
 
587
  df_over = df.sort_values(by=["fp_proportion"], ascending=False).head(n)
588
  df_over = df_over[df_over["fp_proportion"] > 0]
589
+ if debug:
590
+ print(df_over[["topic", "fp_proportion"]])
591
+ report_over = [get_empty_report(row["topic"], "System is over-sensitive") for _, row in df_over.iterrows()]
592
 
593
  reports = (report_under + report_over)
594
  random.shuffle(reports)
595
  else:
596
  df = df.sort_values(by=[topic_vis_method], ascending=False).head(n * 2)
597
+ df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["rating_sys"].tolist(), df["pred"].tolist())]
598
+ reports = [get_empty_report(row["topic"], row["error_type"]) for _, row in df.iterrows()]
599
 
600
  return reports
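Editor's sketch (not part of this commit): for the median/mean path above, the scaffold splits topics by error direction and seeds empty reports from the n largest errors in each direction. The same sort/filter pattern on a toy frame (get_empty_report itself is defined elsewhere in server.py):

import pandas as pd

df = pd.DataFrame({
    "topic": ["pets", "politics", "sports", "music"],
    "error_type": ["System is under-sensitive", "System is over-sensitive",
                   "System is under-sensitive", "System is over-sensitive"],
    "error_magnitude": [1.8, 0.9, 0.4, 1.2],
})
n = 1
df_under = (df[df["error_type"] == "System is under-sensitive"]
            .sort_values(by=["error_magnitude"], ascending=False).head(n))
df_over = (df[df["error_type"] == "System is over-sensitive"]
           .sort_values(by=["error_magnitude"], ascending=False).head(n))
print(df_under["topic"].tolist())  # ['pets']  -- largest under-sensitivity error
print(df_over["topic"].tolist())   # ['music'] -- largest over-sensitivity error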
601
 
 
638
  },
639
  ]
640
 
641
+ # Filter to eligible reports: those that have been marked complete and include at least one piece of evidence.
642
+ def get_eligible_reports(reports):
643
+ eligible_reports = []
644
+ for r in reports:
645
+ if (r["complete_status"] == True) and (len(r["evidence"]) > 0):
646
+ eligible_reports.append(r)
647
+ return eligible_reports
648
+
649
+ # Submit all reports to AVID
650
+ # Logs the responses
651
+ def submit_reports_to_AVID(reports, cur_user, email, sep_selection, debug=DEBUG):
652
+ # Set up the connection to AVID
653
+ root = os.environ.get('AVID_API_URL')
654
+ api_key = os.environ.get('AVID_API_KEY')
655
+ key = {"Authorization": api_key}
656
+
657
+ reports = get_eligible_reports(reports)
658
+ if debug:
659
+ print("Num eligible reports:", len(reports))
660
+
661
+ for r in reports:
662
+ new_report = utils.convert_indie_label_json_to_avid_json(r, cur_user, email, sep_selection)
663
+ url = root + "submit"
664
+ response = requests.post(url, json=json.loads(new_report), headers=key) # The loads ensures type compliance
665
+ uuid = response.json()
666
+ if debug:
667
+ print("Report", new_report)
668
+ print("AVID API response:", response, uuid)
669
+
670
  ########################################
671
  # ROUTE: /SAVE_REPORTS
672
  @app.route("/save_reports")
673
+ def save_reports(debug=DEBUG):
674
  cur_user = request.args.get("cur_user")
675
  reports_json = request.args.get("reports")
676
  reports = json.loads(reports_json)
677
+ model = request.args.get("model")
678
 
679
+ # Save reports for current user to file
680
+ reports_file = utils.get_reports_file(cur_user, model)
681
+ with open(reports_file, "w", encoding ='utf8') as f:
682
+ json.dump(reports, f)
 
683
 
684
  results = {
685
  "status": "success",
686
  }
687
+ if debug:
688
+ print(results)
689
  return json.dumps(results)
690
 
691
  ########################################
692
+ # ROUTE: /SUBMIT_AVID_REPORT
693
+ @app.route("/submit_avid_report")
694
+ def submit_avid_report():
695
+ cur_user = request.args.get("cur_user")
696
+ email = request.args.get("email")
697
+ sep_selection = request.args.get("sep_selection")
698
+ reports_json = request.args.get("reports")
 
699
 
700
+ reports = json.loads(reports_json)
 
701
 
702
+ # Submit reports to AVID
703
+ submit_reports_to_AVID(reports, cur_user, email, sep_selection)
704
 
705
  results = {
706
+ "status": "success",
707
  }
708
  return json.dumps(results)
709
 
710
  ########################################
711
+ # ROUTE: /GET_EXPLORE_EXAMPLES
712
+ @app.route("/get_explore_examples")
713
+ def get_explore_examples():
714
+ threshold = utils.get_toxic_threshold()
715
+ n_examples = int(request.args.get("n_examples"))
 
 
 
 
 
 
716
 
717
+ # Get sample of examples
718
+ df = utils.get_explore_df(n_examples, threshold)
719
+ ex_json = df.to_json(orient="records")
720
 
721
  results = {
722
+ "examples": ex_json,
723
  }
724
  return json.dumps(results)
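Editor's sketch (not part of this commit): the slimmed-down /get_explore_examples route above appears to delegate sampling and decision formatting to utils.get_explore_df and returns the records as a JSON string inside the JSON response, so a client parses twice. Assuming the server runs locally on port 5001:

import json
import requests

resp = requests.get("http://localhost:5001/get_explore_examples", params={"n_examples": 5})
examples = json.loads(resp.json()["examples"])  # "examples" is itself a JSON string of records
for ex in examples[:2]:
    print(sorted(ex.keys()))                    # column names come from utils.get_explore_df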
725