Spaces: Runtime error
carolanderson committed on
merge changes in user sessions and AVID reporting
- .gitignore +5 -0
- audit_utils.py +378 -729
- indie_label_svelte/public/global.css +16 -5
- indie_label_svelte/src/App.svelte +4 -57
- indie_label_svelte/src/AppOld.svelte +0 -127
- indie_label_svelte/src/Auditing.svelte +28 -53
- indie_label_svelte/src/ClusterResults.svelte +7 -5
- indie_label_svelte/src/CommentTable.svelte +16 -14
- indie_label_svelte/src/Explore.svelte +0 -1
- indie_label_svelte/src/Hunch.svelte +0 -26
- indie_label_svelte/src/HypothesisPanel.svelte +282 -307
- indie_label_svelte/src/IterativeClustering.svelte +0 -164
- indie_label_svelte/src/KeywordSearch.svelte +3 -10
- indie_label_svelte/src/Labeling.svelte +9 -18
- indie_label_svelte/src/MainPanel.svelte +11 -36
- indie_label_svelte/src/ModelPerf.svelte +14 -55
- indie_label_svelte/src/OverallResults.svelte +0 -79
- indie_label_svelte/src/Results.svelte +0 -206
- indie_label_svelte/src/SelectUserDialog.svelte +0 -66
- indie_label_svelte/src/StudyLinks.svelte +0 -59
- indie_label_svelte/src/SubmitReportDialog.svelte +120 -0
- indie_label_svelte/src/TopicTraining.svelte +1 -9
- indie_label_svelte/src/stores/all_users_store.js +0 -6
- indie_label_svelte/src/stores/cur_user_store.js +0 -3
- server.py +174 -244
.gitignore
ADDED
@@ -0,0 +1,5 @@
+__pycache__/
+.DS_Store
+data/
+test_nbs/
+data_zips/
audit_utils.py
CHANGED
@@ -23,6 +23,7 @@ import time
 from sentence_transformers import SentenceTransformer, util
 import torch
 from bertopic import BERTopic
+from datetime import date
 
 ########################################
 # PRE-LOADING
@@ -37,61 +38,39 @@ alt.renderers.enable('altair_saver', fmts=['vega-lite', 'png'])
 
 # Data-loading
 module_dir = "./"
-
-
-# # TEMP reset
-# with open(os.path.join(module_dir, "./data/all_model_names.pkl"), "wb") as f:
-#     all_model_names = []
-#     pickle.dump(all_model_names, f)
-# with open(f"./data/users_to_models.pkl", "wb") as f:
-#     users_to_models = {}
-#     pickle.dump(users_to_models, f)
-
-
-with open(os.path.join(module_dir, "data/ids_to_comments.pkl"), "rb") as f:
+with open(os.path.join(module_dir, "data/input/ids_to_comments.pkl"), "rb") as f:
     ids_to_comments = pickle.load(f)
-with open(os.path.join(module_dir, "data/comments_to_ids.pkl"), "rb") as f:
+with open(os.path.join(module_dir, "data/input/comments_to_ids.pkl"), "rb") as f:
     comments_to_ids = pickle.load(f)
-
-
-
-sys_eval_df = pd.read_pickle(os.path.join(module_dir, "data/split_data/sys_eval_df.pkl"))
-train_df = pd.read_pickle(os.path.join(module_dir, "data/split_data/train_df.pkl"))
+system_preds_df = pd.read_pickle("data/input/system_preds_df.pkl")
+sys_eval_df = pd.read_pickle(os.path.join(module_dir, "data/input/split_data/sys_eval_df.pkl"))
+train_df = pd.read_pickle(os.path.join(module_dir, "data/input/split_data/train_df.pkl"))
 train_df_ids = train_df["item_id"].unique().tolist()
-model_eval_df = pd.read_pickle(os.path.join(module_dir, "data/split_data/model_eval_df.pkl"))
-ratings_df_full = pd.read_pickle(os.path.join(module_dir, "data/ratings_df_full.pkl"))
+model_eval_df = pd.read_pickle(os.path.join(module_dir, "data/input/split_data/model_eval_df.pkl"))
+ratings_df_full = pd.read_pickle(os.path.join(module_dir, "data/input/ratings_df_full.pkl"))
+worker_info_df = pd.read_pickle("./data/input/worker_info_df.pkl")
 
-
-
-with open(f"./data/users_to_models.pkl", "rb") as f:
-    users_to_models = pickle.load(f)
-
-with open("data/perf_1000_topics.pkl", "rb") as f:
-    perf_1000_topics = pickle.load(f)
-with open("data/perf_1000_tox_cat.pkl", "rb") as f:
-    perf_1000_tox_cat = pickle.load(f)
-with open("data/perf_1000_tox_severity.pkl", "rb") as f:
-    perf_1000_tox_severity = pickle.load(f)
-with open("data/user_perf_metrics.pkl", "rb") as f:
-    user_perf_metrics = pickle.load(f)
-
-topic_ids = comments_grouped_full_topic_cat.topic_id
-topics = comments_grouped_full_topic_cat.topic
+topic_ids = system_preds_df.topic_id
+topics = system_preds_df.topic
 topic_ids_to_topics = {topic_ids[i]: topics[i] for i in range(len(topic_ids))}
 topics_to_topic_ids = {topics[i]: topic_ids[i] for i in range(len(topic_ids))}
-unique_topics_ids = sorted(
+unique_topics_ids = sorted(system_preds_df.topic_id.unique())
 unique_topics = [topic_ids_to_topics[topic_id] for topic_id in range(len(topic_ids_to_topics) - 1)]
 
 def get_toxic_threshold():
     return TOXIC_THRESHOLD
 
-def
-
-
-
+def get_user_model_names(user):
+    # Fetch the user's models
+    output_dir = f"./data/output"
+    users = [name for name in os.listdir(output_dir) if os.path.isdir(os.path.join(output_dir, name))]
+    if user not in users:
+        # User does not exist
+        return []
     else:
-        # Fetch the user
-
+        # Fetch trained model names for the user
+        user_dir = f"./data/output/{user}"
+        user_models = [name for name in os.listdir(user_dir) if os.path.isdir(os.path.join(user_dir, name))]
     user_models.sort()
     return user_models
 
@@ -99,7 +78,7 @@ def get_unique_topics():
     return unique_topics
 
 def get_large_clusters(min_n):
-    counts_df =
+    counts_df = system_preds_df.groupby(by=["topic_id"]).size().reset_index(name='counts')
    counts_df = counts_df[counts_df["counts"] >= min_n]
    return [topic_ids_to_topics[t_id] for t_id in sorted(counts_df["topic_id"].tolist()[1:])]
 
@@ -137,32 +116,64 @@ readable_to_internal = {
 }
 internal_to_readable = {v: k for k, v in readable_to_internal.items()}
 
-# Embeddings for neighbor retrieval
-model_name = "paraphrase-MiniLM-L6-v2"
-model = SentenceTransformer(model_name)
-with open("./data/comments.pkl", "rb") as f:
-    comments = pickle.load(f)
-embeddings = torch.load("./data/embeddings/21_10_embeddings.pt")
-
-# Perspective API recalibration
-def recalib_v1(s):
-    # convert Perspective score to 0-4 toxicity score
-    # map 0 persp to 0 (not at all toxic); 0.5 persp to 1 (slightly toxic), 1.0 persp to 4 (extremely toxic)
-    if s < 0.5:
-        return (s * 2.)
-    else:
-        return ((s - 0.5) * 6.) + 1
-
-def recalib_v2(s):
-    # convert Perspective score to 0-4 toxicity score
-    # just 4x the perspective score
-    return (s * 4.)
-
-comments_grouped_full_topic_cat["rating_avg_orig"] = comments_grouped_full_topic_cat["rating"]
-comments_grouped_full_topic_cat["rating"] = [recalib_v2(score) for score in comments_grouped_full_topic_cat["persp_score"].tolist()]
 
-
-
+########################################
+# Data storage helper functions
+# Set up all directories for new user
+def setup_user_dirs(cur_user):
+    user_dir = f"./data/output/{cur_user}"
+    if not os.path.isdir(user_dir):
+        os.mkdir(user_dir)
+def setup_model_dirs(cur_user, cur_model):
+    model_dir = f"./data/output/{cur_user}/{cur_model}"
+    if not os.path.isdir(model_dir):
+        os.mkdir(model_dir)  # Set up model dir
+        # Set up subdirs
+        os.mkdir(os.path.join(model_dir, "labels"))
+        os.mkdir(os.path.join(model_dir, "perf"))
+def setup_user_model_dirs(cur_user, cur_model):
+    setup_user_dirs(cur_user)
+    setup_model_dirs(cur_user, cur_model)
+
+# Charts
+def get_chart_file(cur_user, cur_model):
+    chart_dir = f"./data/output/{cur_user}/{cur_model}"
+    return os.path.join(chart_dir, f"chart_overall_vis.json")
+
+# Labels
+def get_label_dir(cur_user, cur_model):
+    return f"./data/output/{cur_user}/{cur_model}/labels"
+def get_n_label_files(cur_user, cur_model):
+    label_dir = get_label_dir(cur_user, cur_model)
+    return len([name for name in os.listdir(label_dir) if os.path.isfile(os.path.join(label_dir, name))])
+def get_label_file(cur_user, cur_model, label_i=None):
+    if label_i is None:
+        # Get index to add on to end of list
+        label_i = get_n_label_files(cur_user, cur_model)
+    label_dir = get_label_dir(cur_user, cur_model)
+    return os.path.join(label_dir, f"{label_i}.pkl")
+
+# Performance
+def get_perf_dir(cur_user, cur_model):
+    return f"./data/output/{cur_user}/{cur_model}/perf"
+def get_n_perf_files(cur_user, cur_model):
+    perf_dir = get_perf_dir(cur_user, cur_model)
+    return len([name for name in os.listdir(perf_dir) if os.path.isfile(os.path.join(perf_dir, name))])
+def get_perf_file(cur_user, cur_model, perf_i=None):
+    if perf_i is None:
+        # Get index to add on to end of list
+        perf_i = get_n_perf_files(cur_user, cur_model)
+    perf_dir = get_perf_dir(cur_user, cur_model)
+    return os.path.join(perf_dir, f"{perf_i}.pkl")
+
+# Predictions dataframe
+def get_preds_file(cur_user, cur_model):
+    preds_dir = f"./data/output/{cur_user}/{cur_model}"
+    return os.path.join(preds_dir, f"preds_df.pkl")
+
+# Reports
+def get_reports_file(cur_user, cur_model):
+    return f"./data/output/{cur_user}/{cur_model}/reports.json"
 
 ########################################
 # General utils
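The new storage helpers define a per-user, per-model layout under ./data/output. A minimal sketch of how they compose (the user and model names here are invented for illustration, not from this commit):

    # Hypothetical session exercising the helpers added in this hunk
    setup_user_model_dirs("demo_user", "demo_model")    # creates ./data/output/demo_user/demo_model/{labels,perf}
    get_label_file("demo_user", "demo_model")           # -> .../labels/0.pkl (next free index)
    get_perf_file("demo_user", "demo_model", perf_i=1)  # -> .../perf/1.pkl
    get_reports_file("demo_user", "demo_model")         # -> .../reports.json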
@@ -192,22 +203,6 @@ def my_bootstrap(vals, n_boot, alpha):
 
 ########################################
 # GET_AUDIT utils
-def other_users_perf(perf_metrics, metric, user_metric, alpha=0.95, n_boot=501):
-    ind = get_metric_ind(metric)
-
-    metric_vals = [metric_vals[ind] for metric_vals in perf_metrics.values()]
-    metric_avg = np.median(metric_vals)
-
-    # Future: use provided sample to perform bootstrap sampling
-    ci_1 = mne.stats.bootstrap_confidence_interval(np.array(metric_vals), ci=alpha, n_bootstraps=n_boot, stat_fun="median")
-
-    bs_samples, ci = my_bootstrap(metric_vals, n_boot, alpha)
-
-    # Get user's percentile
-    percentile = stats.percentileofscore(bs_samples, user_metric)
-
-    return metric_avg, ci, percentile, metric_vals
-
 def plot_metric_histogram(metric, user_metric, other_metric_vals, n_bins=10):
     hist, bin_edges = np.histogram(other_metric_vals, bins=n_bins, density=False)
     data = pd.DataFrame({
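The deleted other_users_perf summarized other users' metric values with a median plus a bootstrap confidence interval, then located the current user's metric as a percentile of the bootstrap samples. A minimal standalone sketch of that pattern (the sample values are invented):

    import numpy as np
    from scipy import stats

    other_vals = np.array([0.8, 1.1, 0.9, 1.3, 1.0])  # invented MAE values for other users
    metric_avg = np.median(other_vals)
    # Bootstrap the median, then rank a user's MAE of 0.95 against the samples
    bs_samples = [np.median(np.random.choice(other_vals, size=len(other_vals))) for _ in range(501)]
    percentile = stats.percentileofscore(bs_samples, 0.95)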
@@ -239,394 +234,38 @@ def plot_metric_histogram(metric, user_metric, other_metric_vals, n_bins=10):
 
     return (bar + rule).interactive()
 
-
-
-
-
-
-
-
-
-
-
-
-
-        cur_user_df = user_df[user_df["prediction_bin"] == severity_i]
-        y_true_user = cur_user_df.pred.to_numpy()  # user's label
-        y_pred = cur_user_df.rating_avg.to_numpy()  # system's label (avg)
-
-        if len(y_true_user) > 0:
-            used_bins.append(bin_labels[severity_i])
-            metric_user = calc_metric_user(y_true_user, y_pred, perf_metric)
-            y_user.append(metric_user)
-            y_other.append(metric_other)
-            other_ci_low.append(ci_low)
-            other_ci_high.append(ci_high)
-
-    return y_user, y_other, used_bins, other_ci_low, other_ci_high
-
-def get_topic_bins(perf_metric, user_df, other_dfs, n_topics, ci=0.95, n_boot=501):
-    # Note: not using other_dfs anymore
-    y_user = []
-    y_other = []
-    used_bins = []
-    other_ci_low = []
-    other_ci_high = []
-    selected_topics = unique_topics_ids[1:(n_topics + 1)]
-
-    for topic_id in selected_topics:
-        cur_topic = topic_ids_to_topics[topic_id]
-        metric_others = [metrics[get_metric_ind(perf_metric)] for metrics in perf_1000_topics[topic_id].values() if metrics[get_metric_ind(perf_metric)]]
-        ci_low, ci_high = mne.stats.bootstrap_confidence_interval(np.array(metric_others), ci=ci, n_bootstraps=n_boot, stat_fun='median')
-        metric_other = np.median(metric_others)
-
-        cur_user_df = user_df[user_df["topic"] == cur_topic]
-        y_true_user = cur_user_df.pred.to_numpy()  # user's label
-        y_pred = cur_user_df.rating_avg.to_numpy()  # system's label (avg)
-
-        if len(y_true_user) > 0:
-            used_bins.append(cur_topic)
-            metric_user = calc_metric_user(y_true_user, y_pred, perf_metric)
-            y_user.append(metric_user)
-            y_other.append(metric_other)
-            other_ci_low.append(ci_low)
-            other_ci_high.append(ci_high)
-
-    return y_user, y_other, used_bins, other_ci_low, other_ci_high
-
-def calc_metric_user(y_true_user, y_pred, perf_metric):
-    if perf_metric == "MAE":
-        metric_user = mean_absolute_error(y_true_user, y_pred)
-
-    elif perf_metric == "MSE":
-        metric_user = mean_squared_error(y_true_user, y_pred)
-
-    elif perf_metric == "RMSE":
-        metric_user = mean_squared_error(y_true_user, y_pred, squared=False)
-
-    elif perf_metric == "avg_diff":
-        metric_user = np.mean(y_true_user - y_pred)
-
-    return metric_user
-
-def get_toxicity_category_bins(perf_metric, user_df, other_dfs, threshold=0.5, ci=0.95, n_boot=501):
-    # Note: not using other_dfs anymore; threshold from pre-calculation is 0.5
-    cat_cols = ["is_profane_frac", "is_threat_frac", "is_identity_attack_frac", "is_insult_frac", "is_sexual_harassment_frac"]
-    cat_labels = ["Profanity", "Threats", "Identity Attacks", "Insults", "Sexual Harassment"]
-    y_user = []
-    y_other = []
-    used_bins = []
-    other_ci_low = []
-    other_ci_high = []
-    for i, cur_col_name in enumerate(cat_cols):
-        metric_others = [metrics[get_metric_ind(perf_metric)] for metrics in perf_1000_tox_cat[cur_col_name].values() if metrics[get_metric_ind(perf_metric)]]
-        ci_low, ci_high = mne.stats.bootstrap_confidence_interval(np.array(metric_others), ci=ci, n_bootstraps=n_boot, stat_fun='median')
-        metric_other = np.median(metric_others)
-
-        # Filter to rows where a comment received an average label >= the provided threshold for the category
-        cur_user_df = user_df[user_df[cur_col_name] >= threshold]
-        y_true_user = cur_user_df.pred.to_numpy()  # user's label
-        y_pred = cur_user_df.rating_avg.to_numpy()  # system's label (avg)
-
-        if len(y_true_user) > 0:
-            used_bins.append(cat_labels[i])
-            metric_user = calc_metric_user(y_true_user, y_pred, perf_metric)
-            y_user.append(metric_user)
-            y_other.append(metric_other)
-            other_ci_low.append(ci_low)
-            other_ci_high.append(ci_high)
-
-    return y_user, y_other, used_bins, other_ci_low, other_ci_high
-
-def plot_class_cond_results(preds_df, breakdown_axis, perf_metric, other_ids, sort_bars, n_topics, worker_id="A"):
-    # Note: preds_df already has binned results
-    # Prepare dfs
-    user_df = preds_df[preds_df.user_id == worker_id].sort_values(by=["item_id"]).reset_index()
-    other_dfs = [preds_df[preds_df.user_id == other_id].sort_values(by=["item_id"]).reset_index() for other_id in other_ids]
-
-    if breakdown_axis == "toxicity_severity":
-        y_user, y_other, used_bins, other_ci_low, other_ci_high = get_toxicity_severity_bins(perf_metric, user_df, other_dfs)
-    elif breakdown_axis == "topic":
-        y_user, y_other, used_bins, other_ci_low, other_ci_high = get_topic_bins(perf_metric, user_df, other_dfs, n_topics)
-    elif breakdown_axis == "toxicity_category":
-        y_user, y_other, used_bins, other_ci_low, other_ci_high = get_toxicity_category_bins(perf_metric, user_df, other_dfs)
-
-    diffs = list(np.array(y_user) - np.array(y_other))
-
-    # Generate bar chart
-    data = pd.DataFrame({
-        "metric_val": y_user + y_other,
-        "Labeler": ["You" for _ in range(len(y_user))] + ["Other users" for _ in range(len(y_user))],
-        "used_bins": used_bins + used_bins,
-        "diffs": diffs + diffs,
-        "lower_cis": y_user + other_ci_low,
-        "upper_cis": y_user + other_ci_high,
-    })
-
-    color_domain = ['You', 'Other users']
-    color_range = [YOUR_COLOR, OTHER_USERS_COLOR]
-
-    base = alt.Chart()
-    chart_title=f"{internal_to_readable[breakdown_axis]} Results"
-    x_axis = alt.X("Labeler:O", sort=("You", "Other users"), title=None, axis=None)
-    y_axis = alt.Y("metric_val:Q", title=internal_to_readable[perf_metric])
-    if sort_bars:
-        col_content = alt.Column("used_bins:O", sort=alt.EncodingSortField(field="diffs", op="mean", order='descending'))
-    else:
-        col_content = alt.Column("used_bins:O")
-
-    if n_topics is not None and n_topics > 10:
-        # Change to horizontal bar chart
-        bar = base.mark_bar(lineBreak="_").encode(
-            y=x_axis,
-            x=y_axis,
-            color=alt.Color("Labeler:O", scale=alt.Scale(domain=color_domain, range=color_range)),
-            tooltip=[
-                alt.Tooltip('Labeler:O', title='Labeler'),
-                alt.Tooltip('metric_val:Q', title=perf_metric, format=".3f"),
-            ]
-        )
-        error_bars = base.mark_errorbar().encode(
-            y=x_axis,
-            x = alt.X("lower_cis:Q", title=internal_to_readable[perf_metric]),
-            x2 = alt.X2("upper_cis:Q", title=None),
-            tooltip=[
-                alt.Tooltip('lower_cis:Q', title='Lower CI', format=".3f"),
-                alt.Tooltip('upper_cis:Q', title='Upper CI', format=".3f"),
-            ]
-        )
-        combined = alt.layer(
-            bar, error_bars, data=data
-        ).facet(
-            row=col_content
-        ).properties(
-            title=chart_title,
-        ).interactive()
+# Generates the summary plot across all topics for the user
+def show_overall_perf(cur_model, error_type, cur_user, threshold=TOXIC_THRESHOLD, topic_vis_method="median", use_cache=True):
+    # Your perf (calculate using model and testset)
+    preds_file = get_preds_file(cur_user, cur_model)
+    with open(preds_file, "rb") as f:
+        preds_df = pickle.load(f)
+
+    chart_file = get_chart_file(cur_user, cur_model)
+    if use_cache and os.path.isfile(chart_file):
+        # Read from file if it exists
+        with open(chart_file, "r") as f:
+            topic_overview_plot_json = json.load(f)
     else:
-
-
-
-
-
-
-
-
-
-        error_bars = base.mark_errorbar().encode(
-            x=x_axis,
-            y = alt.Y("lower_cis:Q", title=internal_to_readable[perf_metric]),
-            y2 = alt.Y2("upper_cis:Q", title=None),
-            tooltip=[
-                alt.Tooltip('lower_cis:Q', title='Lower CI', format=".3f"),
-                alt.Tooltip('upper_cis:Q', title='Upper CI', format=".3f"),
-            ]
-        )
-        combined = alt.layer(
-            bar, error_bars, data=data
-        ).facet(
-            column=col_content
-        ).properties(
-            title=chart_title,
-        ).interactive()
-
-    return combined
-
-def show_overall_perf(variant, error_type, cur_user, threshold=TOXIC_THRESHOLD, breakdown_axis=None, topic_vis_method="median"):
-    # Your perf (calculate using model and testset)
-    breakdown_axis = readable_to_internal[breakdown_axis]
-
-    if breakdown_axis is not None:
-        with open(os.path.join(module_dir, f"data/preds_dfs/{variant}.pkl"), "rb") as f:
-            preds_df = pickle.load(f)
-
-    # Read from file
-    chart_dir = "./data/charts"
-    chart_file = os.path.join(chart_dir, f"{cur_user}_{variant}.pkl")
-    if os.path.isfile(chart_file):
-        with open(chart_file, "r") as f:
-            topic_overview_plot_json = json.load(f)
-    else:
-        preds_df_mod = preds_df.merge(comments_grouped_full_topic_cat, on="item_id", how="left", suffixes=('_', '_avg'))
-        if topic_vis_method == "median":
-            preds_df_mod_grp = preds_df_mod.groupby(["topic_", "user_id"]).median()
-        elif topic_vis_method == "mean":
-            preds_df_mod_grp = preds_df_mod.groupby(["topic_", "user_id"]).mean()
-        topic_overview_plot_json = plot_overall_vis(preds_df=preds_df_mod_grp, n_topics=200, threshold=threshold, error_type=error_type, cur_user=cur_user, cur_model=variant)
+        # Otherwise, generate chart and save to file
+        if topic_vis_method == "median":  # Default
+            preds_df_grp = preds_df.groupby(["topic", "user_id"]).median()
+        elif topic_vis_method == "mean":
+            preds_df_grp = preds_df.groupby(["topic", "user_id"]).mean()
+        topic_overview_plot_json = plot_overall_vis(preds_df=preds_df_grp, n_topics=200, threshold=threshold, error_type=error_type, cur_user=cur_user, cur_model=cur_model)
+        # Save to file
+        with open(chart_file, "w") as f:
+            json.dump(topic_overview_plot_json, f)
 
     return {
         "topic_overview_plot_json": json.loads(topic_overview_plot_json),
     }
 
-########################################
-# GET_CLUSTER_RESULTS utils
-def get_overall_perf3(preds_df, perf_metric, other_ids, worker_id="A"):
-    # Prepare dataset to calculate performance
-    # Note: true is user and pred is system
-    y_true = preds_df[preds_df["user_id"] == worker_id].pred.to_numpy()
-    y_pred_user = preds_df[preds_df["user_id"] == worker_id].rating_avg.to_numpy()
-
-    y_true_others = y_pred_others = [preds_df[preds_df["user_id"] == other_id].pred.to_numpy() for other_id in other_ids]
-    y_pred_others = [preds_df[preds_df["user_id"] == other_id].rating_avg.to_numpy() for other_id in other_ids]
-
-    # Get performance for user's model and for other users
-    if perf_metric == "MAE":
-        user_perf = mean_absolute_error(y_true, y_pred_user)
-        other_perfs = [mean_absolute_error(y_true_others[i], y_pred_others[i]) for i in range(len(y_true_others))]
-    elif perf_metric == "MSE":
-        user_perf = mean_squared_error(y_true, y_pred_user)
-        other_perfs = [mean_squared_error(y_true_others[i], y_pred_others[i]) for i in range(len(y_true_others))]
-    elif perf_metric == "RMSE":
-        user_perf = mean_squared_error(y_true, y_pred_user, squared=False)
-        other_perfs = [mean_squared_error(y_true_others[i], y_pred_others[i], squared=False) for i in range(len(y_true_others))]
-    elif perf_metric == "avg_diff":
-        user_perf = np.mean(y_true - y_pred_user)
-        other_perfs = [np.mean(y_true_others[i] - y_pred_others[i]) for i in range(len(y_true_others))]
-
-    other_perf = np.mean(other_perfs)  # average across all other users
-    return user_perf, other_perf
-
-def style_color_difference(row):
-    full_opacity_diff = 3.
-    pred_user_col = "Your predicted rating"
-    pred_other_col = "Other users' predicted rating"
-    pred_system_col = "Status-quo system rating"
-    diff_user = row[pred_user_col] - row[pred_system_col]
-    diff_other = row[pred_other_col] - row[pred_system_col]
-    red = "234, 133, 125"
-    green = "142, 205, 162"
-    bkgd_user = green if diff_user < 0 else red  # red if more toxic; green if less toxic
-    opac_user = min(abs(diff_user / full_opacity_diff), 1.)
-    bkgd_other = green if diff_other < 0 else red  # red if more toxic; green if less toxic
-    opac_other = min(abs(diff_other / full_opacity_diff), 1.)
-    return ["", f"background-color: rgba({bkgd_user}, {opac_user});", f"background-color: rgba({bkgd_other}, {opac_other});", "", ""]
-
-def display_examples_cluster(preds_df, other_ids, num_examples, sort_ascending, worker_id="A"):
-    user_df = preds_df[preds_df.user_id == worker_id].sort_values(by=["item_id"]).reset_index()
-    others_df = preds_df[preds_df.user_id == other_ids[0]]
-    for i in range(1, len(other_ids)):
-        others_df.append(preds_df[preds_df.user_id == other_ids[i]])
-    others_df.groupby(["item_id"]).mean()
-    others_df = others_df.sort_values(by=["item_id"]).reset_index()
-
-    df = pd.merge(user_df, others_df, on="item_id", how="left", suffixes=('_user', '_other'))
-    df["Comment"] = df["comment_user"]
-    df["Your predicted rating"] = df["pred_user"]
-    df["Other users' predicted rating"] = df["pred_other"]
-    df["Status-quo system rating"] = df["rating_avg_user"]
-    df["Status-quo system std dev"] = df["rating_stddev_user"]
-    df = df[["Comment", "Your predicted rating", "Other users' predicted rating", "Status-quo system rating", "Status-quo system std dev"]]
-
-    # Add styling
-    df = df.sort_values(by=['Status-quo system std dev'], ascending=sort_ascending)
-    n_to_sample = np.min([num_examples, len(df)])
-    df = df.sample(n=n_to_sample).reset_index(drop=True)
-    return df.style.apply(style_color_difference, axis=1).render()
-
-def calc_odds_ratio(df, comparison_group, toxic_threshold=1.5, worker_id="A", debug=False, smoothing_factor=1):
-    if comparison_group == "status_quo":
-        other_pred_col = "rating_avg"
-        # Get unique comments, but fetch average labeler rating
-        num_toxic_other = len(df[(df.user_id == "A") & (df[other_pred_col] >= toxic_threshold)]) + smoothing_factor
-        num_nontoxic_other = len(df[(df.user_id == "A") & (df[other_pred_col] < toxic_threshold)]) + smoothing_factor
-    elif comparison_group == "other_users":
-        other_pred_col = "pred"
-        num_toxic_other = len(df[(df.user_id != "A") & (df[other_pred_col] >= toxic_threshold)]) + smoothing_factor
-        num_nontoxic_other = len(df[(df.user_id != "A") & (df[other_pred_col] < toxic_threshold)]) + smoothing_factor
-
-    num_toxic_user = len(df[(df.user_id == "A") & (df.pred >= toxic_threshold)]) + smoothing_factor
-    num_nontoxic_user = len(df[(df.user_id == "A") & (df.pred < toxic_threshold)]) + smoothing_factor
-
-    toxic_ratio = num_toxic_user / num_toxic_other
-    nontoxic_ratio = num_nontoxic_user / num_nontoxic_other
-    odds_ratio = toxic_ratio / nontoxic_ratio
-
-    if debug:
-        print(f"Odds ratio: {odds_ratio}")
-        print(f"num_toxic_user: {num_toxic_user}, num_nontoxic_user: {num_nontoxic_user}")
-        print(f"num_toxic_other: {num_toxic_other}, num_nontoxic_other: {num_nontoxic_other}")
-
-    contingency_table = [[num_toxic_user, num_nontoxic_user], [num_toxic_other, num_nontoxic_other]]
-    odds_ratio, p_val = stats.fisher_exact(contingency_table, alternative='two-sided')
-    if debug:
-        print(f"Odds ratio: {odds_ratio}, p={p_val}")
-
-    return odds_ratio
-
-# Neighbor search
-def get_match(comment_inds, K=20, threshold=None, debug=False):
-    match_ids = []
-    rows = []
-    for i in comment_inds:
-        if debug:
-            print(f"\nComment: {comments[i]}")
-        query_embedding = model.encode(comments[i], convert_to_tensor=True)
-        hits = util.semantic_search(query_embedding, embeddings, score_function=util.cos_sim, top_k=K)
-        # print(hits[0])
-        for hit in hits[0]:
-            c_id = hit['corpus_id']
-            score = np.round(hit['score'], 3)
-            if threshold is None or score > threshold:
-                match_ids.append(c_id)
-                if debug:
-                    print(f"\t(ID={c_id}, Score={score}): {comments[c_id]}")
-                rows.append([c_id, score, comments[c_id]])
-
-    df = pd.DataFrame(rows, columns=["id", "score", "comment"])
-    return match_ids
-
-def display_examples_auto_cluster(preds_df, cluster, other_ids, perf_metric, sort_ascending=True, worker_id="A", num_examples=10):
-    # Overall performance
-    topic_df = preds_df
-    topic_df = topic_df[topic_df["topic"] == cluster]
-    user_perf, other_perf = get_overall_perf3(topic_df, perf_metric, other_ids)
-
-    user_direction = "LOWER" if user_perf < 0 else "HIGHER"
-    other_direction = "LOWER" if other_perf < 0 else "HIGHER"
-    print(f"Your ratings are on average {np.round(abs(user_perf), 3)} {user_direction} than the existing system for this cluster")
-    print(f"Others' ratings (based on {len(other_ids)} users) are on average {np.round(abs(other_perf), 3)} {other_direction} than the existing system for this cluster")
-
-    # Display example comments
-    df = display_examples_cluster(preds_df, other_ids, num_examples, sort_ascending)
-    return df
-
-
-# function to get results for a new provided cluster
-def display_examples_manual_cluster(preds_df, cluster_comments, other_ids, perf_metric, sort_ascending=True, worker_id="A"):
-    # Overall performance
-    cluster_df = preds_df[preds_df["comment"].isin(cluster_comments)]
-    user_perf, other_perf = get_overall_perf3(cluster_df, perf_metric, other_ids)
-
-    user_direction = "LOWER" if user_perf < 0 else "HIGHER"
-    other_direction = "LOWER" if other_perf < 0 else "HIGHER"
-    print(f"Your ratings are on average {np.round(abs(user_perf), 3)} {user_direction} than the existing system for this cluster")
-    print(f"Others' ratings (based on {len(other_ids)} users) are on average {np.round(abs(other_perf), 3)} {other_direction} than the existing system for this cluster")
-
-    user_df = preds_df[preds_df.user_id == worker_id].sort_values(by=["item_id"]).reset_index()
-    others_df = preds_df[preds_df.user_id == other_ids[0]]
-    for i in range(1, len(other_ids)):
-        others_df.append(preds_df[preds_df.user_id == other_ids[i]])
-    others_df.groupby(["item_id"]).mean()
-    others_df = others_df.sort_values(by=["item_id"]).reset_index()
-
-    # Get cluster_comments
-    user_df = user_df[user_df["comment"].isin(cluster_comments)]
-    others_df = others_df[others_df["comment"].isin(cluster_comments)]
-
-    df = pd.merge(user_df, others_df, on="item_id", how="left", suffixes=('_user', '_other'))
-    df["pred_system"] = df["rating_avg_user"]
-    df["pred_system_stddev"] = df["rating_stddev_user"]
-    df = df[["item_id", "comment_user", "pred_user", "pred_other", "pred_system", "pred_system_stddev"]]
-
-    # Add styling
-    df = df.sort_values(by=['pred_system_stddev'], ascending=sort_ascending)
-    df = df.style.apply(style_color_difference, axis=1).render()
-    return df
-
 ########################################
 # GET_LABELING utils
-def create_example_sets(
     # Restrict to the keyword, if provided
-    df =
     if keyword != None:
         df = df[df["comment"].str.contains(keyword)]
 
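The new show_overall_perf keys everything on (cur_user, cur_model) and caches the rendered chart on disk. A hypothetical call (the user and model names are invented):

    out = show_overall_perf(cur_model="demo_model", error_type="Both", cur_user="demo_user")
    vega_spec = out["topic_overview_plot_json"]  # Vega-Lite JSON for the topic overview chart
    # A second call reads the cached chart_overall_vis.json unless use_cache=False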
@@ -651,8 +290,8 @@ def create_example_sets(comments_df, n_label_per_bin, score_bins, keyword=None,
 
     return ex_to_label
 
-def get_grp_model_labels(
-    df =
 
     train_df_grp = train_df[train_df["user_id"].isin(grp_ids)]
     train_df_grp_avg = train_df_grp.groupby(by=["item_id"]).median().reset_index()
@@ -676,106 +315,207 @@ def get_grp_model_labels(comments_df, n_label_per_bin, score_bins, grp_ids):
 
     return ratings_grp
 
 ########################################
 # GET_PERSONALIZED_MODEL utils
-def fetch_existing_data(
     # Check if we have cached model performance
-
-
-    if os.path.isdir(os.path.join(module_dir, perf_dir)):
     # Fetch cached results
-
-        with open(
         mae, mse, rmse, avg_diff = pickle.load(f)
     else:
-
-        with open(os.path.join(module_dir, f"./data/trained_models/{model_name}.pkl"), "rb") as f:
-            cur_model = pickle.load(f)
-        mae, mse, rmse, avg_diff = users_perf(cur_model)
-        # Cache results
-        os.mkdir(os.path.join(module_dir, perf_dir))
-        with open(os.path.join(module_dir, perf_dir, "1.pkl"), "wb") as f:
-            pickle.dump((mae, mse, rmse, avg_diff), f)
 
     # Fetch previous user-provided labels
     ratings_prev = None
-
-
         ratings_prev = pickle.load(f)
     return mae, mse, rmse, avg_diff, ratings_prev
 
-
     # Check if there is previously-labeled data; if so, combine it with this data
-
-    label_dir = f"./data/labels/{model_name}"
-    labeled_df = format_labeled_data(ratings)  # Treat ratings as full batch of all ratings
     ratings_prev = None
 
     # Filter out rows with "unsure" (-1)
     labeled_df = labeled_df[labeled_df["rating"] != -1]
 
     # Filter to top N for user study
-    if topic is None:
-
-        labeled_df = labeled_df.tail(top_n)
     else:
         # For topic tuning, need to fetch old labels
-
         # Concatenate previous set of labels with this new batch of labels
-
         ratings_prev = pickle.load(f)
-        labeled_df_prev = format_labeled_data(ratings_prev)
         labeled_df_prev = labeled_df_prev[labeled_df_prev["rating"] != -1]
         ratings.update(ratings_prev)  # append old ratings to ratings
         labeled_df = pd.concat([labeled_df_prev, labeled_df])
-
-
-
-    cur_model, perf, _, _ = train_user_model(ratings_df=labeled_df)
-
-    user_perf_metrics[model_name] = users_perf(cur_model)
-
-    mae, mse, rmse, avg_diff = user_perf_metrics[model_name]
-
-    cur_preds_df = get_preds_df(cur_model, ["A"], sys_eval_df=ratings_df_full, topic=topic, model_name=model_name)  # Just get results for user
-
     # Save this batch of labels
-
     pickle.dump(ratings, f)
 
-    #
-
-    pickle.dump(cur_preds_df, f)
-
-    if model_name not in all_model_names:
-        all_model_names.append(model_name)
-        with open(os.path.join(module_dir, "./data/all_model_names.pkl"), "wb") as f:
-            pickle.dump(all_model_names, f)
-
-    # Handle user
-    if user not in users_to_models:
-        users_to_models[user] = []  # New user
-    if model_name not in users_to_models[user]:
-        users_to_models[user].append(model_name)  # New model
-    with open(f"./data/users_to_models.pkl", "wb") as f:
-        pickle.dump(users_to_models, f)
-
-    with open(os.path.join(module_dir, "./data/user_perf_metrics.pkl"), "wb") as f:
-        pickle.dump(user_perf_metrics, f)
-    with open(os.path.join(module_dir, f"./data/trained_models/{model_name}.pkl"), "wb") as f:
-        pickle.dump(cur_model, f)
 
-    #
-
-
-
-    with open(
         pickle.dump((mae, mse, rmse, avg_diff), f)
 
     ratings_prev = ratings
     return mae, mse, rmse, avg_diff, ratings_prev
 
-def format_labeled_data(ratings, worker_id
     all_rows = []
     for comment, rating in ratings.items():
         comment_id = comments_to_ids[comment]
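With the helpers added earlier in this commit, label batches for a user/model live as numbered pickles under ./data/output/<user>/<model>/labels/. A hedged sketch of reloading and merging every batch, mirroring the ratings.update / pd.concat logic above (user and model names invented):

    import os, pickle
    ratings_all = {}
    label_dir = get_label_dir("demo_user", "demo_model")
    for i in range(get_n_label_files("demo_user", "demo_model")):
        with open(os.path.join(label_dir, f"{i}.pkl"), "rb") as f:
            ratings_all.update(pickle.load(f))  # comment text -> 0-4 rating; -1 means "unsure"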
@@ -785,7 +525,7 @@ def format_labeled_data(ratings, worker_id="A", debug=False):
     df = pd.DataFrame(all_rows, columns=["user_id", "item_id", "rating"])
     return df
 
-def users_perf(model, sys_eval_df=sys_eval_df
     # Load the full empty dataset
     sys_eval_comment_ids = sys_eval_df.item_id.unique().tolist()
     empty_ratings_rows = [[worker_id, c_id, 0] for c_id in sys_eval_comment_ids]
@@ -801,17 +541,17 @@ def users_perf(model, sys_eval_df=sys_eval_df, avg_ratings_df=comments_grouped_f
     user_item_preds = get_predictions_by_user_and_item(predictions)
     df["pred"] = df.apply(lambda row: user_item_preds[(row.user_id, row.item_id)] if (row.user_id, row.item_id) in user_item_preds else np.nan, axis=1)
 
-    df = df.merge(
     df.dropna(subset = ["pred"], inplace=True)
-    df["
 
-    perf_metrics = get_overall_perf(df,
     return perf_metrics
 
 def get_overall_perf(preds_df, user_id):
     # Prepare dataset to calculate performance
-    y_pred = preds_df[preds_df["user_id"] == user_id].
-    y_true = preds_df[preds_df["user_id"] == user_id].pred.to_numpy()
 
     # Get performance for user's model
     mae = mean_absolute_error(y_true, y_pred)
@@ -827,7 +567,11 @@ def get_predictions_by_user_and_item(predictions):
         user_item_preds[(uid, iid)] = est
     return user_item_preds
 
-
     # Prep dataframe for all predictions we'd like to request
     start = time.time()
     sys_eval_comment_ids = sys_eval_df.item_id.unique().tolist()
@@ -836,7 +580,8 @@ def get_preds_df(model, user_ids, orig_df=ratings_df_full, avg_ratings_df=commen
     for user_id in user_ids:
         empty_ratings_rows.extend([[user_id, c_id, 0] for c_id in sys_eval_comment_ids])
     empty_ratings_df = pd.DataFrame(empty_ratings_rows, columns=["user_id", "item_id", "rating"])
-
 
     # Evaluate model to get predictions
     start = time.time()
@@ -844,16 +589,17 @@
     eval_set_data = Dataset.load_from_df(empty_ratings_df, reader)
     _, testset = train_test_split(eval_set_data, test_size=1.)
     predictions = model.test(testset)
-
 
     # Update dataframe with predictions
     start = time.time()
     df = empty_ratings_df.copy()  # user_id, item_id, rating
     user_item_preds = get_predictions_by_user_and_item(predictions)
     df["pred"] = df.apply(lambda row: user_item_preds[(row.user_id, row.item_id)] if (row.user_id, row.item_id) in user_item_preds else np.nan, axis=1)
-    df = df.merge(
     df.dropna(subset = ["pred"], inplace=True)
-    df["
 
     # Get binned predictions (based on user prediction)
     df["prediction_bin"], out_bins = pd.cut(df["pred"], bins, labels=False, retbins=True)
@@ -861,9 +607,14 @@
 
     return df
 
 def train_user_model(ratings_df, train_df=train_df, model_eval_df=model_eval_df, train_frac=0.75, model_type="SVD", sim_type=None, user_based=True):
     # Sample from shuffled labeled dataframe and add batch to train set; specified set size to model_eval set
-    labeled = ratings_df.sample(frac=1)
     batch_size = math.floor(len(labeled) * train_frac)
     labeled_train = labeled[:batch_size]
     labeled_model_eval = labeled[batch_size:]
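For concreteness, train_user_model shuffles the labeled ratings and splits by train_frac: with, say, 40 labeled comments and the default train_frac=0.75, batch_size = math.floor(40 * 0.75) = 30, so 30 rows go to the train set and the remaining 10 are held out for model_eval.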
@@ -876,7 +627,11 @@ def train_user_model(ratings_df, train_df=train_df, model_eval_df=model_eval_df,
 
     return model, perf, labeled_train, labeled_model_eval
 
-
     # Train model
     reader = Reader(rating_scale=(0, 4))
     train_data = Dataset.load_from_df(train_df, reader)
@@ -905,58 +660,18 @@ def train_model(train_df, model_eval_df, model_type="SVD", sim_type=None, user_b
     mae = accuracy.mae(predictions)
     mse = accuracy.mse(predictions)
 
-
     perf = [mae, mse, rmse, fcp]
 
     return algo, perf
 
-def
-
-    label_dir = f"./data/labels/{model_name}"
-    n_label_files = len([name for name in os.listdir(os.path.join(module_dir, label_dir)) if os.path.isfile(os.path.join(module_dir, label_dir, name))])
-
-    all_rows = []
-    with open(os.path.join(module_dir, label_dir, f"{n_label_files}.pkl"), "rb") as f:
-        ratings = pickle.load(f)
-
-    labeled_df = format_labeled_data(ratings)
-    labeled_df = labeled_df[labeled_df["rating"] != -1]
-
-    # Iterate through batches of 5 labels
-    n_batches = int(np.ceil(len(labeled_df) / 5.))
-    for i in range(n_batches):
-        start = time.time()
-        n_to_sample = np.min([5 * (i + 1), len(labeled_df)])
-        cur_model, _, _, _ = train_user_model(ratings_df=labeled_df.head(n_to_sample))
-        mae, mse, rmse, avg_diff = users_perf(cur_model)
-        all_rows.append([n_to_sample, mae, "MAE"])
-        print(f"iter {i}: {time.time() - start}")
-
-    print("all_rows", all_rows)
-
-    df = pd.DataFrame(all_rows, columns=["n_to_sample", "perf", "metric"])
-    chart = alt.Chart(df).mark_line(point=True).encode(
-        x=alt.X("n_to_sample:Q", title="Number of Comments Labeled"),
-        y="perf",
-        color="metric",
-        tooltip=[
-            alt.Tooltip('n_to_sample:Q', title="Number of Comments Labeled"),
-            alt.Tooltip('metric:N', title="Metric"),
-            alt.Tooltip('perf:Q', title="Metric Value", format=".3f"),
-        ],
-    ).properties(
-        title=f"Performance over number of examples: {model_name}",
-        width=500,
-    )
-    return chart
-
-def plot_train_perf_results(model_name, mae):
-    perf_dir = f"./data/perf/{model_name}"
-    n_perf_files = len([name for name in os.listdir(os.path.join(module_dir, perf_dir)) if os.path.isfile(os.path.join(module_dir, perf_dir, name))])
-
     all_rows = []
-    for i in range(
-
         mae, mse, rmse, avg_diff = pickle.load(f)
         all_rows.append([i, mae, "Your MAE"])
 
@@ -975,24 +690,24 @@ def plot_train_perf_results(model_name, mae):
         width=500,
     )
 
-
-
-
 
     plot_dim_width = 500
     domain_min = 0.0
-    domain_max =
     bkgd = alt.Chart(pd.DataFrame({
-        "start": [
-        "stop": [domain_max,
-        "bkgd": ["Needs improvement
     })).mark_rect(opacity=0.2).encode(
-        y=alt.Y("start:Q", scale=alt.Scale(domain=[0, domain_max])),
-        y2=alt.Y2("stop:Q"),
         x=alt.value(0),
         x2=alt.value(plot_dim_width),
         color=alt.Color("bkgd:O", scale=alt.Scale(
-            domain=["Needs improvement
             range=["red", "yellow", "green"]),
             title="How good is your MAE?"
         )
@@ -1000,12 +715,12 @@ def plot_train_perf_results(model_name, mae):
 
     plot = (bkgd + chart).properties(width=plot_dim_width).resolve_scale(color='independent')
     mae_status = None
-    if mae <
-        mae_status = "Your MAE is in the <b>Good</b> range
-    elif mae <
-        mae_status = "Your MAE is in the <b>Okay</b> range
     else:
-        mae_status = "Your MAE is in the <b>Needs improvement</b> range
     return plot, mae_status
 
 ########################################
@@ -1104,14 +819,14 @@ def get_decision(rating, threshold):
 
 def get_category(row, threshold=0.3):
     k_to_category = {
-        "
-        "
-        "
-        "
-        "
     }
     categories = []
-    for k in ["
         if row[k] > threshold:
             categories.append(k_to_category[k])
 
@@ -1124,19 +839,20 @@ def get_comment_url(row):
     return f"#{row['item_id']}/#comment"
 
 def get_topic_url(row):
-    return f"#{row['
 
-
     df = preds_df.copy().reset_index()
 
     if n_topics is not None:
-        df = df[df["
 
     df["vis_pred_bin"], out_bins = pd.cut(df["pred"], bins, labels=VIS_BINS_LABELS, retbins=True)
-    df = df[df["user_id"] ==
-    df["system_label"] = [("toxic" if r > threshold else "non-toxic") for r in df[
-    df["threshold"] = [threshold for r in df[
-    df["key"] = [get_key(sys, user, threshold) for sys, user in zip(df[
     df["url"] = df.apply(lambda row: get_topic_url(row), axis=1)
 
     # Plot sizing
@@ -1154,12 +870,12 @@ def plot_overall_vis(preds_df, error_type, cur_user, cur_model, n_topics=None, b
     # Main chart
     chart = alt.Chart(df).mark_square(opacity=0.8, size=mark_size, stroke="grey", strokeWidth=0.5).transform_window(
         groupby=['vis_pred_bin'],
-        sort=[{'field':
         id='row_number()',
         ignorePeers=True,
     ).encode(
         x=alt.X('vis_pred_bin:Q', title="Our prediction of your rating", scale=alt.Scale(domain=(domain_min, domain_max))),
-        y=alt.Y('id:O', title="
         color = alt.Color("key:O", scale=alt.Scale(
             domain=["System agrees: Non-toxic", "System agrees: Toxic", "System differs: Error > 1.5", "System differs: Error > 1.0", "System differs: Error > 0.5", "System differs: Error <=0.5"],
             range=["white", "#cbcbcb", "red", "#ff7a5c", "#ffa894", "#ffd1c7"]),
@@ -1167,9 +883,9 @@ def plot_overall_vis(preds_df, error_type, cur_user, cur_model, n_topics=None, b
         ),
         href="url:N",
         tooltip = [
-            alt.Tooltip("
             alt.Tooltip("system_label:N", title="System label"),
-            alt.Tooltip("
             alt.Tooltip("pred:Q", title="Your rating", format=".2f")
         ]
     )
@@ -1233,31 +949,17 @@ def plot_overall_vis(preds_df, error_type, cur_user, cur_model, n_topics=None, b
     )
 
     plot = (bkgd + annotation + chart + rule).properties(height=(plot_dim_height), width=plot_dim_width).resolve_scale(color='independent').to_json()
-
-    # Save to file
-    chart_dir = "./data/charts"
-    chart_file = os.path.join(chart_dir, f"{cur_user}_{cur_model}.pkl")
-    with open(chart_file, "w") as f:
-        json.dump(plot, f)
-
     return plot
 
-
-
-
-    if use_model:
-        return plot_overall_vis_cluster(preds_df_mod, error_type=error_type, n_comments=500, threshold=threshold)
-    else:
-        return plot_overall_vis_cluster2(preds_df_mod, error_type=error_type, n_comments=500, threshold=threshold)
-
-def plot_overall_vis_cluster2(preds_df, error_type, n_comments=None, bins=VIS_BINS, threshold=TOXIC_THRESHOLD, bin_step=0.05):
     df = preds_df.copy().reset_index()
 
-    df["vis_pred_bin"], out_bins = pd.cut(df[
-    df = df[df["user_id"] ==
-    df["system_label"] = [("toxic" if r > threshold else "non-toxic") for r in df[
-    df["key"] = [get_key_no_model(sys, threshold) for sys in df[
-    print("len(df)", len(df))  # always 0 for some reason (from keyword search)
     df["category"] = df.apply(lambda row: get_category(row), axis=1)
     df["url"] = df.apply(lambda row: get_comment_url(row), axis=1)
 
@@ -1279,7 +981,7 @@ def plot_overall_vis_cluster2(preds_df, error_type, n_comments=None, bins=VIS_BI
     # Main chart
     chart = alt.Chart(df).mark_square(opacity=0.8, size=mark_size, stroke="grey", strokeWidth=0.25).transform_window(
         groupby=['vis_pred_bin'],
-        sort=[{'field':
         id='row_number()',
         ignorePeers=True
     ).encode(
@@ -1293,8 +995,8 @@ def plot_overall_vis_cluster2(preds_df, error_type, n_comments=None, bins=VIS_BI
         ),
         href="url:N",
         tooltip = [
-            alt.Tooltip("
-            alt.Tooltip("
         ]
     )
 
@@ -1345,24 +1047,22 @@ def plot_overall_vis_cluster2(preds_df, error_type, n_comments=None, bins=VIS_BI
     final_plot = (bkgd + annotation + chart + rule).properties(height=(plot_dim_height), width=plot_dim_width).resolve_scale(color='independent').to_json()
 
     return final_plot, df
-
-
-
-
 
     df["vis_pred_bin"], out_bins = pd.cut(df["pred"], bins, labels=VIS_BINS_LABELS, retbins=True)
-    df = df[df["user_id"] ==
-    df["system_label"] = [("toxic" if r > threshold else "non-toxic") for r in df[
-    df["key"] = [get_key(sys, user, threshold) for sys, user in zip(df[
-    print("len(df)", len(df))  # always 0 for some reason (from keyword search)
-    # print("columns", df.columns)
     df["category"] = df.apply(lambda row: get_category(row), axis=1)
     df["url"] = df.apply(lambda row: get_comment_url(row), axis=1)
 
     if n_comments is not None:
         n_to_sample = np.min([n_comments, len(df)])
         df = df.sample(n=n_to_sample)
-
     # Plot sizing
     domain_min = 0
     domain_max = 4
@@ -1377,7 +1077,7 @@ def plot_overall_vis_cluster(preds_df, error_type, n_comments=None, bins=VIS_BIN
     # Main chart
     chart = alt.Chart(df).mark_square(opacity=0.8, size=mark_size, stroke="grey", strokeWidth=0.25).transform_window(
         groupby=['vis_pred_bin'],
-        sort=[{'field':
         id='row_number()',
         ignorePeers=True
     ).encode(
@@ -1390,8 +1090,8 @@ def plot_overall_vis_cluster(preds_df, error_type, n_comments=None, bins=VIS_BIN
         ),
         href="url:N",
         tooltip = [
-            alt.Tooltip("
-            alt.Tooltip("
             alt.Tooltip("pred:Q", title="Your rating", format=".2f"),
             alt.Tooltip("category:N", title="Potential toxicity categories")
         ]
@@ -1457,30 +1157,27 @@ def plot_overall_vis_cluster(preds_df, error_type, n_comments=None, bins=VIS_BIN
 
     return final_plot, df
 
-def get_cluster_comments(df, error_type, threshold=TOXIC_THRESHOLD,
     df["user_color"] = [get_user_color(user, threshold) for user in df["pred"].tolist()]  # get cell colors
-    df["system_color"] = [get_user_color(sys, threshold) for sys in df[
-    df["error_color"] = [get_system_color(sys, user, threshold) for sys, user in zip(df[
-    df["error_type"] = [get_error_type(sys, user, threshold) for sys, user in zip(df[
-    df["error_amt"] = [abs(sys - threshold) for sys in df[
     df["judgment"] = ["" for _ in range(len(df))]  # template for "agree" or "disagree" buttons
 
     if use_model:
         df = df.sort_values(by=["error_amt"], ascending=False)  # surface largest errors first
     else:
-
-
 
     df["id"] = df["item_id"]
-    # df["comment"] already exists
-    df["comment"] = df["comment_"]
     df["toxicity_category"] = df["category"]
     df["user_rating"] = df["pred"]
     df["user_decision"] = [get_decision(rating, threshold) for rating in df["pred"].tolist()]
-    df["system_rating"] = df[
-    df["system_decision"] = [get_decision(rating, threshold) for rating in df[
-    df["error_type"] = df["error_type"]
-    df = df.head(num_examples)
     df = df.round(decimals=2)
 
     # Filter to specified error type
@@ -1493,7 +1190,7 @@ def get_cluster_comments(df, error_type, threshold=TOXIC_THRESHOLD, worker_id="A
     elif error_type == "Both":
         df = df[(df["error_type"] == "System may be under-sensitive") | (df["error_type"] == "System may be over-sensitive")]
 
-    return df
 
 # PERSONALIZED CLUSTERS utils
 def get_disagreement_comments(preds_df, mode, n=10_000, threshold=TOXIC_THRESHOLD):
@@ -1512,58 +1209,10 @@ def get_disagreement_comments(preds_df, mode, n=10_000, threshold=TOXIC_THRESHOL
     df = df.sort_values(by=["diff"], ascending=asc)
     df = df.head(n)
 
-    return df["
-
-def
-
-
-
-        cluster_df = cluster_df.sort_values(by=["topic_id"])
-        topics_under = cluster_df[cluster_df["error_type"] == "System may be under-sensitive"]["topic"].unique().tolist()
-        topics_under = topics_under[1:(n + 1)]
-        topics_over = cluster_df[cluster_df["error_type"] == "System may be over-sensitive"]["topic"].unique().tolist()
-        topics_over = topics_over[1:(n + 1)]
-        return topics_under, topics_over
-    else:
-        topics_under_top = []
-        topics_over_top = []
-        preds_df_file = f"./data/preds_dfs/{model}.pkl"
-        if (os.path.isfile(preds_df_file)):
-            with open(preds_df_file, "rb") as f:
-                preds_df = pickle.load(f)
-            preds_df_mod = preds_df.merge(comments_grouped_full_topic_cat, on="item_id", how="left", suffixes=('_', '_avg')).reset_index()
-            preds_df_mod = preds_df_mod[preds_df_mod["user_id"] == "A"]
-
-            comments_under, comments_under_df = get_disagreement_comments(preds_df_mod, mode="under-sensitive", n=1000)
-            if len(comments_under) > 0:
-                topics_under = BERTopic(embedding_model="paraphrase-MiniLM-L6-v2").fit(comments_under)
-                topics_under_top = topics_under.get_topic_info().head(n)["Name"].tolist()
-                print("topics_under", topics_under_top)
-                # Get topics per comment
-                topics_assigned, _ = topics_under.transform(comments_under)
-                comments_under_df["topic_id"] = topics_assigned
-                cur_topic_ids = topics_under.get_topic_info().Topic
-                topic_short_names = topics_under.get_topic_info().Name
-                topic_ids_to_names = {cur_topic_ids[i]: topic_short_names[i] for i in range(len(cur_topic_ids))}
-                comments_under_df["topic"] = [topic_ids_to_names[topic_id] for topic_id in comments_under_df["topic_id"].tolist()]
-
-            comments_over, comments_over_df = get_disagreement_comments(preds_df_mod, mode="over-sensitive", n=1000)
-            if len(comments_over) > 0:
-                topics_over = BERTopic(embedding_model="paraphrase-MiniLM-L6-v2").fit(comments_over)
-                topics_over_top = topics_over.get_topic_info().head(n)["Name"].tolist()
-                print("topics_over", topics_over_top)
-                # Get topics per comment
-                topics_assigned, _ = topics_over.transform(comments_over)
-                comments_over_df["topic_id"] = topics_assigned
-                cur_topic_ids = topics_over.get_topic_info().Topic
-                topic_short_names = topics_over.get_topic_info().Name
-                topic_ids_to_names = {cur_topic_ids[i]: topic_short_names[i] for i in range(len(cur_topic_ids))}
-                comments_over_df["topic"] = [topic_ids_to_names[topic_id] for topic_id in comments_over_df["topic_id"].tolist()]
-
-            cluster_df = pd.concat([comments_under_df, comments_over_df])
-            with open(f"./data/personal_cluster_dfs/{model}.pkl", "wb") as f:
-                pickle.dump(cluster_df, f)
-
-            return topics_under_top, topics_over_top
-    return [], []
|
|
23 |
from sentence_transformers import SentenceTransformer, util
|
24 |
import torch
|
25 |
from bertopic import BERTopic
|
26 |
+
from datetime import date
|
27 |
|
28 |
########################################
|
29 |
# PRE-LOADING
|
|
|
38 |
|
39 |
# Data-loading
|
40 |
module_dir = "./"
|
41 |
+
with open(os.path.join(module_dir, "data/input/ids_to_comments.pkl"), "rb") as f:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
ids_to_comments = pickle.load(f)
|
43 |
+
with open(os.path.join(module_dir, "data/input/comments_to_ids.pkl"), "rb") as f:
|
44 |
comments_to_ids = pickle.load(f)
|
45 |
+
system_preds_df = pd.read_pickle("data/input/system_preds_df.pkl")
|
46 |
+
sys_eval_df = pd.read_pickle(os.path.join(module_dir, "data/input/split_data/sys_eval_df.pkl"))
|
47 |
+
train_df = pd.read_pickle(os.path.join(module_dir, "data/input/split_data/train_df.pkl"))
|
|
|
|
|
48 |
train_df_ids = train_df["item_id"].unique().tolist()
|
49 |
+
model_eval_df = pd.read_pickle(os.path.join(module_dir, "data/input/split_data/model_eval_df.pkl"))
|
50 |
+
ratings_df_full = pd.read_pickle(os.path.join(module_dir, "data/input/ratings_df_full.pkl"))
|
51 |
+
worker_info_df = pd.read_pickle("./data/input/worker_info_df.pkl")
|
52 |
|
53 |
+
topic_ids = system_preds_df.topic_id
|
54 |
+
topics = system_preds_df.topic
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
topic_ids_to_topics = {topic_ids[i]: topics[i] for i in range(len(topic_ids))}
|
56 |
topics_to_topic_ids = {topics[i]: topic_ids[i] for i in range(len(topic_ids))}
|
57 |
+
unique_topics_ids = sorted(system_preds_df.topic_id.unique())
|
58 |
unique_topics = [topic_ids_to_topics[topic_id] for topic_id in range(len(topic_ids_to_topics) - 1)]
|
59 |
|
60 |
def get_toxic_threshold():
|
61 |
return TOXIC_THRESHOLD
|
62 |
|
63 |
+
def get_user_model_names(user):
|
64 |
+
# Fetch the user's models
|
65 |
+
output_dir = f"./data/output"
|
66 |
+
users = [name for name in os.listdir(output_dir) if os.path.isdir(os.path.join(output_dir, name))]
|
67 |
+
if user not in users:
|
68 |
+
# User does not exist
|
69 |
+
return []
|
70 |
else:
|
71 |
+
# Fetch trained model names for the user
|
72 |
+
user_dir = f"./data/output/{user}"
|
73 |
+
user_models = [name for name in os.listdir(user_dir) if os.path.isdir(os.path.join(user_dir, name))]
|
74 |
user_models.sort()
|
75 |
return user_models
|
76 |
|
|
|
78 |
return unique_topics
|
79 |
|
80 |
def get_large_clusters(min_n):
|
81 |
+
counts_df = system_preds_df.groupby(by=["topic_id"]).size().reset_index(name='counts')
|
82 |
counts_df = counts_df[counts_df["counts"] >= min_n]
|
83 |
return [topic_ids_to_topics[t_id] for t_id in sorted(counts_df["topic_id"].tolist()[1:])]
|
84 |
|
|
|
116 |
}
|
117 |
internal_to_readable = {v: k for k, v in readable_to_internal.items()}
|
118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
########################################
# Data storage helper functions

# Set up all directories for new user
def setup_user_dirs(cur_user):
    user_dir = f"./data/output/{cur_user}"
    if not os.path.isdir(user_dir):
        os.mkdir(user_dir)

def setup_model_dirs(cur_user, cur_model):
    model_dir = f"./data/output/{cur_user}/{cur_model}"
    if not os.path.isdir(model_dir):
        os.mkdir(model_dir)  # Set up model dir
        # Set up subdirs
        os.mkdir(os.path.join(model_dir, "labels"))
        os.mkdir(os.path.join(model_dir, "perf"))

def setup_user_model_dirs(cur_user, cur_model):
    setup_user_dirs(cur_user)
    setup_model_dirs(cur_user, cur_model)

# Charts
def get_chart_file(cur_user, cur_model):
    chart_dir = f"./data/output/{cur_user}/{cur_model}"
    return os.path.join(chart_dir, "chart_overall_vis.json")

# Labels
def get_label_dir(cur_user, cur_model):
    return f"./data/output/{cur_user}/{cur_model}/labels"

def get_n_label_files(cur_user, cur_model):
    label_dir = get_label_dir(cur_user, cur_model)
    return len([name for name in os.listdir(label_dir) if os.path.isfile(os.path.join(label_dir, name))])

def get_label_file(cur_user, cur_model, label_i=None):
    if label_i is None:
        # Get index to add on to end of list
        label_i = get_n_label_files(cur_user, cur_model)
    label_dir = get_label_dir(cur_user, cur_model)
    return os.path.join(label_dir, f"{label_i}.pkl")

# Performance
def get_perf_dir(cur_user, cur_model):
    return f"./data/output/{cur_user}/{cur_model}/perf"

def get_n_perf_files(cur_user, cur_model):
    perf_dir = get_perf_dir(cur_user, cur_model)
    return len([name for name in os.listdir(perf_dir) if os.path.isfile(os.path.join(perf_dir, name))])

def get_perf_file(cur_user, cur_model, perf_i=None):
    if perf_i is None:
        # Get index to add on to end of list
        perf_i = get_n_perf_files(cur_user, cur_model)
    perf_dir = get_perf_dir(cur_user, cur_model)
    return os.path.join(perf_dir, f"{perf_i}.pkl")

# Predictions dataframe
def get_preds_file(cur_user, cur_model):
    preds_dir = f"./data/output/{cur_user}/{cur_model}"
    return os.path.join(preds_dir, "preds_df.pkl")

# Reports
def get_reports_file(cur_user, cur_model):
    return f"./data/output/{cur_user}/{cur_model}/reports.json"
########################################
# General utils
...

########################################
# GET_AUDIT utils
def plot_metric_histogram(metric, user_metric, other_metric_vals, n_bins=10):
    hist, bin_edges = np.histogram(other_metric_vals, bins=n_bins, density=False)
    data = pd.DataFrame({
    ...
    return (bar + rule).interactive()

# Generates the summary plot across all topics for the user
def show_overall_perf(cur_model, error_type, cur_user, threshold=TOXIC_THRESHOLD, topic_vis_method="median", use_cache=True):
    # Your perf (calculated using the model and test set)
    preds_file = get_preds_file(cur_user, cur_model)
    with open(preds_file, "rb") as f:
        preds_df = pickle.load(f)

    chart_file = get_chart_file(cur_user, cur_model)
    if use_cache and os.path.isfile(chart_file):
        # Read from file if it exists
        with open(chart_file, "r") as f:
            topic_overview_plot_json = json.load(f)
    else:
        # Otherwise, generate the chart and save it to file
        if topic_vis_method == "median":  # Default
            preds_df_grp = preds_df.groupby(["topic", "user_id"]).median()
        elif topic_vis_method == "mean":
            preds_df_grp = preds_df.groupby(["topic", "user_id"]).mean()
        topic_overview_plot_json = plot_overall_vis(preds_df=preds_df_grp, n_topics=200, threshold=threshold, error_type=error_type, cur_user=cur_user, cur_model=cur_model)
        # Save to file
        with open(chart_file, "w") as f:
            json.dump(topic_overview_plot_json, f)

    return {
        "topic_overview_plot_json": json.loads(topic_overview_plot_json),
    }
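As a usage sketch (not from the source): assuming the model's predictions have already been saved by the training path below, a server endpoint can call this and hand the parsed Vega-Lite spec straight to the frontend. The user and model names are placeholders.

    # Placeholder names for illustration
    result = show_overall_perf(cur_model="model_0", error_type="Both", cur_user="demo_user")
    vega_spec = result["topic_overview_plot_json"]  # dict: a Vega-Lite spec ready to render client-side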
########################################
# GET_LABELING utils
def create_example_sets(n_label_per_bin, score_bins, keyword=None, topic=None):
    # Restrict to the keyword, if provided
    df = system_preds_df.copy()
    if keyword is not None:
        df = df[df["comment"].str.contains(keyword)]
    ...
    return ex_to_label

def get_grp_model_labels(n_label_per_bin, score_bins, grp_ids):
    df = system_preds_df.copy()

    train_df_grp = train_df[train_df["user_id"].isin(grp_ids)]
    train_df_grp_avg = train_df_grp.groupby(by=["item_id"]).median().reset_index()
    ...
    return ratings_grp
########################################
# SAVE_REPORT utils

# Convert the SEP field selection from the UI to the SEP enum value
def get_sep_enum(sep_selection):
    if sep_selection == "Adversarial Example":
        return "S0403: Adversarial Example"
    elif sep_selection == "Accuracy":
        return "P0204: Accuracy"
    elif sep_selection == "Bias/Discrimination":
        return "E0100: Bias/ Discrimination"
    else:
        return "P0200: Model issues"

# Format the description for the report including the provided title, error type,
# and text entry field ("Summary/Suggestions" text box)
def format_description(indie_label_json):
    title = indie_label_json["title"]
    error_type = indie_label_json["error_type"]
    text_entry = indie_label_json["text_entry"]
    return f"Title: {title}\nError Type: {error_type}\nSummary/Suggestions: {text_entry}"
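The chain of comparisons in get_sep_enum could equivalently be written as a dictionary lookup with a default. A sketch of that alternative form (get_sep_enum_alt is a name introduced here for illustration, not part of the codebase):

    SEP_ENUMS = {
        "Adversarial Example": "S0403: Adversarial Example",
        "Accuracy": "P0204: Accuracy",
        "Bias/Discrimination": "E0100: Bias/ Discrimination",
    }

    def get_sep_enum_alt(sep_selection):
        # Any other selection falls back to the generic "Model issues" enum
        return SEP_ENUMS.get(sep_selection, "P0200: Model issues")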
# Convert indielabel json to AVID json format.
# See the AVID format in https://avidml.org/avidtools/reference/report
#
# Important mappings:
#   IndieLabel Attribute    AVID Attribute          Example
#   text_entry              description             "I think the Perspective API is too sensitive. Here are some examples."
#   topic                   feature                 0_shes_woman_lady_face
#   persp_score             model_score             0.94
#   comment                 ori_input               "She looks beautiful"
#   user_rating             personal_model_score    0.92
#   user_decision           user_decision           "Non-toxic"
# Note that this is at the individual report level.
def convert_indie_label_json_to_avid_json(indie_label_json, cur_user, email, sep_selection):
    # Setting up the structure with a dict to enable programmatic additions
    avid_json_dict = {
        "data_type": "AVID",
        "data_version": None,
        "metadata": None,
        "affects": {
            "developer": [],
            "deployer": [
                "Hugging Face"
            ],
            # TODO: Make artifacts malleable during modularity work
            "artifacts": [
                {
                    "type": "Model",
                    "name": "Perspective API"
                }
            ]
        },
        "problemtype": {
            "classof": "Undefined",  # I don't think any of the other ClassEnums are applicable. Link: https://avidml.org/avidtools/_modules/avidtools/datamodels/enums#ClassEnum
            "type": "Detection",
            "description": {
                "lang": "eng",  # TODO: Make language selectable
                "value": "This report contains results from an end user audit conducted on Hugging Face."
            }
        },
        "metrics": [  # Note: for the end users' use case, each comment is an example.
        ],
        "references": [],
        "description": {
            "lang": "eng",  # TODO: Make language selectable
            "value": ""  # Left empty so the report comments can be contained here.
        },
        "impact": {
            "avid": {
                "risk_domain": [
                    "Ethics"
                ],
                "sep_view": [
                    "E0101: Group fairness"
                ],
                "lifecycle_view": [
                    "L05: Evaluation"
                ],
                "taxonomy_version": "0.2"
            }
        },
        "credit": "",  # Left empty so that credit can be assigned
        "reported_date": ""  # Left empty so that it can be dynamically filled in
    }

    avid_json_dict["description"] = format_description(indie_label_json)
    avid_json_dict["reported_date"] = str(date.today())
    # Assign credit to the email if provided; otherwise default to the randomly assigned username
    if email != "":
        avid_json_dict["credit"] = email
    else:
        avid_json_dict["credit"] = cur_user

    sep_enum = get_sep_enum(sep_selection)
    avid_json_dict["impact"]["avid"]["sep_view"] = [sep_enum]

    for e in indie_label_json["evidence"]:
        curr_metric = {}
        curr_metric["name"] = "Perspective API"
        curr_metric["detection_method"] = {
            "type": "Detection",
            "name": "Individual Example from End User Audit"
        }
        res_dict = {}
        res_dict["feature"] = e["topic"]
        res_dict["model_score"] = str(e["persp_score"])  # Converted to string to avoid Float type error with DB
        res_dict["ori_input"] = e["comment"]
        res_dict["personal_model_score"] = str(e["user_rating"])  # See above
        res_dict["user_decision"] = e["user_decision"]
        curr_metric["results"] = res_dict
        avid_json_dict["metrics"].append(curr_metric)

    new_report = json.dumps(avid_json_dict)
    return new_report
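A sketch of the expected input shape and a call, built from the example values in the mapping table above (all field values are illustrative, and "demo_user" is a placeholder):

    example_report = {
        "title": "Over-sensitivity on a topic cluster",
        "error_type": "System is over-sensitive",
        "text_entry": "I think the Perspective API is too sensitive. Here are some examples.",
        "evidence": [{
            "topic": "0_shes_woman_lady_face",
            "persp_score": 0.94,
            "comment": "She looks beautiful",
            "user_rating": 0.92,
            "user_decision": "Non-toxic",
        }],
    }
    avid_report = convert_indie_label_json_to_avid_json(
        example_report, cur_user="demo_user", email="", sep_selection="Accuracy"
    )  # returns a JSON string in the AVID report format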
########################################
# GET_PERSONALIZED_MODEL utils
def fetch_existing_data(user, model_name):
    # Check if we have cached model performance
    n_perf_files = get_n_perf_files(user, model_name)
    if n_perf_files > 0:
        # Fetch cached results
        perf_file = get_perf_file(user, model_name, n_perf_files - 1)  # Get last performance file
        with open(perf_file, "rb") as f:
            mae, mse, rmse, avg_diff = pickle.load(f)
    else:
        raise Exception(f"Model {model_name} does not exist")

    # Fetch previous user-provided labels
    ratings_prev = None
    n_label_files = get_n_label_files(user, model_name)
    if n_label_files > 0:
        label_file = get_label_file(user, model_name, n_label_files - 1)  # Get last label file
        with open(label_file, "rb") as f:
            ratings_prev = pickle.load(f)
    return mae, mse, rmse, avg_diff, ratings_prev

# Main function called by the server's `get_personalized_model` endpoint.
# Trains an updated model with the specified name, user, and ratings, then
# saves the ratings, performance metrics, and pre-computed predictions to files.
# - model_name: name of the model to train
# - ratings: dictionary of comments to ratings
# - user: user name
# - top_n: number of comments to train on (used when a set was held out for the original user study)
# - topic: topic to train on (used when tuning for a specific topic)
def train_updated_model(model_name, ratings, user, top_n=None, topic=None, debug=False):
    # Check if there is previously-labeled data; if so, combine it with this data
    labeled_df = format_labeled_data(ratings, worker_id=user)  # Treat ratings as the full batch of all ratings
    ratings_prev = None

    # Filter out rows with "unsure" (-1)
    labeled_df = labeled_df[labeled_df["rating"] != -1]

    # Filter to top N for user study
    if (topic is None) and (top_n is not None):
        labeled_df = labeled_df.head(top_n)
    else:
        # For topic tuning, we need to fetch the old labels
        n_label_files = get_n_label_files(user, model_name)
        if n_label_files > 0:
            # Concatenate the previous set of labels with this new batch of labels
            label_file = get_label_file(user, model_name, n_label_files - 1)  # Get last label file
            with open(label_file, "rb") as f:
                ratings_prev = pickle.load(f)
            labeled_df_prev = format_labeled_data(ratings_prev, worker_id=user)
            labeled_df_prev = labeled_df_prev[labeled_df_prev["rating"] != -1]
            ratings.update(ratings_prev)  # Append old ratings to ratings
            labeled_df = pd.concat([labeled_df_prev, labeled_df])
    if debug:
        print("len ratings for training:", len(labeled_df))

    # Save this batch of labels
    label_file = get_label_file(user, model_name)
    with open(label_file, "wb") as f:
        pickle.dump(ratings, f)

    # Train model
    cur_model, _, _, _ = train_user_model(ratings_df=labeled_df)

    # Compute and save performance metrics
    mae, mse, rmse, avg_diff = users_perf(cur_model, worker_id=user)
    perf_file = get_perf_file(user, model_name)
    with open(perf_file, "wb") as f:
        pickle.dump((mae, mse, rmse, avg_diff), f)

    # Pre-compute predictions for the full dataset and save them
    cur_preds_df = get_preds_df(cur_model, [user], sys_eval_df=ratings_df_full)
    preds_file = get_preds_file(user, model_name)
    with open(preds_file, "wb") as f:
        pickle.dump(cur_preds_df, f)

    # Replace the cached summary plot if it exists
    show_overall_perf(cur_model=model_name, error_type="Both", cur_user=user, use_cache=False)

    ratings_prev = ratings
    return mae, mse, rmse, avg_diff, ratings_prev
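An end-to-end usage sketch (not from the source): the user and model names are placeholders, the directories must exist first, and the rating keys must be comment strings present in the dataset's comments_to_ids mapping.

    # Illustrative call; ratings map raw comment text to a 0-4 toxicity score
    setup_user_model_dirs("demo_user", "model_0")
    ratings = {"example comment a": 0, "example comment b": 4}
    mae, mse, rmse, avg_diff, _ = train_updated_model("model_0", ratings, user="demo_user")
    print(f"held-out MAE after retraining: {mae:.2f}")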
def format_labeled_data(ratings, worker_id):
    all_rows = []
    for comment, rating in ratings.items():
        comment_id = comments_to_ids[comment]
        ...
    df = pd.DataFrame(all_rows, columns=["user_id", "item_id", "rating"])
    return df

def users_perf(model, worker_id, sys_eval_df=sys_eval_df):
    # Load the full empty dataset
    sys_eval_comment_ids = sys_eval_df.item_id.unique().tolist()
    empty_ratings_rows = [[worker_id, c_id, 0] for c_id in sys_eval_comment_ids]
    ...
    user_item_preds = get_predictions_by_user_and_item(predictions)
    df["pred"] = df.apply(lambda row: user_item_preds[(row.user_id, row.item_id)] if (row.user_id, row.item_id) in user_item_preds else np.nan, axis=1)

    df = df.merge(system_preds_df, on="item_id", how="left", suffixes=('', '_sys'))
    df.dropna(subset=["pred"], inplace=True)
    df["rating"] = df.rating.astype("int32")

    perf_metrics = get_overall_perf(df, worker_id)  # mae, mse, rmse, avg_diff
    return perf_metrics

def get_overall_perf(preds_df, user_id):
    # Prepare dataset to calculate performance
    y_pred = preds_df[preds_df["user_id"] == user_id].rating_sys.to_numpy()  # System's prediction
    y_true = preds_df[preds_df["user_id"] == user_id].pred.to_numpy()  # User's (predicted) ground truth

    # Get performance for the user's model
    mae = mean_absolute_error(y_true, y_pred)
    ...

def get_predictions_by_user_and_item(predictions):
    ...
        user_item_preds[(uid, iid)] = est
    return user_item_preds

# Pre-computes predictions for the provided model and specified users on the system-eval dataset
# - model: trained model
# - user_ids: list of user IDs to compute predictions for
# - sys_eval_df: dataframe of system eval labels (pre-computed)
def get_preds_df(model, user_ids, sys_eval_df=sys_eval_df, bins=BINS, debug=False):
    # Prep dataframe for all predictions we'd like to request
    start = time.time()
    sys_eval_comment_ids = sys_eval_df.item_id.unique().tolist()
    ...
    for user_id in user_ids:
        empty_ratings_rows.extend([[user_id, c_id, 0] for c_id in sys_eval_comment_ids])
    empty_ratings_df = pd.DataFrame(empty_ratings_rows, columns=["user_id", "item_id", "rating"])
    if debug:
        print("setup", time.time() - start)

    # Evaluate model to get predictions
    start = time.time()
    ...
    eval_set_data = Dataset.load_from_df(empty_ratings_df, reader)
    _, testset = train_test_split(eval_set_data, test_size=1.)
    predictions = model.test(testset)
    if debug:
        print("train_test_split", time.time() - start)

    # Update dataframe with predictions
    start = time.time()
    df = empty_ratings_df.copy()  # user_id, item_id, rating
    user_item_preds = get_predictions_by_user_and_item(predictions)
    df["pred"] = df.apply(lambda row: user_item_preds[(row.user_id, row.item_id)] if (row.user_id, row.item_id) in user_item_preds else np.nan, axis=1)
    df = df.merge(system_preds_df, on="item_id", how="left", suffixes=('', '_sys'))
    df.dropna(subset=["pred"], inplace=True)
    df["rating"] = df.rating.astype("int32")

    # Get binned predictions (based on user prediction)
    df["prediction_bin"], out_bins = pd.cut(df["pred"], bins, labels=False, retbins=True)
    ...
    return df

# Given the full set of ratings, trains the specified model type and evaluates on the model eval set
# - ratings_df: dataframe of all ratings
# - train_df: dataframe of training labels
# - model_eval_df: dataframe of model eval labels (validation set)
# - train_frac: fraction of ratings to use for training
def train_user_model(ratings_df, train_df=train_df, model_eval_df=model_eval_df, train_frac=0.75, model_type="SVD", sim_type=None, user_based=True):
    # Shuffle the labeled dataframe; the first train_frac of it goes to the train set, the remainder to the model_eval set
    labeled = ratings_df.sample(frac=1)  # Shuffle the data
    batch_size = math.floor(len(labeled) * train_frac)
    labeled_train = labeled[:batch_size]
    labeled_model_eval = labeled[batch_size:]
    ...
    return model, perf, labeled_train, labeled_model_eval

# Given a set of labels split into training and validation (model_eval), trains the specified
# model type on the training labels and evaluates on the model_eval labels
# - train_df: dataframe of training labels
# - model_eval_df: dataframe of model eval labels (validation set)
# - model_type: type of model to train
def train_model(train_df, model_eval_df, model_type="SVD", sim_type=None, user_based=True, debug=False):
    # Train model
    reader = Reader(rating_scale=(0, 4))
    train_data = Dataset.load_from_df(train_df, reader)
    ...
    mae = accuracy.mae(predictions)
    mse = accuracy.mse(predictions)

    if debug:
        print(f"MAE: {mae}, MSE: {mse}, RMSE: {rmse}, FCP: {fcp}")
    perf = [mae, mse, rmse, fcp]

    return algo, perf
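The collaborative-filtering calls here (Reader, Dataset.load_from_df, train_test_split, accuracy) match the Surprise library's API. Assuming that dependency, a self-contained sketch of the same train/test pattern (the toy data is made up):

    import pandas as pd
    from surprise import SVD, Dataset, Reader, accuracy
    from surprise.model_selection import train_test_split

    # Toy ratings: (user_id, item_id, rating) on the same 0-4 scale used above
    toy = pd.DataFrame(
        [["u1", 1, 0], ["u1", 2, 4], ["u1", 3, 1], ["u1", 4, 3]],
        columns=["user_id", "item_id", "rating"],
    )
    data = Dataset.load_from_df(toy, Reader(rating_scale=(0, 4)))
    trainset, testset = train_test_split(data, test_size=0.25)
    algo = SVD()
    algo.fit(trainset)
    accuracy.mae(algo.test(testset))  # prints and returns the MAE on the held-out split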
def plot_train_perf_results(user, model_name, mae):
    n_perf_files = get_n_perf_files(user, model_name)
    all_rows = []
    for i in range(n_perf_files):
        perf_file = get_perf_file(user, model_name, i)
        with open(perf_file, "rb") as f:
            mae, mse, rmse, avg_diff = pickle.load(f)
        all_rows.append([i, mae, "Your MAE"])
    ...
        width=500,
    )

    # Manually set for now
    mae_good = 1.0
    mae_okay = 1.2

    plot_dim_width = 500
    domain_min = 0.0
    domain_max = 2.0
    bkgd = alt.Chart(pd.DataFrame({
        "start": [mae_okay, mae_good, domain_min],
        "stop": [domain_max, mae_okay, mae_good],
        "bkgd": ["Needs improvement", "Okay", "Good"],
    })).mark_rect(opacity=0.2).encode(
        y=alt.Y("start:Q", scale=alt.Scale(domain=[0, domain_max]), title=""),
        y2=alt.Y2("stop:Q", title="Performance (MAE)"),
        x=alt.value(0),
        x2=alt.value(plot_dim_width),
        color=alt.Color("bkgd:O", scale=alt.Scale(
            domain=["Needs improvement", "Okay", "Good"],
            range=["red", "yellow", "green"]),
            title="How good is your MAE?"
        )
    )
    ...
    plot = (bkgd + chart).properties(width=plot_dim_width).resolve_scale(color='independent')
    mae_status = None
    if mae < mae_good:
        mae_status = "Your MAE is in the <b>Good</b> range. Your model looks ready to go."
    elif mae < mae_okay:
        mae_status = "Your MAE is in the <b>Okay</b> range. Your model can be used, but you can provide additional labels to improve it."
    else:
        mae_status = "Your MAE is in the <b>Needs improvement</b> range. Your model may need additional labels to improve."
    return plot, mae_status

########################################
...
def get_category(row, threshold=0.3):
    k_to_category = {
        "is_profane_frac": "Profanity",
        "is_threat_frac": "Threat",
        "is_identity_attack_frac": "Identity Attack",
        "is_insult_frac": "Insult",
        "is_sexual_harassment_frac": "Sexual Harassment",
    }
    categories = []
    for k in ["is_profane_frac", "is_threat_frac", "is_identity_attack_frac", "is_insult_frac", "is_sexual_harassment_frac"]:
        if row[k] > threshold:
            categories.append(k_to_category[k])
    ...

def get_comment_url(row):
    return f"#{row['item_id']}/#comment"

def get_topic_url(row):
    return f"#{row['topic']}/#topic"

# Plots the overall results histogram (each block is a topic)
def plot_overall_vis(preds_df, error_type, cur_user, cur_model, n_topics=None, bins=VIS_BINS, threshold=TOXIC_THRESHOLD, sys_col="rating_sys"):
    df = preds_df.copy().reset_index()

    if n_topics is not None:
        df = df[df["topic_id"] < n_topics]

    df["vis_pred_bin"], out_bins = pd.cut(df["pred"], bins, labels=VIS_BINS_LABELS, retbins=True)
    df = df[df["user_id"] == cur_user].sort_values(by=["item_id"]).reset_index()
    df["system_label"] = [("toxic" if r > threshold else "non-toxic") for r in df[sys_col].tolist()]
    df["threshold"] = [threshold for r in df[sys_col].tolist()]
    df["key"] = [get_key(sys, user, threshold) for sys, user in zip(df[sys_col].tolist(), df["pred"].tolist())]
    df["url"] = df.apply(lambda row: get_topic_url(row), axis=1)

    # Plot sizing
    ...
    # Main chart
    chart = alt.Chart(df).mark_square(opacity=0.8, size=mark_size, stroke="grey", strokeWidth=0.5).transform_window(
        groupby=['vis_pred_bin'],
        sort=[{'field': sys_col}],
        id='row_number()',
        ignorePeers=True,
    ).encode(
        x=alt.X('vis_pred_bin:Q', title="Our prediction of your rating", scale=alt.Scale(domain=(domain_min, domain_max))),
        y=alt.Y('id:O', title="Topics (ordered by System toxicity rating)", axis=alt.Axis(values=list(range(0, max_items, 5))), sort='descending'),
        color=alt.Color("key:O", scale=alt.Scale(
            domain=["System agrees: Non-toxic", "System agrees: Toxic", "System differs: Error > 1.5", "System differs: Error > 1.0", "System differs: Error > 0.5", "System differs: Error <=0.5"],
            range=["white", "#cbcbcb", "red", "#ff7a5c", "#ffa894", "#ffd1c7"]),
            ...
        ),
        href="url:N",
        tooltip=[
            alt.Tooltip("topic:N", title="Topic"),
            alt.Tooltip("system_label:N", title="System label"),
            alt.Tooltip(f"{sys_col}:Q", title="System rating", format=".2f"),
            alt.Tooltip("pred:Q", title="Your rating", format=".2f")
        ]
    )
    ...
    plot = (bkgd + annotation + chart + rule).properties(height=(plot_dim_height), width=plot_dim_width).resolve_scale(color='independent').to_json()
    return plot
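The key column drives the color scale above. get_key itself is defined elsewhere in this file and is not shown in this diff; purely as an inferred sketch from the color-scale domain (a reconstruction, not the actual definition), its logic would be roughly:

    def get_key_sketch(sys, user, threshold):
        # Agreement: both ratings fall on the same side of the toxicity threshold
        if (sys > threshold) == (user > threshold):
            return "System agrees: Toxic" if sys > threshold else "System agrees: Non-toxic"
        # Disagreement: bucket by the size of the gap between the two ratings
        err = abs(sys - user)
        if err > 1.5:
            return "System differs: Error > 1.5"
        elif err > 1.0:
            return "System differs: Error > 1.0"
        elif err > 0.5:
            return "System differs: Error > 0.5"
        return "System differs: Error <=0.5"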
# Plots the cluster results histogram (each block is a comment), but *without* a model
# as a point of reference (in contrast to plot_overall_vis_cluster)
def plot_overall_vis_cluster_no_model(cur_user, preds_df, n_comments=None, bins=VIS_BINS, threshold=TOXIC_THRESHOLD, sys_col="rating_sys"):
    df = preds_df.copy().reset_index()

    df["vis_pred_bin"], out_bins = pd.cut(df[sys_col], bins, labels=VIS_BINS_LABELS, retbins=True)
    df = df[df["user_id"] == cur_user].sort_values(by=[sys_col]).reset_index()
    df["system_label"] = [("toxic" if r > threshold else "non-toxic") for r in df[sys_col].tolist()]
    df["key"] = [get_key_no_model(sys, threshold) for sys in df[sys_col].tolist()]
    df["category"] = df.apply(lambda row: get_category(row), axis=1)
    df["url"] = df.apply(lambda row: get_comment_url(row), axis=1)
    ...
    # Main chart
    chart = alt.Chart(df).mark_square(opacity=0.8, size=mark_size, stroke="grey", strokeWidth=0.25).transform_window(
        groupby=['vis_pred_bin'],
        sort=[{'field': sys_col}],
        id='row_number()',
        ignorePeers=True
    ).encode(
        ...
        href="url:N",
        tooltip=[
            alt.Tooltip("comment:N", title="comment"),
            alt.Tooltip(f"{sys_col}:Q", title="System rating", format=".2f"),
        ]
    )
    ...
    final_plot = (bkgd + annotation + chart + rule).properties(height=(plot_dim_height), width=plot_dim_width).resolve_scale(color='independent').to_json()

    return final_plot, df
# Plots the cluster results histogram (each block is a comment) *with* a model as a point of reference
def plot_overall_vis_cluster(cur_user, preds_df, error_type, n_comments=None, bins=VIS_BINS, threshold=TOXIC_THRESHOLD, sys_col="rating_sys"):
    df = preds_df.copy().reset_index()

    df["vis_pred_bin"], out_bins = pd.cut(df["pred"], bins, labels=VIS_BINS_LABELS, retbins=True)
    df = df[df["user_id"] == cur_user].sort_values(by=[sys_col]).reset_index(drop=True)
    df["system_label"] = [("toxic" if r > threshold else "non-toxic") for r in df[sys_col].tolist()]
    df["key"] = [get_key(sys, user, threshold) for sys, user in zip(df[sys_col].tolist(), df["pred"].tolist())]
    df["category"] = df.apply(lambda row: get_category(row), axis=1)
    df["url"] = df.apply(lambda row: get_comment_url(row), axis=1)

    if n_comments is not None:
        n_to_sample = np.min([n_comments, len(df)])
        df = df.sample(n=n_to_sample)

    # Plot sizing
    domain_min = 0
    domain_max = 4
    ...
    # Main chart
    chart = alt.Chart(df).mark_square(opacity=0.8, size=mark_size, stroke="grey", strokeWidth=0.25).transform_window(
        groupby=['vis_pred_bin'],
        sort=[{'field': sys_col}],
        id='row_number()',
        ignorePeers=True
    ).encode(
        ...
        href="url:N",
        tooltip=[
            alt.Tooltip("comment:N", title="comment"),
            alt.Tooltip(f"{sys_col}:Q", title="System rating", format=".2f"),
            alt.Tooltip("pred:Q", title="Your rating", format=".2f"),
            alt.Tooltip("category:N", title="Potential toxicity categories")
        ]
    ...
    return final_plot, df
def get_cluster_comments(df, error_type, threshold=TOXIC_THRESHOLD, sys_col="rating_sys", use_model=True, debug=False):
    df["user_color"] = [get_user_color(user, threshold) for user in df["pred"].tolist()]  # Cell colors
    df["system_color"] = [get_user_color(sys, threshold) for sys in df[sys_col].tolist()]  # Cell colors
    df["error_color"] = [get_system_color(sys, user, threshold) for sys, user in zip(df[sys_col].tolist(), df["pred"].tolist())]  # Cell colors
    df["error_type"] = [get_error_type(sys, user, threshold) for sys, user in zip(df[sys_col].tolist(), df["pred"].tolist())]  # Error type in words
    df["error_amt"] = [abs(sys - threshold) for sys in df[sys_col].tolist()]  # Raw error
    df["judgment"] = ["" for _ in range(len(df))]  # Template for "agree" or "disagree" buttons

    if use_model:
        df = df.sort_values(by=["error_amt"], ascending=False)  # Surface largest errors first
    else:
        if debug:
            print("get_cluster_comments; not using model")
        df = df.sort_values(by=[sys_col], ascending=True)

    df["id"] = df["item_id"]
    df["toxicity_category"] = df["category"]
    df["user_rating"] = df["pred"]
    df["user_decision"] = [get_decision(rating, threshold) for rating in df["pred"].tolist()]
    df["system_rating"] = df[sys_col]
    df["system_decision"] = [get_decision(rating, threshold) for rating in df[sys_col].tolist()]
    df = df.round(decimals=2)

    # Filter to the specified error type
    ...
    elif error_type == "Both":
        df = df[(df["error_type"] == "System may be under-sensitive") | (df["error_type"] == "System may be over-sensitive")]

    return df
# PERSONALIZED CLUSTERS utils
def get_disagreement_comments(preds_df, mode, n=10_000, threshold=TOXIC_THRESHOLD):
    ...
    df = df.sort_values(by=["diff"], ascending=asc)
    df = df.head(n)

    return df["comment"].tolist(), df

def get_explore_df(n_examples, threshold):
    df = system_preds_df.sample(n=n_examples)
    df["system_decision"] = [get_decision(rating, threshold) for rating in df["rating"].tolist()]
    df["system_color"] = [get_user_color(sys, threshold) for sys in df["rating"].tolist()]  # Cell colors
    return df
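A brief usage sketch for the explore view (not from the source; the sample size is illustrative):

    # Sample 20 comments annotated with the system's decision at the default threshold
    explore_df = get_explore_df(n_examples=20, threshold=TOXIC_THRESHOLD)
    explore_df[["comment", "rating", "system_decision"]].head()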
indie_label_svelte/public/global.css
CHANGED
@@ -2,6 +2,7 @@ html, body {
     position: relative;
     width: 100%;
     height: 100%;
+    font-size: 12px;
 }

 * {
@@ -88,13 +89,13 @@ h3 {
 }
 h5 {
     color: rgb(80, 80, 80);
-    font-size:
+    font-size: 20px;
 }
 h6 {
     margin-top: 50px;
     margin-bottom: 20px;
     text-transform: uppercase;
-    font-size:
+    font-size: 16px;
 }

 .head_3 {
@@ -107,7 +108,7 @@ h6 {

 .head_5 {
     color: rgb(80, 80, 80);
-    font-size:
+    font-size: 20px;
     font-weight: bold;
     margin-top: 25px;
     margin-bottom: 25px;
@@ -117,7 +118,7 @@ h6 {
     color: rgb(80, 80, 80);
     text-transform: uppercase;
     font-weight: bold;
-    font-size:
+    font-size: 16px;
     margin-top: 25px;
     margin-bottom: 25px;
 }
@@ -125,7 +126,7 @@ h6 {
 .head_6_non_cap {
     color: rgb(80, 80, 80);
     font-weight: bold;
-    font-size:
+    font-size: 16px;
     margin-top: 25px;
     margin-bottom: 25px;
 }
@@ -234,6 +235,10 @@ table {
     overflow-y: scroll;
 }

+.page_header {
+    height: 64px !important;
+}
+
 .tab_header {
     position: fixed;
     background-color: #e3d6fd;
@@ -242,6 +247,11 @@ table {
     /* border-bottom: 1px solid grey; */
 }

+.tab_header span {
+    font-size: 14px;
+    line-height: normal !important;
+}
+
 .label_table {
     height: 750px;
     overflow-y: scroll;
@@ -279,6 +289,7 @@ table {

 .audit_section {
     padding-top: 40px;
+    width: 100%;
 }

 .comment_table_small {
indie_label_svelte/src/App.svelte
CHANGED
@@ -4,18 +4,10 @@

     import HypothesisPanel from "./HypothesisPanel.svelte";
     import MainPanel from "./MainPanel.svelte";
-    import SelectUserDialog from "./SelectUserDialog.svelte";
     import Explore from "./Explore.svelte";
-    import Results from "./Results.svelte";
-    import StudyLinks from "./StudyLinks.svelte";
-    import { user } from './stores/cur_user_store.js';
-    import { users } from "./stores/all_users_store.js";

     let personalized_model;
     let personalized_models = [];
-
-    // let topic = "";
-
     let error_type_options = ['Both', 'System is under-sensitive', 'System is over-sensitive', 'Show errors and non-errors'];
     let error_type = error_type_options[0];

@@ -24,27 +16,6 @@
     let mode = searchParams.get("mode");
     let cur_user = searchParams.get("user");

-    // Set cur_user if it's provided in URL params
-    if (cur_user !== null) {
-        user.update((value) => cur_user);
-    }
-
-    // Handle user dialog
-    let user_dialog_open = false;
-    user.subscribe(value => {
-        cur_user = value;
-    });
-
-    // Handle all users
-    let all_users = [];
-    async function getUsers() {
-        const response = await fetch("./get_users");
-        const text = await response.text();
-        const data = JSON.parse(text);
-        all_users = data["users"];
-        users.update((value) => all_users);
-    }
-
     function getAuditSettings() {
         let req_params = {
             user: cur_user,
@@ -55,27 +26,12 @@
         .then(function (r_orig) {
             let r = JSON.parse(r_orig);
             personalized_models = r["personalized_models"];
             personalized_model = personalized_models[0];
+            cur_user = r["user"];
-
-            // personalized_model = "model_1632886687_iterA";
-            // let clusters = r["clusters"];
-            // topic = clusters[0]; // TEMP
         });
-
-        // fetch("./audit_settings")
-        //     .then((r) => r.text())
-        //     .then(function (r_orig) {
-        //         let r = JSON.parse(r_orig);
-        //         personalized_models = r["personalized_models"];
-        //         personalized_model = personalized_models[0]; // TEMP
-        //         // personalized_model = "model_1632886687_iterA";
-        //         let clusters = r["clusters"];
-        //         topic = clusters[0]; // TEMP
-        //     });
     }
     onMount(async () => {
         getAuditSettings();
-        getUsers();
     });
 </script>

@@ -88,22 +44,13 @@
     <div>
         <Explore />
     </div>
-    {:else if mode == "results"}
-    <div>
-        <Results />
-    </div>
-    {:else if mode == "study_links"}
-    <div>
-        <StudyLinks />
-    </div>
     {:else }
-    <SelectUserDialog bind:open={user_dialog_open} cur_user={cur_user} />
     <div>
         {#key personalized_model }
-            <HypothesisPanel model={personalized_model}
+            <HypothesisPanel model={personalized_model} cur_user={cur_user}/>
         {/key}

-        <MainPanel bind:model={personalized_model} bind:error_type={error_type} on:change />
+        <MainPanel bind:model={personalized_model} bind:error_type={error_type} cur_user={cur_user} on:change />
     </div>
     {/if}
 </main>
indie_label_svelte/src/AppOld.svelte
DELETED
@@ -1,127 +0,0 @@
<svelte:head>
    <title>IndieLabel</title>
</svelte:head>

<script lang="ts">
    import { onMount } from "svelte";
    import Section from "./Section.svelte";
    import IterativeClustering from "./IterativeClustering.svelte";
    import OverallResults from "./OverallResults.svelte";
    import Labeling from "./Labeling.svelte";
    import HypothesisPanel from "./HypothesisPanel.svelte"

    let personalized_model;
    let personalized_models = [];
    let breakdown_category;
    let breakdown_categories = [];
    let systems = ["Perspective comment toxicity classifier"]; // Only one system for now
    let clusters = [];
    let promise = Promise.resolve(null);

    function getAuditSettings() {
        fetch("./audit_settings")
            .then((r) => r.text())
            .then(function (r_orig) {
                let r = JSON.parse(r_orig);
                breakdown_categories = r["breakdown_categories"];
                breakdown_category = breakdown_categories[0];
                personalized_models = r["personalized_models"];
                personalized_model = personalized_models[0];
                clusters = r["clusters"];
            });
    }
    onMount(async () => {
        getAuditSettings();
    });

    function handleAuditButton() {
        promise = getAudit();
    }

    async function getAudit() {
        let req_params = {
            pers_model: personalized_model,
            breakdown_axis: breakdown_category,
            perf_metric: "avg_diff",
            breakdown_sort: "difference",
            n_topics: 10,
        };
        let params = new URLSearchParams(req_params).toString();
        const response = await fetch("./get_audit?" + params);
        const text = await response.text();
        const data = JSON.parse(text);
        return data;
    }

</script>

<main>
    <HypothesisPanel model={personalized_model} />

    <Labeling />

    <IterativeClustering clusters={clusters} ind={1} personalized_model={personalized_model} />

    <div id="audit-settings" class="section">
        <h5>Audit settings</h5>
        <Section
            section_id="systems"
            section_title="What status-quo system would you like to audit?"
            section_opts={systems}
            bind:value={systems[0]}
        />
        <Section
            section_id="personalized_model"
            section_title="What model would you like to use to represent your views?"
            section_opts={personalized_models}
            bind:value={personalized_model}
        />
        <Section
            section_id="breakdown_category"
            section_title="How would you like to explore the performance of the system?"
            section_opts={breakdown_categories}
            bind:value={breakdown_category}
        />
        <button on:click={handleAuditButton}> Generate results </button>
        <div>
            Personalized model: {personalized_model}, Breakdown category: {breakdown_category}
        </div>
    </div>

    {#await promise}
        <p>...waiting</p>
    {:then audit_results}
        {#if audit_results}
            <OverallResults data={audit_results} clusters={clusters} personalized_model={personalized_model} />
        {/if}
    {:catch error}
        <p style="color: red">{error.message}</p>
    {/await}
</main>

<style>
    main {
        text-align: left;
        padding: 1em;
        max-width: 240px;
        margin: 0 0;
    }
    h3 {
        color: rgb(80, 80, 80);
        font-size: 30px;
    }
    h5 {
        color: rgb(80, 80, 80);
        font-size: 25px;
    }
    h6 {
        margin-top: 50px;
        text-transform: uppercase;
        font-size: 14px;
    }
    @media (min-width: 640px) {
        main {
            max-width: none;
        }
    }
</style>
indie_label_svelte/src/Auditing.svelte
CHANGED
@@ -7,7 +7,6 @@
|
|
7 |
import HelpTooltip from "./HelpTooltip.svelte";
|
8 |
import TopicTraining from "./TopicTraining.svelte";
|
9 |
|
10 |
-
import { user } from './stores/cur_user_store.js';
|
11 |
import { error_type } from './stores/error_type_store.js';
|
12 |
import { topic_chosen } from './stores/cur_topic_store.js';
|
13 |
import { model_chosen } from './stores/cur_model_store.js';
|
@@ -17,15 +16,13 @@
|
|
17 |
import LayoutGrid, { Cell } from "@smui/layout-grid";
|
18 |
import Radio from '@smui/radio';
|
19 |
import FormField from '@smui/form-field';
|
20 |
-
import Card, { Content } from '@smui/card';
|
21 |
import{ Wrapper } from '@smui/tooltip';
|
22 |
import IconButton from '@smui/icon-button';
|
23 |
-
import Select, { Option } from "@smui/select";
|
24 |
import Svelecte from '../node_modules/svelecte/src/Svelecte.svelte';
|
25 |
|
26 |
export let personalized_model;
|
27 |
-
// export let topic;
|
28 |
export let cur_error_type = "Both";
|
|
|
29 |
|
30 |
let evidence = [];
|
31 |
let show_audit_settings = false;
|
@@ -54,8 +51,6 @@
|
|
54 |
]
|
55 |
|
56 |
let personalized_models = [];
|
57 |
-
let breakdown_category;
|
58 |
-
let breakdown_categories = [];
|
59 |
let systems = ["YouSocial comment toxicity classifier"]; // Only one system for now
|
60 |
let clusters = [];
|
61 |
let clusters_for_tuning = []
|
@@ -75,7 +70,6 @@
|
|
75 |
let audit_type;
|
76 |
if (scaffold_method == "fixed" || scaffold_method == "personal" || scaffold_method == "personal_group" || scaffold_method == "personal_test" || scaffold_method == "personal_cluster" || scaffold_method == "topic_train" || scaffold_method == "prompts") {
|
77 |
audit_type = audit_types[1];
|
78 |
-
// audit_type = audit_types[0];
|
79 |
} else {
|
80 |
// No scaffolding mode or tutorial
|
81 |
audit_type = audit_types[0];
|
@@ -99,19 +93,8 @@
|
|
99 |
use_group_model = true;
|
100 |
}
|
101 |
|
102 |
-
// TEMP
|
103 |
let promise_cluster = Promise.resolve(null);
|
104 |
|
105 |
-
// Get current user from store
|
106 |
-
let cur_user;
|
107 |
-
user.subscribe(value => {
|
108 |
-
if (value != cur_user) {
|
109 |
-
cur_user = value;
|
110 |
-
personalized_model = "";
|
111 |
-
getAuditSettings();
|
112 |
-
}
|
113 |
-
});
|
114 |
-
|
115 |
// Get current topic from store
|
116 |
let topic;
|
117 |
topic_chosen.subscribe(value => {
|
@@ -126,8 +109,7 @@
|
|
126 |
if (!personalized_models.includes(personalized_model)) {
|
127 |
personalized_models.push(personalized_model);
|
128 |
}
|
129 |
-
|
130 |
-
handleClusterButton(); // re-render cluster results
|
131 |
});
|
132 |
|
133 |
// Save current error type
|
@@ -137,17 +119,13 @@
|
|
137 |
handleClusterButton();
|
138 |
}
|
139 |
|
140 |
-
// Handle topic-specific training
|
141 |
-
// let topic_training = null;
|
142 |
-
|
143 |
async function updateTopicChosen() {
|
144 |
if (topic != null) {
|
145 |
-
console.log("updateTopicChosen", topic)
|
146 |
topic_chosen.update((value) => topic);
|
147 |
}
|
148 |
}
|
149 |
|
150 |
-
function
|
151 |
let req_params = {
|
152 |
user: cur_user,
|
153 |
scaffold_method: scaffold_method,
|
@@ -157,8 +135,6 @@
|
|
157 |
.then((r) => r.text())
|
158 |
.then(function (r_orig) {
|
159 |
let r = JSON.parse(r_orig);
|
160 |
-
breakdown_categories = r["breakdown_categories"];
|
161 |
-
breakdown_category = breakdown_categories[0];
|
162 |
personalized_models = r["personalized_models"];
|
163 |
if (use_group_model) {
|
164 |
let personalized_model_grp = r["personalized_model_grp"];
|
@@ -170,26 +146,27 @@
|
|
170 |
model_chosen.update((value) => personalized_model);
|
171 |
clusters = r["clusters"];
|
172 |
clusters_for_tuning = r["clusters_for_tuning"];
|
173 |
-
console.log("clusters", clusters); // TEMP
|
174 |
topic = clusters[0]["options"][0]["text"];
|
175 |
topic_chosen.update((value) => topic);
|
176 |
-
handleAuditButton();
|
177 |
-
handleClusterButton();
|
178 |
});
|
179 |
}
|
180 |
onMount(async () => {
|
181 |
-
|
182 |
});
|
183 |
|
184 |
function handleAuditButton() {
|
185 |
model_chosen.update((value) => personalized_model);
|
186 |
-
|
|
|
|
|
|
|
187 |
}
|
188 |
|
189 |
-
async function getAudit() {
|
190 |
let req_params = {
|
191 |
-
pers_model:
|
192 |
-
breakdown_axis: breakdown_category,
|
193 |
perf_metric: "avg_diff",
|
194 |
breakdown_sort: "difference",
|
195 |
n_topics: 10,
|
@@ -205,23 +182,22 @@
|
|
205 |
}
|
206 |
|
207 |
function handleClusterButton() {
|
208 |
-
promise_cluster = getCluster();
|
209 |
}
|
210 |
|
211 |
-
async function getCluster() {
|
212 |
-
if (
|
213 |
return null;
|
214 |
}
|
215 |
let req_params = {
|
216 |
cluster: topic,
|
217 |
topic_df_ids: [],
|
218 |
-
|
219 |
-
pers_model:
|
220 |
example_sort: "descending", // TEMP
|
221 |
comparison_group: "status_quo", // TEMP
|
222 |
search_type: "cluster",
|
223 |
keyword: "",
|
224 |
-
n_neighbors: 0,
|
225 |
error_type: cur_error_type,
|
226 |
use_model: use_model,
|
227 |
scaffold_method: scaffold_method,
|
@@ -230,7 +206,6 @@
|
|
230 |
const response = await fetch("./get_cluster_results?" + params);
|
231 |
const text = await response.text();
|
232 |
const data = JSON.parse(text);
|
233 |
-
console.log(topic);
|
234 |
return data;
|
235 |
}
|
236 |
</script>
|
@@ -240,16 +215,13 @@
|
|
240 |
<div>
|
241 |
<div style="margin-top: 30px">
|
242 |
<span class="head_3">Auditing</span>
|
243 |
-
<IconButton
|
244 |
-
class="material-icons grey_button"
|
245 |
-
size="normal"
|
246 |
-
on:click={() => (show_audit_settings = !show_audit_settings)}
|
247 |
-
>
|
248 |
-
help_outline
|
249 |
-
</IconButton>
|
250 |
</div>
|
251 |
<div style="width: 80%">
|
|
|
252 |
<p>In this section, we'll be auditing the content moderation system. Here, you’ll be aided by a personalized model that will help direct your attention towards potential problem areas in the model’s performance. This model isn’t meant to be perfect, but is designed to help you better focus on areas that need human review.</p>
|
|
|
|
|
|
|
253 |
</div>
|
254 |
|
255 |
{#if show_audit_settings}
|
@@ -299,11 +271,14 @@
|
|
299 |
</LayoutGrid>
|
300 |
</div>
|
301 |
</div>
|
|
|
|
|
302 |
<p>Current model: {personalized_model}</p>
|
303 |
{/if}
|
304 |
</div>
|
305 |
|
306 |
<!-- 1: All topics overview -->
|
|
|
307 |
{#if audit_type == audit_types[0]}
|
308 |
<div class="audit_section">
|
309 |
<div class="head_5">Overview of all topics</div>
|
@@ -364,7 +339,7 @@
|
|
364 |
</li>
|
365 |
</ul>
|
366 |
{#key topic}
|
367 |
-
<TopicTraining topic={topic} />
|
368 |
{/key}
|
369 |
</div>
|
370 |
|
@@ -425,7 +400,7 @@
|
|
425 |
clusters={clusters}
|
426 |
model={personalized_model}
|
427 |
data={cluster_results}
|
428 |
-
table_width_pct={
|
429 |
table_id={"main"}
|
430 |
use_model={use_model}
|
431 |
bind:evidence={evidence}
|
@@ -447,7 +422,7 @@
|
|
447 |
<p>Next, you can optionally search for more comments to serve as evidence through manual keyword search (for individual words or phrases).</p>
|
448 |
<div class="section_indent">
|
449 |
{#key error_type}
|
450 |
-
<KeywordSearch clusters={clusters} personalized_model={personalized_model} bind:evidence={evidence} use_model={use_model} on:change/>
|
451 |
{/key}
|
452 |
</div>
|
453 |
</div>
|
@@ -457,7 +432,7 @@
|
|
457 |
<div class="head_5">Finalize your current report</div>
|
458 |
<p>Finally, review the report you've generated on the side panel and provide a brief summary of the problem you see. You may also list suggestions or insights into addressing this problem if you have ideas. This report will be directly used by the model developers to address the issue you've raised</p>
|
459 |
</div>
|
460 |
-
|
461 |
</div>
|
462 |
|
463 |
<style>
|
|
|
7 |
import HelpTooltip from "./HelpTooltip.svelte";
|
8 |
import TopicTraining from "./TopicTraining.svelte";
|
9 |
|
|
|
10 |
import { error_type } from './stores/error_type_store.js';
|
11 |
import { topic_chosen } from './stores/cur_topic_store.js';
|
12 |
import { model_chosen } from './stores/cur_model_store.js';
|
|
|
16 |
import LayoutGrid, { Cell } from "@smui/layout-grid";
|
17 |
import Radio from '@smui/radio';
|
18 |
import FormField from '@smui/form-field';
|
|
|
19 |
import{ Wrapper } from '@smui/tooltip';
|
20 |
import IconButton from '@smui/icon-button';
|
|
|
21 |
import Svelecte from '../node_modules/svelecte/src/Svelecte.svelte';
|
22 |
|
23 |
export let personalized_model;
|
|
|
24 |
export let cur_error_type = "Both";
|
25 |
+
export let cur_user;
|
26 |
|
27 |
let evidence = [];
|
28 |
let show_audit_settings = false;
|
|
|
51 |
]
|
52 |
|
53 |
let personalized_models = [];
|
|
|
|
|
54 |
let systems = ["YouSocial comment toxicity classifier"]; // Only one system for now
|
55 |
let clusters = [];
|
56 |
let clusters_for_tuning = []
|
|
|
70 |
let audit_type;
|
71 |
if (scaffold_method == "fixed" || scaffold_method == "personal" || scaffold_method == "personal_group" || scaffold_method == "personal_test" || scaffold_method == "personal_cluster" || scaffold_method == "topic_train" || scaffold_method == "prompts") {
|
72 |
audit_type = audit_types[1];
|
|
|
73 |
} else {
|
74 |
// No scaffolding mode or tutorial
|
75 |
audit_type = audit_types[0];
|
|
|
93 |
use_group_model = true;
|
94 |
}
|
95 |
|
|
|
96 |
let promise_cluster = Promise.resolve(null);
|
97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
// Get current topic from store
|
99 |
let topic;
|
100 |
topic_chosen.subscribe(value => {
|
|
|
109 |
if (!personalized_models.includes(personalized_model)) {
|
110 |
personalized_models.push(personalized_model);
|
111 |
}
|
112 |
+
getAuditResults();
|
|
|
113 |
});
|
114 |
|
115 |
// Save current error type
|
|
|
119 |
handleClusterButton();
|
120 |
}
|
121 |
|
|
|
|
|
|
|
122 |
async function updateTopicChosen() {
|
123 |
if (topic != null) {
|
|
|
124 |
topic_chosen.update((value) => topic);
|
125 |
}
|
126 |
}
|
127 |
|
128 |
+
function getAuditResults() {
|
129 |
let req_params = {
|
130 |
user: cur_user,
|
131 |
scaffold_method: scaffold_method,
|
|
|
135 |
.then((r) => r.text())
|
136 |
.then(function (r_orig) {
|
137 |
let r = JSON.parse(r_orig);
|
|
|
|
|
138 |
personalized_models = r["personalized_models"];
|
139 |
if (use_group_model) {
|
140 |
let personalized_model_grp = r["personalized_model_grp"];
|
|
|
146 |
model_chosen.update((value) => personalized_model);
|
147 |
clusters = r["clusters"];
|
148 |
clusters_for_tuning = r["clusters_for_tuning"];
|
|
|
149 |
topic = clusters[0]["options"][0]["text"];
|
150 |
topic_chosen.update((value) => topic);
|
151 |
+
handleAuditButton();
|
152 |
+
handleClusterButton();
|
153 |
});
|
154 |
}
|
155 |
onMount(async () => {
|
156 |
+
getAuditResults();
|
157 |
});
|
158 |
|
159 |
function handleAuditButton() {
|
160 |
model_chosen.update((value) => personalized_model);
|
161 |
+
if (personalized_model == "" || personalized_model == undefined) {
|
@@ ... @@
+            return;
+        }
+        promise = getAudit(personalized_model);
     }
 
+    async function getAudit(pers_model) {
         let req_params = {
+            pers_model: pers_model,
             perf_metric: "avg_diff",
             breakdown_sort: "difference",
             n_topics: 10,
@@ ... @@
     }
 
     function handleClusterButton() {
+        promise_cluster = getCluster(personalized_model);
     }
 
+    async function getCluster(pers_model) {
+        if (pers_model == "" || pers_model == undefined) {
             return null;
         }
         let req_params = {
             cluster: topic,
             topic_df_ids: [],
+            cur_user: cur_user,
+            pers_model: pers_model,
             example_sort: "descending", // TEMP
             comparison_group: "status_quo", // TEMP
             search_type: "cluster",
             keyword: "",
             error_type: cur_error_type,
             use_model: use_model,
             scaffold_method: scaffold_method,
@@ ... @@
         const response = await fetch("./get_cluster_results?" + params);
         const text = await response.text();
         const data = JSON.parse(text);
         return data;
     }
 </script>
@@ ... @@
 <div>
     <div style="margin-top: 30px">
         <span class="head_3">Auditing</span>
     </div>
     <div style="width: 80%">
+        {#if personalized_model}
         <p>In this section, we'll be auditing the content moderation system. Here, you’ll be aided by a personalized model that will help direct your attention towards potential problem areas in the model’s performance. This model isn’t meant to be perfect, but is designed to help you better focus on areas that need human review.</p>
+        {:else}
+        <p>Please first train your personalized model by following the steps in the "Labeling" tab (click the top left tab above).</p>
+        {/if}
     </div>
 
     {#if show_audit_settings}
@@ ... @@
         </LayoutGrid>
         </div>
     </div>
+    {/if}
+    {#if personalized_model}
     <p>Current model: {personalized_model}</p>
     {/if}
 </div>
 
 <!-- 1: All topics overview -->
+{#if personalized_model}
 {#if audit_type == audit_types[0]}
 <div class="audit_section">
     <div class="head_5">Overview of all topics</div>
@@ ... @@
         </li>
     </ul>
     {#key topic}
+        <TopicTraining topic={topic} cur_user={cur_user}/>
     {/key}
 </div>
 
@@ ... @@
             clusters={clusters}
             model={personalized_model}
             data={cluster_results}
+            table_width_pct={100}
             table_id={"main"}
             use_model={use_model}
             bind:evidence={evidence}
@@ ... @@
     <p>Next, you can optionally search for more comments to serve as evidence through manual keyword search (for individual words or phrases).</p>
     <div class="section_indent">
         {#key error_type}
+            <KeywordSearch clusters={clusters} personalized_model={personalized_model} cur_user={cur_user} bind:evidence={evidence} use_model={use_model} on:change/>
         {/key}
     </div>
 </div>
@@ ... @@
     <div class="head_5">Finalize your current report</div>
     <p>Finally, review the report you've generated on the side panel and provide a brief summary of the problem you see. You may also list suggestions or insights into addressing this problem if you have ideas. This report will be directly used by the model developers to address the issue you've raised</p>
 </div>
+{/if}
 </div>
 
 <style>
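The getAudit/getCluster functions above, and several components later in this commit, share one request pattern: serialize parameters with URLSearchParams, GET a Flask route, read the body as text, and JSON.parse it. A minimal TypeScript sketch of that pattern; the getJson helper name is hypothetical and does not appear in the repo:

    // Hypothetical helper summarizing the fetch pattern used throughout these components.
    async function getJson<T>(route: string, reqParams: Record<string, string>): Promise<T> {
        // Serialize params into a query string, e.g. "pers_model=...&n_topics=10"
        const params = new URLSearchParams(reqParams).toString();
        const response = await fetch(route + "?" + params);
        // The server returns JSON as text, so parse it explicitly.
        const text = await response.text();
        return JSON.parse(text) as T;
    }

    // Usage sketch mirroring getCluster() above (argument values illustrative):
    // const data = await getJson("./get_cluster_results", { cluster: topic, pers_model: pers_model });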
indie_label_svelte/src/ClusterResults.svelte
CHANGED
@@ -35,6 +35,7 @@
     export let evidence;
     export let table_id;
     export let use_model = true;
+    export let show_agree_disagree = false;
 
     let N_COMMENTS = 500;
     let show_num_ratings = false;
@@ -54,12 +55,10 @@
         //your code goes here on location change
         let cur_url = window.location.href;
         let cur_url_elems = cur_url.split("#");
-        // console.log(cur_url_elems)
         if (cur_url_elems.length > 0) {
             let path = cur_url_elems[2];
             if (path == "comment") {
                 let comment_id = cur_url_elems[1].split("/")[0];
-                console.log("comment_id", comment_id)
                 selected_comment_id = parseInt(comment_id);
                 let table_ind = null;
                 for (let i = 0; i < items.length; i++) {
@@ -129,7 +128,6 @@
         items = data["cluster_comments"];
         set_length = items.length;
     }
-    // console.log(set_length);
 
     let cur_open_evidence;
     open_evidence.subscribe(value => {
@@ -323,8 +321,10 @@
         <Cell>Potential toxicity<br>categories</Cell>
         {/if}
     {/if}
-
+
+    {#if show_agree_disagree}
     <Cell>Do you agree<br>with the system?</Cell>
+    {/if}
 
     {#if !show_checkboxes}
     <Cell>Remove</Cell>
@@ -396,7 +396,8 @@
         </Cell>
     {/if}
     {/if}
-
+
+    {#if show_agree_disagree}
     <Cell>
         <div>
             <FormField>
@@ -417,6 +418,7 @@
             </FormField>
         </div>
     </Cell>
+    {/if}
 
     {#if !show_checkboxes}
     <Cell>
indie_label_svelte/src/CommentTable.svelte
CHANGED
@@ -4,22 +4,18 @@
     import Button, { Label } from "@smui/button";
     import DataTable, { Head, Body, Row, Cell } from "@smui/data-table";
     import LinearProgress from '@smui/linear-progress';
-
-    import { user } from './stores/cur_user_store.js';
+
+    import { model_chosen } from './stores/cur_model_store.js';
 
     export let mode;
     export let model_name;
+    export let cur_user;
 
     let to_label = {};
     let promise = Promise.resolve(null);
     let n_complete_ratings;
     let n_unsure_ratings;
-
-    // Get current user
-    let cur_user;
-    user.subscribe(value => {
-        cur_user = value;
-    });
+    let show_comments_labeled_count = false;
 
     function getCommentsToLabel(cur_mode, n) {
         if (cur_mode == "train") {
@@ -48,6 +44,7 @@
     }
 
     function handleTrainModelButton() {
+        getCompleteRatings();
         promise = getModel("train");
     }
 
@@ -91,11 +88,14 @@
         user: cur_user,
     };
     let params = new URLSearchParams(req_params).toString();
-    const [remainder of the old fetch block (old lines 94-98) not captured in this view]
+    const data = await fetch("./get_personalized_model?" + params)
+        .then((r) => r.text())
+        .then(function (text) {
+            let data = JSON.parse(text);
+            to_label = data["ratings_prev"];
+            model_chosen.update((value) => model_name);
+            return data;
+        });
     return data;
 }
 </script>
@@ -221,12 +221,14 @@
     {/key}
 
     <div class="spacing_vert_40">
-        <Button on:click={handleTrainModelButton} variant="outlined"
+        <Button on:click={handleTrainModelButton} variant="outlined">
            <Label>Train Model</Label>
        </Button>
+       {#if show_comments_labeled_count}
        <Button on:click={getCompleteRatings} variant="outlined">
            <Label>Get Number of Comments Labeled</Label>
        </Button>
+       {/if}
        <Button on:click={() => handleLoadCommentsButton(5)} variant="outlined">
            <Label>Fetch More Comments To Label</Label>
        </Button>
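The user.subscribe block removed above follows Svelte's writable-store contract; after this commit, cur_user arrives as a component prop instead. For context, a minimal sketch of what such a store module typically looks like — the initial value is an assumption, since cur_user_store.js itself is not shown in this diff:

    // Hypothetical contents of the removed cur_user_store.js (sketch, not from the commit).
    import { writable } from "svelte/store";

    // "DemoUser" is an assumed placeholder initial value.
    export const user = writable<string>("DemoUser");

    // Components previously read it via a subscription:
    //     user.subscribe((value) => { cur_user = value; });
    // After this commit, cur_user is threaded down as an explicit prop instead,
    // which makes each component's dependence on the user visible in its API.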
indie_label_svelte/src/Explore.svelte
CHANGED
@@ -48,7 +48,6 @@
     const text = await response.text();
     const data = JSON.parse(text);
     cur_examples = JSON.parse(data["examples"]);
-    console.log(cur_examples); // TEMP
     return true;
 }
 </script>
indie_label_svelte/src/Hunch.svelte
CHANGED
@@ -1,9 +1,7 @@
 <script lang="ts">
     import { onMount } from "svelte";
-    import IterativeClustering from "./IterativeClustering.svelte";
     import Button, { Label } from "@smui/button";
     import Textfield from '@smui/textfield';
-    import LinearProgress from "@smui/linear-progress";
 
     export let ind;
     export let hunch;
@@ -32,7 +30,6 @@
 
 <div>
     <div>
-        <!-- <h6>Hunch {ind + 1}</h6> -->
         <h6>Topic:</h6>
         {topic}
     </div>
@@ -46,13 +43,6 @@
             label="My current hunch is that..."
         >
         </Textfield>
-        <!-- <Button
-            on:click={handleTestOnExamples}
-            class="button_float_right spacing_vert"
-            variant="outlined"
-        >
-            <Label>Test on examples</Label>
-        </Button> -->
     </div>
 
     <div class="spacing_vert">
@@ -63,23 +53,7 @@
         <Label>Submit</Label>
     </Button>
 </div>
-
-<!-- {#await example_block}
-    <div class="app_loading">
-        <LinearProgress indeterminate />
-    </div>
-{:then} -->
-<!-- {#if example_block}
-    <IterativeClustering clusters={clusters} ind={ind + 1} personalized_model={model} />
-{/if} -->
-<!-- {:catch error}
-    <p style="color: red">{error.message}</p>
-{/await} -->
 </div>
 
 <style>
-    /* * {
-        z-index: 11;
-        overflow-x: hidden;
-    } */
 </style>
indie_label_svelte/src/HypothesisPanel.svelte
CHANGED
@@ -1,12 +1,10 @@
 <script lang="ts">
     import { onMount } from "svelte";
     import ClusterResults from "./ClusterResults.svelte";
-    import [old import not fully captured in this view]
+    import SubmitReportDialog from "./SubmitReportDialog.svelte";
 
     import Button, { Label } from "@smui/button";
     import Textfield from '@smui/textfield';
-    import { user } from './stores/cur_user_store.js';
-    import { error_type } from './stores/error_type_store.js';
     import { new_evidence } from './stores/new_evidence_store.js';
     import { open_evidence } from './stores/open_evidence_store.js';
     import { topic_chosen } from './stores/cur_topic_store.js';
@@ -24,37 +22,27 @@
     import Checkbox from '@smui/checkbox';
     import FormField from '@smui/form-field';
     import IconButton from "@smui/icon-button";
-    import{ Wrapper } from '@smui/tooltip';
     import Radio from '@smui/radio';
-    import Switch from '@smui/switch';
 
     export let model;
-
-    export let user_dialog_open;
+    export let cur_user;
 
     let all_reports = [];
-
-    let cur_user;
-    user.subscribe(value => {
-        cur_user = value;
-    });
-
     let cur_topic;
     topic_chosen.subscribe(value => {
         cur_topic = value;
     });
+    // Handle submit report dialog
+    let submit_dialog_open = false;
 
     // Handle routing
     let searchParams = new URLSearchParams(window.location.search);
     let scaffold_method = searchParams.get("scaffold");
+    if (scaffold_method == null) {
+        scaffold_method = "personal"; // Default to personalized model scaffold
+    }
     let topic_vis_method = searchParams.get("topic_vis_method");
 
-    // TODO: connect to selected["error_type"] so changes on main panel affect report panel
-    // let cur_error_type;
-    // error_type.subscribe(value => {
-    //     cur_error_type = value;
-    // });
-
     // Handle drawer
     let open = false;
     let selected = null;
@@ -69,8 +57,6 @@
     cur_open_evidence = selected["evidence"];
     open_evidence.update((value) => cur_open_evidence);
     let isolated_topic = selected["title"].replace(/^(Topic: )/,'');
-    console.log("selected title", selected["title"]);
-    console.log(selected);
 
     // Close panel
     open = false;
@@ -151,6 +137,7 @@
         cur_user: cur_user,
         reports: JSON.stringify(all_reports),
         scaffold_method: scaffold_method,
+        model: model,
     };
     let params = new URLSearchParams(req_params).toString();
     const response = await fetch("./save_reports?" + params);
@@ -218,328 +205,309 @@
     // Save current error type
     async function updateErrorType() {
         // Update error type on main page to be the selected error type
-        // error_type.update((value) => cur_error_type);
-        // selected["error_type"] = cur_error_type;
         editErrorType = false;
     }
 
+    let promise_submit = Promise.resolve(null);
+    function handleSubmitReport() {
+        promise_submit = submitReport();
+    }
+    async function submitReport() {
+        submit_dialog_open = true;
+        return true;
+    }
+
 </script>
 
-[old report-panel markup (old lines ~228-505) only partially captured in this view; replaced by the block below]
+<div>
+    {#await promise_submit}
+        <CircularProgress style="height: 32px; width: 32px;" indeterminate />
+    {:then}
+        <SubmitReportDialog bind:open={submit_dialog_open} cur_user={cur_user} all_reports={all_reports}/>
+    {:catch error}
+        <p style="color: red">{error.message}</p>
+    {/await}
+    <div class="hypothesis_panel">
+        <div class="panel_header">
+            <div class="panel_header_content">
+                <div class="page_header">
+                    <img src="/logo.png" style="height: 50px; padding: 0px 20px;" alt="IndieLabel" />
+                    <Button class="user_button" color="secondary" style="margin: 12px 10px;" >
+                        <Label>User: {cur_user}</Label>
+                    </Button>
+                </div>
+                <div class="hypotheses_header">
+                    <h5 style="float: left; margin: 0; padding: 5px 20px;">Your Audit Reports</h5>
+                    <Button
+                        on:click={() => (open = !open)}
+                        color="primary"
+                        disabled={model == null}
+                        style="float: right; padding: 10px; margin-right: 10px;"
+                    >
+                        {#if open}
+                            <Label>Close</Label>
+                        {:else}
+                            {#key unfinished_count}
+                                <Label>Unfinished reports ({unfinished_count})</Label>
+                            {/key}
+                        {/if}
+                    </Button>
+                </div>
             </div>
         </div>
 
+        {#if model == null}
+            <div class="panel_contents">
+                <p>You can start to author audit reports in this panel after you've trained your personalized model in the "Labeling" tab.</p>
+            </div>
+        {:else}
+            <div class="panel_contents">
+                <!-- Drawer -->
+                {#await promise}
+                    <div class="app_loading_fullwidth">
+                        <LinearProgress indeterminate />
+                    </div>
+                {:then reports}
+                    {#if reports}
+                        <div class="drawer-container">
+                            {#key open}
+                                <Drawer variant="dismissible" bind:open>
+                                    <Header>
+                                        <Title>Your Reports</Title>
+                                        <Subtitle>Select a report to view.</Subtitle>
+                                    </Header>
+                                    <Content>
+                                        <List twoLine>
+                                            {#each reports as report}
+                                                <Item
+                                                    href="javascript:void(0)"
+                                                    on:click={() => setActive(report)}
+                                                    activated={selected === report}
+                                                >
+                                                    {#if report["complete_status"]}
+                                                        <Graphic class="material-icons" aria-hidden="true">task_alt</Graphic>
                                                    {:else}
+                                                        <Graphic class="material-icons" aria-hidden="true">radio_button_unchecked</Graphic>
                                                    {/if}
+                                                    <Text>
+                                                        <PrimaryText>
+                                                            {report["title"]}
+                                                        </PrimaryText>
+                                                        <SecondaryText>
+                                                            {report["error_type"]}
+                                                        </SecondaryText>
+                                                    </Text>
+                                                </Item>
+                                            {/each}
+                                        </List>
+                                    </Content>
+                                </Drawer>
+                            {/key}
+                            <AppContent class="app-content">
+                                <main class="main-content">
+                                    {#if selected}
+                                        <div class="head_6_highlight">
+                                            Current Report
                                        </div>
+                                        <div class="panel_contents2">
+                                            <!-- Title -->
+                                            <div class="spacing_vert">
+                                                <div class="edit_button_row">
+                                                    {#if editTitle}
+                                                        <div class="edit_button_row_input">
+                                                            <Textfield
+                                                                bind:value={selected["title"]}
+                                                                label="Your report title"
+                                                                input$rows={4}
+                                                                textarea
+                                                                variant="outlined"
+                                                                style="width: 100%;"
+                                                                helperLine$style="width: 100%;"
+                                                            />
+                                                        </div>
+                                                        <div>
+                                                            <IconButton class="material-icons grey_button" size="button" on:click={() => (editTitle = false)}>
+                                                                check
+                                                            </IconButton>
+                                                        </div>
+                                                    {:else}
+                                                        {#if selected["title"] != ""}
+                                                            <div class="head_5">
+                                                                {selected["title"]}
+                                                            </div>
+                                                        {:else}
+                                                            <div class="grey_text">Enter a report title</div>
+                                                        {/if}
 
+                                                        <div>
+                                                            <IconButton class="material-icons grey_button" size="button" on:click={() => (editTitle = true)}>
+                                                                create
+                                                            </IconButton>
+                                                        </div>
+                                                    {/if}
+                                                </div>
                                            </div>
+
+                                            <!-- Error type -->
+                                            <div class="spacing_vert_40">
+                                                <div class="head_6">
+                                                    <b>Error Type</b>
+                                                </div>
+                                                <div class="edit_button_row">
+                                                    {#if editErrorType}
+                                                        <div>
+                                                            {#each error_type_options as e}
+                                                                <div style="display: flex; align-items: center;">
                                                                    <FormField>
                                                                        <Radio bind:group={selected["error_type"]} value={e.opt} on:change={updateErrorType} color="secondary" />
                                                                        <span slot="label">
+                                                                            <b>{e.opt}</b> {e.descr}
                                                                        </span>
                                                                    </FormField>
+                                                                </div>
+                                                            {/each}
+                                                        </div>
+                                                    {:else}
+                                                        {#if selected["error_type"] != ""}
+                                                            <div>
+                                                                <p>{selected["error_type"]}</p>
                                                            </div>
+                                                        {:else}
+                                                            <div class="grey_text">Select an error type</div>
+                                                        {/if}
+
                                                        <div>
+                                                            <IconButton class="material-icons grey_button" size="button" on:click={() => (editErrorType = true)}>
+                                                                create
+                                                            </IconButton>
                                                        </div>
                                                    {/if}
+                                                </div>
                                            </div>
+
+                                            <!-- Evidence -->
+                                            <div class="spacing_vert_40">
+                                                <div class="head_6">
+                                                    <b>Evidence</b>
+                                                </div>
+                                                {#key cur_open_evidence}
+                                                    <div>
+                                                        {#if cur_open_evidence.length > 0}
+                                                            <ClusterResults
+                                                                cluster={cur_topic}
+                                                                model={model}
+                                                                data={{"cluster_comments": cur_open_evidence}}
+                                                                show_vis={false}
+                                                                show_checkboxes={false}
+                                                                table_width_pct={100}
+                                                                rowsPerPage={25}
+                                                                table_id={"panel"}
+                                                            />
+                                                        {:else}
+                                                            <p class="grey_text">
+                                                                Add examples from the main panel to see them here!
+                                                            </p>
+                                                        {/if}
+                                                    </div>
+                                                {/key}
                                            </div>
 
+                                            <div class="spacing_vert_60">
+                                                <div class="head_6">
+                                                    <b>Summary/Suggestions</b>
+                                                </div>
+                                                <div class="spacing_vert">
+                                                    <Textfield
+                                                        style="width: 100%;"
+                                                        helperLine$style="width: 100%;"
+                                                        input$rows={8}
+                                                        textarea
+                                                        bind:value={selected["text_entry"]}
+                                                        label="My current hunch is that..."
+                                                    >
+                                                    </Textfield>
+                                                </div>
+
                                            </div>
 
+                                            <div class="spacing_vert_40">
+                                                <div class="head_6">
+                                                    <b>Mark report as complete?</b>
+                                                    <FormField>
+                                                        <Checkbox checked={selected["complete_status"]} on:change={handleMarkComplete} />
+                                                    </FormField>
+                                                </div>
+
                                            </div>
                                        </div>
+                                    {/if}
+                                </main>
+                            </AppContent>
+                        </div>
+                    {/if}
+                {:catch error}
+                    <p style="color: red">{error.message}</p>
+                {/await}
+            </div>
+
+            <div class="panel_footer">
+                <div class="panel_footer_contents">
+                    <Button
+                        on:click={handleNewReport}
+                        variant="outlined"
+                        color="secondary"
+                        style=""
+                    >
+                        <Label>New</Label>
+                    </Button>
 
+                    <!-- <Button
+                        on:click={handleDeleteReport}
+                        variant="outlined"
+                        color="secondary"
+                        style=""
+                    >
+                        <Label>Delete</Label>
+                    </Button> -->
+
+                    <Button
+                        on:click={handleSaveReport}
+                        variant="outlined"
+                        color="secondary"
+                    >
+                        <Label>Save</Label>
+                    </Button>
+
+                    <Button
+                        on:click={handleSubmitReport}
+                        variant="outlined"
+                        color="secondary"
+                    >
+                        <Label>Send Reports</Label>
+                    </Button>
+
+                    <div>
+                        <span style="color: grey"><i>Last saved:
+                            {#await promise_save}
+                                <CircularProgress style="height: 32px; width: 32px;" indeterminate />
+                            {:then result}
+                                {#if result}
+                                    {new Date().toLocaleTimeString()}
+                                {:else}
+                                    —
+                                {/if}
+                            {:catch error}
+                                <p style="color: red">{error.message}</p>
+                            {/await}
+                        </i></span>
+                    </div>
                </div>
            </div>
+        {/if}
     </div>
-
-    <!-- TEMP -->
-    <!-- {#key model}
-        <div>Model: {model}</div>
-    {/key} -->
 </div>
 
 <style>
-    /* Drawer */
-    /* .drawer-container {
-        position: relative;
-        display: flex;
-        height: 350px;
-        max-width: 600px;
-        border: 1px solid
-            var(--mdc-theme-text-hint-on-background, rgba(0, 0, 0, 0.1));
-        overflow: hidden;
-        z-index: 0;
-    }
-
-    * :global(.app-content) {
-        flex: auto;
-        overflow: auto;
-        position: relative;
-        flex-grow: 1;
-    }
-
-    .main-content {
-        overflow: auto;
-        padding: 16px;
-        height: 100%;
-        box-sizing: border-box;
-    } */
-
     .panel_contents {
         padding: 0 20px;
         overflow-y: auto;
@@ -578,6 +546,13 @@
     :global(.mdc-button.user_button) {
         float: right;
         margin-right: 20px;
+        max-width: 200px;
+    }
+
+    :global(.mdc-button.user_button span) {
+        text-overflow: ellipsis;
+        white-space: nowrap;
+        overflow: hidden;
     }
 
     .page_header {
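The save_reports call above packs the whole report list into a single JSON-stringified query parameter, now with the model name alongside it. A small TypeScript sketch of the request this assembles; the field names come from the component, while the user and model values are hypothetical:

    // Illustrative report shape, mirroring the fields used in HypothesisPanel.svelte.
    const all_reports = [
        { title: "Topic: gaming", error_type: "", evidence: [], text_entry: "", complete_status: false },
    ];
    const req_params = {
        cur_user: "DemoUser",                  // hypothetical user name
        reports: JSON.stringify(all_reports),  // entire list serialized into one param
        scaffold_method: "personal",
        model: "model_DemoUser",               // hypothetical model name (new field in this commit)
    };
    const params = new URLSearchParams(req_params).toString();
    // fetch("./save_reports?" + params) would then hit the corresponding server route.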
indie_label_svelte/src/IterativeClustering.svelte
DELETED
@@ -1,164 +0,0 @@
-<script>
-    import Section from "./Section.svelte";
-    import ClusterResults from "./ClusterResults.svelte";
-    import Button, { Label } from "@smui/button";
-    import Textfield from "@smui/textfield";
-    import LayoutGrid, { Cell } from "@smui/layout-grid";
-    import LinearProgress from "@smui/linear-progress";
-    import Chip, { Set, Text } from '@smui/chips';
-
-    export let clusters;
-    export let personalized_model;
-    export let evidence;
-    export let width_pct = 80;
-
-    let topic_df_ids = [];
-    let promise_iter_cluster = Promise.resolve(null);
-    let keyword = null;
-    let n_neighbors = null;
-    let cur_iter_cluster = null;
-    let history = [];
-
-    async function getIterCluster(search_type) {
-        let req_params = {
-            cluster: cur_iter_cluster,
-            topic_df_ids: topic_df_ids,
-            n_examples: 500, // TEMP
-            pers_model: personalized_model,
-            example_sort: "descending", // TEMP
-            comparison_group: "status_quo", // TEMP
-            search_type: search_type,
-            keyword: keyword,
-            n_neighbors: n_neighbors,
-        };
-        console.log("topic_df_ids", topic_df_ids);
-        let params = new URLSearchParams(req_params).toString();
-        const response = await fetch("./get_cluster_results?" + params);
-        const text = await response.text();
-        const data = JSON.parse(text);
-        // if (data["cluster_comments"] == null) {
-        //     return false
-        // }
-        topic_df_ids = data["topic_df_ids"];
-        return data;
-    }
-
-    function findCluster() {
-        promise_iter_cluster = getIterCluster("cluster");
-        history = history.concat("bulk-add cluster: " + cur_iter_cluster);
-    }
-
-    function findNeighbors() {
-        promise_iter_cluster = getIterCluster("neighbors");
-        history = history.concat("find " + n_neighbors + " neighbors");
-    }
-
-    function findKeywords() {
-        promise_iter_cluster = getIterCluster("keyword");
-        history = history.concat("keyword search: " + keyword);
-    }
-</script>
-
-<div>
-    <div>
-        <!-- <h6>Hunch {ind} examples</h6> -->
-        <div>
-            <h6>Search Settings</h6>
-            <!-- Start with cluster -->
-            <!-- <div class="">
-                <Section
-                    section_id="iter_cluster"
-                    section_title="Bulk-add cluster"
-                    section_opts={clusters}
-                    bind:value={cur_iter_cluster}
-                    width_pct={100}
-                />
-                <Button
-                    on:click={findCluster}
-                    variant="outlined"
-                    class="button_float_right"
-                    disabled={cur_iter_cluster == null}
-                >
-                    <Label>Search</Label>
-                </Button>
-            </div> -->
-
-            <!-- Manual keyword -->
-            <div class="spacing_vert">
-                <Textfield
-                    bind:value={keyword}
-                    label="Keyword search"
-                    variant="outlined"
-                    style="width: {width_pct}%"
-                />
-                <Button
-                    on:click={findKeywords}
-                    variant="outlined"
-                    class="button_float_right spacing_vert"
-                    disabled={keyword == null}
-                >
-                    <Label>Search</Label>
-                </Button>
-            </div>
-
-            <!-- Find neighbors of current set -->
-            <div class="spacing_vert">
-                <Textfield
-                    bind:value={n_neighbors}
-                    label="Number of neighbors to retrieve"
-                    type="number"
-                    min="1"
-                    max="50"
-                    variant="outlined"
-                    style="width: {width_pct}%"
-                />
-                <Button
-                    on:click={findNeighbors}
-                    variant="outlined"
-                    class="button_float_right spacing_vert"
-                    disabled={n_neighbors == null}
-                >
-                    <Label>Search</Label>
-                </Button>
-            </div>
-        </div>
-    </div>
-
-    {#await promise_iter_cluster}
-        <div class="app_loading" style="width: {width_pct}%">
-            <LinearProgress indeterminate />
-        </div>
-    {:then iter_cluster_results}
-        {#if iter_cluster_results}
-            {#if history.length > 0}
-                <div class="bold" style="padding-top:40px;">Search History</div>
-                <Set chips={history} let:chip choice>
-                    <Chip {chip}>
-                        <Text>{chip}</Text>
-                    </Chip>
-                </Set>
-            {/if}
-            {#if iter_cluster_results.cluster_comments != null}
-                <ClusterResults
-                    cluster={""}
-                    clusters={clusters}
-                    model={personalized_model}
-                    data={iter_cluster_results}
-                    show_vis={false}
-                    table_width_pct={80}
-                    bind:evidence={evidence}
-                    on:change
-                />
-            {:else}
-                <div class="bold" style="padding-top:40px;">
-                    No results found
-                </div>
-            {/if}
-        {/if}
-    {:catch error}
-        <p style="color: red">{error.message}</p>
-    {/await}
-</div>
-
-<style>
-</style>
indie_label_svelte/src/KeywordSearch.svelte
CHANGED
@@ -4,12 +4,11 @@
 
     import Button, { Label } from "@smui/button";
     import Textfield from "@smui/textfield";
-    import LinearProgress from "@smui/linear-progress";
-    import Chip, { Set, Text } from '@smui/chips';
-
+    import LinearProgress from "@smui/linear-progress";
 
     export let clusters;
     export let personalized_model;
+    export let cur_user;
     export let evidence;
     export let width_pct = 80;
     export let use_model = true;
@@ -17,7 +16,6 @@
     let topic_df_ids = [];
     let promise_iter_cluster = Promise.resolve(null);
     let keyword = null;
-    let n_neighbors = null;
     let cur_iter_cluster = null;
     let history = [];
 
@@ -30,23 +28,18 @@
         let req_params = {
             cluster: cur_iter_cluster,
             topic_df_ids: topic_df_ids,
-
+            cur_user: cur_user,
             pers_model: personalized_model,
             example_sort: "descending", // TEMP
             comparison_group: "status_quo", // TEMP
             search_type: search_type,
             keyword: keyword,
-            n_neighbors: n_neighbors,
             error_type: cur_error_type,
         };
-        console.log("topic_df_ids", topic_df_ids);
         let params = new URLSearchParams(req_params).toString();
         const response = await fetch("./get_cluster_results?" + params);
         const text = await response.text();
         const data = JSON.parse(text);
-        // if (data["cluster_comments"] == null) {
-        //     return false
-        // }
         topic_df_ids = data["topic_df_ids"];
         return data;
     }
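With cur_user now threaded into req_params, every get_cluster_results request identifies the requesting user — presumably so the server can scope results and models to that user's session. A sketch of the query string this produces, with illustrative values:

    // All values below are illustrative, not from the repo.
    const req_params = {
        cluster: "0_gay_homosexual",   // hypothetical topic id
        cur_user: "DemoUser",          // hypothetical user name
        pers_model: "model_DemoUser",  // hypothetical model name
        search_type: "keyword",
        keyword: "example",
    };
    console.log(new URLSearchParams(req_params).toString());
    // cluster=0_gay_homosexual&cur_user=DemoUser&pers_model=model_DemoUser&search_type=keyword&keyword=example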
indie_label_svelte/src/Labeling.svelte
CHANGED
@@ -8,7 +8,8 @@
     import Button, { Label } from "@smui/button";
     import LinearProgress from '@smui/linear-progress';
     import Svelecte from '../node_modules/svelecte/src/Svelecte.svelte';
-
+
+    export let cur_user;
 
     let model_name = "";
     let personalized_models = [];
@@ -16,24 +17,13 @@
     let label_modes = [
         "Create a new model",
         "Edit an existing model",
-        "Tune your model for a topic area",
-        "Set up a group-based model",
+        // "Tune your model for a topic area",
+        // "Set up a group-based model",
     ];
 
     let clusters_for_tuning = [];
     let topic;
 
-    // Get current user
-    let cur_user;
-    user.subscribe(value => {
-        if (value != cur_user) {
-            cur_user = value;
-            personalized_models = [];
-            getLabeling();
-        }
-        cur_user = value;
-    });
-
     // Handle routing
     let label_mode = label_modes[0];
     let searchParams = new URLSearchParams(window.location.search);
@@ -43,8 +33,10 @@
     } else if (req_label_mode == 1) {
         label_mode = label_modes[1];
     } else if (req_label_mode == 2) {
+        // Unused; previous topic-based mode
         label_mode = label_modes[2];
     } else if (req_label_mode == 3) {
+        // Unused; previous group-based mode
         label_mode = label_modes[3];
     }
 
@@ -101,7 +93,6 @@
     const response = await fetch("./get_group_model?" + params);
     const text = await response.text();
     const data = JSON.parse(text);
-    console.log("getGroupModel", data);
     return data
 }
 
@@ -172,7 +163,7 @@
         </li>
     </ul>
 
-    <CommentTable mode={"train"} model_name={model_name}/>
+    <CommentTable mode={"train"} model_name={model_name} cur_user={cur_user}/>
 </div>
 {:else if label_mode == label_modes[1]}
 <!-- EXISTING MODEL -->
@@ -202,7 +193,7 @@
         </li>
     </ul>
     {#key existing_model_name}
-        <CommentTable mode={"view"} model_name={existing_model_name}/>
+        <CommentTable mode={"view"} model_name={existing_model_name} cur_user={cur_user}/>
     {/key}
 </div>
 {:else if label_mode == label_modes[2]}
@@ -239,7 +230,7 @@
         </li>
     </ul>
     {#key topic}
-        <TopicTraining topic={topic} model_name={model_name} />
+        <TopicTraining topic={topic} model_name={model_name} cur_user={cur_user}/>
     {/key}
 </div>
 
indie_label_svelte/src/MainPanel.svelte
CHANGED
@@ -1,24 +1,20 @@
 <script lang="ts">
     import Labeling from "./Labeling.svelte";
     import Auditing from "./Auditing.svelte";
-    import AppOld from "./AppOld.svelte";
 
     import Tab, { Label } from "@smui/tab";
     import TabBar from "@smui/tab-bar";
 
     export let model;
-    // export let topic;
     export let error_type;
-
-    let app_versions = ["old", "new"];
-    let app_version = "new";
+    export let cur_user;
 
     // Handle routing
-    let active = "
+    let active = "labeling";
     let searchParams = new URLSearchParams(window.location.search);
     let tab = searchParams.get("tab");
-    if (tab == "
-        active = "
+    if (tab == "auditing") {
+        active = "auditing";
     }
 
 </script>
@@ -37,37 +33,16 @@
 </div>
 
 <div class="panel_contents">
-    [commented-out app-version Section opener (old lines 40-43) not captured in this view]
-        section_title="What app version do you want to use?"
-        section_opts={app_versions}
-        width_pct={40}
-        bind:value={app_version}
-    />
-    </div> -->
-
-    {#if app_version == app_versions[0]}
-    <!-- OLD VERSION -->
-    <AppOld />
-    {:else if app_version == app_versions[1]}
-    <!-- NEW VERSION -->
-    <div>
-        <div id="labeling" hidden={active == "auditing"} >
-            <Labeling/>
-        </div>
+    <div>
+        <div id="labeling" hidden={active == "auditing"} >
+            <Labeling cur_user={cur_user}/>
+        </div>
 
-
-
-    </div>
+        <div id="auditing" hidden={active == "labeling"} >
+            <Auditing bind:personalized_model={model} bind:cur_error_type={error_type} cur_user={cur_user} on:change/>
         </div>
-
+    </div>
 
-    <!-- TEMP -->
-    <!-- {#key model}
-        <div>Model: {model}</div>
-    {/key} -->
 </div>
 </div>
 
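The routing above keeps both tab panels mounted and only toggles visibility with the hidden attribute, so switching tabs preserves each panel's state. A condensed TypeScript sketch of the same logic:

    // Read the requested tab from the URL; default to "labeling".
    const searchParams = new URLSearchParams(window.location.search);
    const active = searchParams.get("tab") === "auditing" ? "auditing" : "labeling";
    // In the markup, each panel stays mounted but hides when inactive:
    //   <div id="labeling" hidden={active == "auditing"}> ... </div>
    //   <div id="auditing" hidden={active == "labeling"}> ... </div>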
indie_label_svelte/src/ModelPerf.svelte
CHANGED
@@ -1,9 +1,7 @@
 <script lang="ts">
     import { VegaLite } from "svelte-vega";
     import type { View } from "svelte-vega";
-
     import LayoutGrid, { Cell } from "@smui/layout-grid";
-    import Card, { Content } from '@smui/card';
 
     export let data;
 
@@ -13,64 +11,25 @@
     ];
     let perf_plot_view: View;
 
-    // let perf_plot2_spec = data["perf_plot2_json"];
-    // let perf_plot2_data = perf_plot2_spec["datasets"][perf_plot2_spec["data"]["name"]];
-    // let perf_plot2_view: View;
 </script>
 
 <div>
     <h6>Your Model Performance</h6>
-    [old MAE summary card markup (old lines 23-36) not captured in this view]
-        <ul>
-            <li>{@html data["mae_status"]}</li>
-            <!-- <li>
-                This is <b>better</b> (lower) than the average MAE for other users, so your model appears to <b>better capture</b> your views than the typical user model.
-            </li> -->
-        </ul>
-        </li>
-    </ul>
-    </Card>
-    </div>
-    </Cell>
-    </LayoutGrid>
+    <ul>
+        <li>
+            The <b>Mean Absolute Error (MAE)</b> metric indicates the average absolute difference <br>between your model's rating and your actual rating on a held-out set of comments.
+        </li>
+        <li>
+            You want your model to have a <b>lower</b> MAE (indicating <b>less error</b>).
+        </li>
+        <li>
+            <b>Your current MAE: {data["mae"]}</b>
+            <ul>
+                <li>{@html data["mae_status"]}</li>
+            </ul>
+        </li>
+    </ul>
     <div>
-        <!-- Overall -->
-        <!-- <table>
-            <tbody>
-                <tr>
-                    <td>
-                        <span class="bold">Mean Absolute Error (MAE)</span><br>
-
-                    </td>
-                    <td>
-                        <span class="bold-large">{data["mae"]}</span>
-                    </td>
-                </tr>
-                <tr>
-                    <td>
-                        <span class="bold">Average rating difference</span><br>
-                        This metric indicates the average difference between your model's rating and your actual rating on a held-out set of comments.
-                    </td>
-                    <td>
-                        <span class="bold-large">{data["avg_diff"]}</span>
-                    </td>
-                </tr>
-            </tbody>
-        </table> -->
-
        <!-- Performance visualization -->
        <div>
            <VegaLite {perf_plot_data} spec={perf_plot_spec} bind:view={perf_plot_view}/>
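The copy added above defines MAE as the average absolute difference between the model's predicted rating and the user's actual rating on held-out comments. A small worked sketch in TypeScript, with illustrative numbers (not from the app):

    // MAE = mean of |predicted - actual| over a held-out set.
    function meanAbsoluteError(pred: number[], actual: number[]): number {
        const n = Math.min(pred.length, actual.length);
        let total = 0;
        for (let i = 0; i < n; i++) {
            total += Math.abs(pred[i] - actual[i]);
        }
        return total / n;
    }

    // Illustrative values: |2-3| + |4-4| + |1-0| = 2, so MAE = 2/3 ≈ 0.667.
    console.log(meanAbsoluteError([2, 4, 1], [3, 4, 0]));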
indie_label_svelte/src/OverallResults.svelte
CHANGED
@@ -19,18 +19,6 @@
     let topic_overview_spec = topic_overview_json;
     let topic_overview_view: View;
 
-    // // Overall Histogram
-    // let overall_hist_json = data["overall_perf"]["overall_hist_json"];
-    // let overall_hist_data = overall_hist_json["datasets"][overall_hist_json["data"]["name"]];
-    // let overall_hist_spec = overall_hist_json;
-    // let overall_hist_view: View;
-
-    // // Class-conditional Histogram
-    // let class_cond_plot_json = data["overall_perf"]["class_cond_plot_json"];
-    // let class_cond_plot_data = class_cond_plot_json["datasets"][class_cond_plot_json["data"]["name"]];
-    // let class_cond_plot_spec = class_cond_plot_json;
-    // let class_cond_plot_view: View;
-
 </script>
 
 <div>
@@ -84,73 +72,6 @@
     </div>
 </div>
 
-<!-- Old visualizations -->
-<!-- <div style="margin-top: 500px">
-    <h6>Overall Performance</h6>
-    <div class="row">
-        <div class="col s12">
-            <div id="overall_perf">
-                <table>
-                    <tbody>
-                        <tr class="custom-blue">
-                            <td class="bold"
-                                >System {data[
-                                    "overall_perf"
-                                ]["metric"]} with YOUR labels</td
-                            >
-                            <td>
-                                <span class="bold-large"
-                                    >{data[
-                                        "overall_perf"
-                                    ]["user_metric"]}</span
-                                >
-                                (Percentile: {data[
-                                    "overall_perf"
-                                ]["user_percentile"]})
-                            </td>
-                        </tr>
-                        <tr>
-                            <td class="bold"
-                                >System {data[
-                                    "overall_perf"
-                                ]["metric"]} with OTHER USERS' labels</td
-                            >
-                            <td>
-                                <span class="bold-large"
-                                    >{data[
-                                        "overall_perf"
-                                    ]["other_metric"]}</span
-                                >
-                                (95% CI: [{data[
-                                    "overall_perf"
-                                ]["other_ci_low"]}, {data[
-                                    "overall_perf"
-                                ]["other_ci_high"]}])
-                            </td>
-                        </tr>
-                    </tbody>
-                </table>
-            </div>
-        </div>
-    </div>
-    <div class="row">
-        <div class="col s8">
-            <VegaLite {overall_hist_data} spec={overall_hist_spec} bind:view={overall_hist_view}/>
-        </div>
-    </div>
-
-    <h6>Performance Breakdown</h6>
-    <div class="row">
-        <div class="col s12">
-            <div class="row">
-                <div class="col s12">
-                    <VegaLite {class_cond_plot_data} spec={class_cond_plot_spec} bind:view={class_cond_plot_view} />
-                </div>
-            </div>
-        </div>
-    </div>
-</div> -->
-
 </div>
 <style>
 </style>
indie_label_svelte/src/Results.svelte
DELETED
@@ -1,206 +0,0 @@
-<script lang="ts">
-    import { onMount } from "svelte";
-    import ClusterResults from "./ClusterResults.svelte";
-
-    import Button, { Label } from "@smui/button";
-    import LinearProgress from "@smui/linear-progress";
-    import Checkbox from '@smui/checkbox';
-    import DataTable, {
-        Head,
-        Body,
-        Row,
-        Cell,
-        Label,
-        SortValue,
-    } from "@smui/data-table";
-    import FormField from "@smui/form-field";
-
-    let cur_examples = [];
-    let promise = Promise.resolve(null);
-
-    let scaffold_methods = ["personal", "personal_group", "prompts"];
-
-    let all_users = [];
-    async function getUsers() {
-        const response = await fetch("./get_users");
-        const text = await response.text();
-        const data = JSON.parse(text);
-        all_users = data["users"];
-        promise = getResults();
-    }
-
-    onMount(async () => {
-        getUsers()
-    });
-
-    async function getResults() {
-        let req_params = {
-            users: all_users
-        };
-        let params = new URLSearchParams(req_params).toString();
-        const response = await fetch("./get_results?" + params);
-        const text = await response.text();
-        const data = JSON.parse(text);
-
-        let results = data["results"];
-        return results;
-    }
-
-    function get_complete_ratio(reports) {
-        let total = reports.length;
-        let complete = reports.filter(item => item.complete_status).length;
-        return "" + complete + "/" + total + " complete";
-    }
-
-    function get_complete_count(reports) {
-        return reports.filter(item => item.complete_status).length;
-    }
-
-    function get_summary(reports) {
-        let summary = "";
-        let total_audits = 0
-        for (const scaffold_method of scaffold_methods) {
-            if (reports[scaffold_method]) {
-                let cur_reports = reports[scaffold_method];
-                let cur_ratio = get_complete_ratio(cur_reports);
-                let cur_result = "<li><b>" + scaffold_method + "</b>: " + cur_ratio + "</li>";
-                summary += cur_result;
-                let cur_complete = get_complete_count(cur_reports);
-                total_audits += cur_complete;
-            }
-        }
-
-        let top_summary = "<li><b>Total audits</b>: " + total_audits + "</li>";
-        summary = "<ul>" + top_summary + summary + "</ul>";
-        return summary;
-    }
-
-    function get_url(user, scaffold_method) {
-        return "http://localhost:5001/?user=" + user + "&scaffold=" + scaffold_method;
-    }
-</script>
-
-<svelte:head>
-    <title>Results</title>
-</svelte:head>
-
-<div class="panel">
-    <div class="panel_contents">
-        <div>
-            <h3>Results</h3>
-        </div>
-
-        <div style="padding-top:50px">
-            {#await promise}
-                <div class="app_loading">
-                    <LinearProgress indeterminate />
-                </div>
-            {:then results}
-                {#if results}
-                    {#each results as user_report}
-                        <div class="head_3">{user_report["user"]}</div>
-                        <div class="section_indent">
-                            <div class="head_5">Summary</div>
-                            <div>{@html get_summary(user_report)}</div>
-                            <ul>
-                                <li>Labeling pages
-                                    <ul>
-                                        <li>
-                                            <a href="http://localhost:5001/?user={user_report["user"]}&tab=labeling&label_mode=3" target="_blank">Group-based model</a>
-                                        </li>
-                                        <li>
-                                            <a href="http://localhost:5001/?user={user_report["user"]}&tab=labeling&label_mode=0" target="_blank">Personalized model</a>
-                                        </li>
-                                    </ul>
-                                </li>
-                                <li>Auditing pages
-                                    <ul>
-                                        <li>
-                                            <a href="http://localhost:5001/?user={user_report["user"]}&scaffold=personal_group" target="_blank">Group-based audit - personal scaffold</a>
-                                        </li>
-                                        <li>
-                                            <a href="http://localhost:5001/?user={user_report["user"]}&scaffold=personal" target="_blank">Individual audit - personal scaffold</a>
-                                        </li>
-                                        <li>
-                                            <a href="http://localhost:5001/?user={user_report["user"]}&scaffold=prompts" target="_blank">Individual audit - prompt scaffold</a>
-                                        </li>
-                                    </ul>
-                                </li>
-                            </ul>
-                        </div>
-                        {#each scaffold_methods as scaffold_method}
-                            {#if user_report[scaffold_method]}
-                                <div class="spacing_vert_60 section_indent">
-                                    <div class="head_5">
-                                        {scaffold_method} ({get_complete_ratio(user_report[scaffold_method])})
-                                        [<a href={get_url(user_report["user"], scaffold_method)} target="_blank">link</a>]
-                                    </div>
-                                    {#each user_report[scaffold_method] as report}
-                                        <div class="spacing_vert_40 section_indent">
-                                            <div class="head_6_non_cap">
-                                                {report["title"]}
-                                            </div>
-
-                                            <div class="spacing_vert_20">
-                                                <div class="">
-                                                    <b>Error type</b>
-                                                </div>
-                                                {report["error_type"]}
-                                            </div>
-
-                                            <div class="spacing_vert_20">
-                                                <div class="">
-                                                    <b>Evidence</b>
-                                                </div>
-                                                {#if report["evidence"].length > 0}
-                                                    <ClusterResults
-                                                        cluster={null}
-                                                        model={null}
-                                                        data={{"cluster_comments": report["evidence"]}}
-                                                        show_vis={false}
-                                                        show_checkboxes={false}
-                                                        table_width_pct={100}
-                                                        rowsPerPage={10}
-                                                        table_id={"panel"}
-                                                    />
-                                                {:else}
-                                                    <p class="grey_text">
-                                                        No examples added
-                                                    </p>
-                                                {/if}
-                                            </div>
-
-                                            <div class="spacing_vert_20">
-                                                <div class="">
-                                                    <b>Summary/Suggestions</b>
-                                                </div>
-                                                {report["text_entry"]}
-                                            </div>
-
-                                            <div class="spacing_vert_20">
-                                                <b>Completed</b>
-                                                <FormField>
-                                                    <Checkbox checked={report["complete_status"]} disabled/>
-                                                </FormField>
-                                            </div>
-
-                                        </div>
-                                    {/each}
-                                </div>
-                            {/if}
-                        {/each}
-                    {/each}
-                {/if}
-            {:catch error}
-                <p style="color: red">{error.message}</p>
-            {/await}
-        </div>
-    </div>
-</div>
-
-<style>
-    .panel {
-        width: 80%;
-        padding: 50px;
-    }
-</style>
indie_label_svelte/src/SelectUserDialog.svelte
DELETED
@@ -1,66 +0,0 @@
-<script lang="ts">
-    import Dialog, { Title, Content, Actions } from "@smui/dialog";
-    import Button, { Label } from "@smui/button";
-    import Textfield from "@smui/textfield";
-    import Select, { Option } from "@smui/select";
-    import { user } from "./stores/cur_user_store.js";
-    import { users } from "./stores/all_users_store.js";
-
-    export let open;
-    export let cur_user;
-    let cur_user_tf = cur_user;
-    let cur_user_sel = cur_user;
-
-    let all_users;
-    users.subscribe((value) => {
-        all_users = value;
-    });
-
-    function updateUserTextField() {
-        user.update((value) => cur_user_tf);
-        if (!all_users.includes(user)) {
-            all_users = all_users.concat(cur_user_tf);
-            users.update(all_users);
-        }
-        open = false;
-    }
-
-    function updateUserSel() {
-        user.update((value) => cur_user_sel);
-        open = false;
-    }
-</script>
-
-<div>
-    <Dialog
-        bind:open
-        aria-labelledby="simple-title"
-        aria-describedby="simple-content"
-    >
-        <!-- Title cannot contain leading whitespace due to mdc-typography-baseline-top() -->
-        <Title id="simple-title">Select Current User</Title>
-        <Content id="simple-content">
-            <Textfield bind:value={cur_user_tf} label="Enter user's name" />
-
-            <Select bind:value={cur_user_sel} label="Select Menu">
-                {#each all_users as u}
-                    <Option value={u}>{u}</Option>
-                {/each}
-            </Select>
-        </Content>
-        <Actions>
-            <Button on:click={updateUserTextField}>
-                <Label>Update from TextField</Label>
-            </Button>
-            <Button on:click={updateUserSel}>
-                <Label>Update from Select</Label>
-            </Button>
-        </Actions>
-    </Dialog>
-</div>
-
-<style>
-    :global(.mdc-dialog__surface) {
-        height: 300px;
-    }
-</style>
indie_label_svelte/src/StudyLinks.svelte
DELETED
@@ -1,59 +0,0 @@
-<script lang="ts">
-    import { user } from "./stores/cur_user_store.js";
-
-    let cur_user;
-    user.subscribe((value) => {
-        cur_user = value;
-    });
-
-</script>
-
-<svelte:head>
-    <title>Study Links</title>
-</svelte:head>
-
-<div class="panel">
-    <div class="panel_contents">
-        <div>
-            <h3>Study Links</h3>
-        </div>
-
-        <div>
-            <!-- <div class="head_5">{cur_user}</div> -->
-            <div class="section_indent">
-                <ul>
-                    <li>Labeling pages
-                        <ul>
-                            <li>
-                                <a href="http://localhost:5001/?user={cur_user}&tab=labeling&label_mode=3" target="_blank">Group-based model</a>
-                            </li>
-                            <li>
-                                <a href="http://localhost:5001/?user={cur_user}&tab=labeling&label_mode=0" target="_blank">Personalized model</a>
-                            </li>
-                        </ul>
-                    </li>
-                    <li>Auditing pages
-                        <ul>
-                            <li>
-                                <a href="http://localhost:5001/?user={cur_user}&scaffold=personal_group" target="_blank">Group-based audit - personal scaffold</a>
-                            </li>
-                            <li>
-                                <a href="http://localhost:5001/?user={cur_user}&scaffold=personal" target="_blank">Individual audit - personal scaffold</a>
-                            </li>
-                            <li>
-                                <a href="http://localhost:5001/?user={cur_user}&scaffold=prompts" target="_blank">Individual audit - prompt scaffold</a>
-                            </li>
-                        </ul>
-                    </li>
-                </ul>
-            </div>
-        </div>
-    </div>
-</div>
-
-<style>
-    .panel {
-        width: 80%;
-        padding: 50px;
-    }
-</style>
indie_label_svelte/src/SubmitReportDialog.svelte
ADDED
@@ -0,0 +1,120 @@
+<script lang="ts">
+    import Dialog, { Title, Content, Actions } from "@smui/dialog";
+    import Button, { Label } from "@smui/button";
+    import Textfield from "@smui/textfield";
+    import Select, { Option } from "@smui/select";
+    import CircularProgress from '@smui/circular-progress';
+
+    export let open;
+    export let cur_user;
+    export let all_reports;
+    let email = "";
+    let all_sep_options = [
+        "Accuracy",
+        "Bias/Discrimination",
+        "Adversarial Example",
+        "Other",
+    ];
+    let sep_selection = "";
+
+    let promise_submit = Promise.resolve(null);
+    function handleSubmitReport() {
+        promise_submit = submitReport();
+    }
+
+    async function submitReport() {
+        let req_params = {
+            cur_user: cur_user,
+            reports: JSON.stringify(all_reports),
+            email: email,
+            sep_selection: sep_selection,
+        };
+
+        let params = new URLSearchParams(req_params).toString();
+        const response = await fetch("./submit_avid_report?" + params);
+        const text = await response.text();
+        const data = JSON.parse(text);
+        return data;
+    }
+
+</script>
+
+<div>
+    <Dialog
+        bind:open
+        aria-labelledby="simple-title"
+        aria-describedby="simple-content"
+    >
+        <!-- Title cannot contain leading whitespace due to mdc-typography-baseline-top() -->
+        <Title id="simple-title">Send All Audit Reports</Title>
+        <Content id="simple-content">
+            <!-- Description -->
+            <div>
+                <b>When you are ready to send all of your audit reports to the <a href="https://avidml.org/" target="_blank">AI Vulnerability Database</a> (AVID), please fill out the following information.</b>
+                Only your submitted reports will be stored in the database for further analysis. While you can submit reports anonymously, we encourage you to provide your email so that we can contact you if we have any questions.
+            </div>
+
+            <!-- Summary of complete reports -->
+            <div>
+                <p><b>Summary of Reports to Send</b> (Reports that include evidence and are marked as complete)</p>
+                <ul>
+                    {#each all_reports as report}
+                        {#if report["complete_status"] && (report["evidence"].length > 0)}
+                            <li>{report["title"]}</li>
+                            <ul>
+                                <li>Error Type: {report["error_type"]}</li>
+                                <li>Evidence: Includes {report["evidence"].length} example{(report["evidence"].length > 1) ? 's' : ''}</li>
+                                <li>Summary/Suggestions: {report["text_entry"]}</li>
+                            </ul>
+                        {/if}
+                    {/each}
+                </ul>
+            </div>

+            <!-- Form fields -->
+            <div>
+                <Select bind:value={sep_selection} label="Audit category" style="width: 90%">
+                    {#each all_sep_options as opt}
+                        <Option value={opt}>{opt}</Option>
+                    {/each}
+                </Select>
+            </div>
+            <div>
+                <Textfield bind:value={email} label="(Optional) Contact email" style="width: 90%" />
+            </div>
+
+            <!-- Submission and status message -->
+            <div class="dialog_footer">
+                <Button on:click={handleSubmitReport} variant="outlined">
+                    <Label>Submit Report to AVID</Label>
+                </Button>
+
+                <div>
+                    <span style="color: grey"><i>
+                        {#await promise_submit}
+                            <CircularProgress style="height: 32px; width: 32px;" indeterminate />
+                        {:then result}
+                            {#if result}
+                                Successfully sent reports! You may close this window.
+                            {/if}
+                        {:catch error}
+                            <p style="color: red">{error.message}</p>
+                        {/await}
+                    </i></span>
+                </div>
+            </div>
+        </Content>
+    </Dialog>
+</div>
+
+<style>
+    :global(.mdc-dialog__surface) {
+        min-width: 50%;
+        min-height: 50%;
+        margin-left: 30%;
+    }
+
+    .dialog_footer {
+        padding: 20px 0px;
+    }
+</style>
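
The new SubmitReportDialog component drives the /submit_avid_report route added to server.py below: it JSON-encodes all reports into a single `reports` query parameter alongside the user, the selected audit category, and an optional email. A minimal Python sketch of the equivalent request, assuming a local dev server at http://localhost:5001 (hypothetical; match your deployment):

import json
import requests

BASE_URL = "http://localhost:5001"  # assumption: local Flask server

# Illustrative report in the shape the dialog summarizes
# (title, error type, evidence list, free-text summary, completion flag).
reports = [{
    "title": "Comments mentioning LGBTQ+ identity",
    "error_type": "System is over-sensitive",
    "evidence": [{"comment": "example comment", "item_id": 1}],
    "text_entry": "Non-derogatory mentions are rated as toxic.",
    "complete_status": True,
}]

# Same query-string encoding the dialog builds with URLSearchParams before fetch().
resp = requests.get(
    f"{BASE_URL}/submit_avid_report",
    params={
        "cur_user": "DemoUser",
        "reports": json.dumps(reports),
        "email": "auditor@example.com",
        "sep_selection": "Bias/Discrimination",
    },
)
print(resp.json())  # expected: {"status": "success"}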
indie_label_svelte/src/TopicTraining.svelte
CHANGED
@@ -4,21 +4,15 @@
     import Button, { Label } from "@smui/button";
     import DataTable, { Head, Body, Row, Cell } from "@smui/data-table";
     import LinearProgress from '@smui/linear-progress';
-    import { user } from './stores/cur_user_store.js';
     import { model_chosen } from './stores/cur_model_store.js';
 
     export let topic;
     export let model_name = null;
+    export let cur_user;
 
     let to_label = {};
     let promise = Promise.resolve(null);
 
-    // Get current user
-    let cur_user;
-    user.subscribe(value => {
-        cur_user = value;
-    });
-
     // Get current model
     if (model_name == null) {
         model_chosen.subscribe(value => {
@@ -81,7 +75,6 @@
             topic: topic,
         };
 
-        console.log("topic training model name", model_name);
         let params = new URLSearchParams(req_params).toString();
         const response = await fetch("./get_personalized_model_topic?" + params); // TODO
         const text = await response.text();
@@ -90,7 +83,6 @@
         model_name = data["new_model_name"];
         model_chosen.update((value) => model_name);
 
-        console.log("topicTraining", data);
         return data;
     }
 </script>
indie_label_svelte/src/stores/all_users_store.js
DELETED
@@ -1,6 +0,0 @@
-import { writable } from 'svelte/store';
-
-// Fallback if request doesn't work
-let all_users = ["DemoUser"];
-
-export const users = writable(all_users);
indie_label_svelte/src/stores/cur_user_store.js
DELETED
@@ -1,3 +0,0 @@
-import { writable } from 'svelte/store';
-
-export const user = writable("DemoUser");
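
With both stores removed, the client no longer tracks the current user itself; the server.py changes below instead assign an ID when the page arrives without one, using the friendlywords package. A minimal sketch of that default-ID logic (assign_user_id is a hypothetical standalone helper; the real check lives inline in audit_settings):

import friendlywords as fw

def assign_user_id(requested):
    # The client sends the literal string "null" when no user is set in the URL.
    if requested == "null":
        # Generate a random two-word user ID, e.g. "brave_otter".
        return fw.generate(2, separator="_")
    return requested

print(assign_user_id("null"))   # random two-word ID
print(assign_user_id("alice"))  # "alice"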
server.py
CHANGED
@@ -17,10 +17,15 @@ import math
 import altair as alt
 import matplotlib.pyplot as plt
 import time
+import friendlywords as fw
 
 import audit_utils as utils
 
+import requests
+
+
 app = Flask(__name__)
+DEBUG = False  # Debug flag for development; set to False for production
 
 # Path for our main Svelte page
 @app.route("/")
@@ -35,15 +40,19 @@ def home(path):
 
 ########################################
 # ROUTE: /AUDIT_SETTINGS
-comments_grouped_full_topic_cat = pd.read_pickle("data/comments_grouped_full_topic_cat2_persp.pkl")
 
 @app.route("/audit_settings")
-def audit_settings():
+def audit_settings(debug=DEBUG):
     # Fetch page content
     user = request.args.get("user")
     scaffold_method = request.args.get("scaffold_method")
 
-
+    # Assign user ID if none is provided (default case)
+    if user == "null":
+        # Generate random two-word user ID
+        user = fw.generate(2, separator="_")
+
+    user_models = utils.get_user_model_names(user)
     grp_models = [m for m in user_models if m.startswith(f"model_{user}_group_")]
 
     clusters = utils.get_unique_topics()
@@ -66,19 +75,6 @@ def audit_settings():
         "options": [{"value": i, "text": cluster} for i, cluster in enumerate(clusters)],
     },]
 
-    if scaffold_method == "personal_cluster":
-        cluster_model = user_models[0]
-        personal_cluster_file = f"./data/personal_cluster_dfs/{cluster_model}.pkl"
-        if os.path.isfile(personal_cluster_file) and cluster_model != "":
-            print("audit_settings", personal_cluster_file, cluster_model)
-            topics_under_top, topics_over_top = utils.get_personal_clusters(cluster_model)
-            pers_cluster = topics_under_top + topics_over_top
-            pers_cluster_options = {
-                "label": "Personalized clusters",
-                "options": [{"value": i, "text": cluster} for i, cluster in enumerate(pers_cluster)],
-            }
-            clusters_options.insert(0, pers_cluster_options)
-
     clusters_for_tuning = utils.get_large_clusters(min_n=150)
     clusters_for_tuning_options = [{"value": i, "text": cluster} for i, cluster in enumerate(clusters_for_tuning)] # Format for Svelecte UI element
 
@@ -86,54 +82,33 @@ def audit_settings():
         "personalized_models": user_models,
         "personalized_model_grp": grp_models,
         "perf_metrics": ["Average rating difference", "Mean Absolute Error (MAE)", "Root Mean Squared Error (RMSE)", "Mean Squared Error (MSE)"],
-        "breakdown_categories": ['Topic', 'Toxicity Category', 'Toxicity Severity'],
         "clusters": clusters_options,
         "clusters_for_tuning": clusters_for_tuning_options,
+        "user": user,
     }
     return json.dumps(context)
 
-########################################
-# ROUTE: /GET_USERS
-@app.route("/get_users")
-def get_users():
-    # Fetch page content
-    with open(f"./data/users_to_models.pkl", "rb") as f:
-        users_to_models = pickle.load(f)
-    users = list(users_to_models.keys())
-    context = {
-        "users": users,
-    }
-    return json.dumps(context)
 
 ########################################
 # ROUTE: /GET_AUDIT
 @app.route("/get_audit")
 def get_audit():
     pers_model = request.args.get("pers_model")
-    perf_metric = request.args.get("perf_metric")
-    breakdown_axis = request.args.get("breakdown_axis")
-    breakdown_sort = request.args.get("breakdown_sort")
-    n_topics = int(request.args.get("n_topics"))
     error_type = request.args.get("error_type")
     cur_user = request.args.get("cur_user")
    topic_vis_method = request.args.get("topic_vis_method")
     if topic_vis_method == "null":
         topic_vis_method = "median"
 
-    if
-
-    elif breakdown_sort == "default":
-        sort_class_plot = False
+    if pers_model == "" or pers_model == "null" or pers_model == "undefined":
+        overall_perf = None
     else:
-
-
-
-
-
-
-        breakdown_axis=breakdown_axis,
-        topic_vis_method=topic_vis_method,
-    )
+        overall_perf = utils.show_overall_perf(
+            cur_model=pers_model,
+            error_type=error_type,
+            cur_user=cur_user,
+            topic_vis_method=topic_vis_method,
+        )
 
     results = {
         "overall_perf": overall_perf,
@@ -143,60 +118,34 @@ def get_audit():
 ########################################
 # ROUTE: /GET_CLUSTER_RESULTS
 @app.route("/get_cluster_results")
-def get_cluster_results():
+def get_cluster_results(debug=DEBUG):
     pers_model = request.args.get("pers_model")
-
+    cur_user = request.args.get("cur_user")
     cluster = request.args.get("cluster")
-    example_sort = request.args.get("example_sort")
-    comparison_group = request.args.get("comparison_group")
     topic_df_ids = request.args.getlist("topic_df_ids")
     topic_df_ids = [int(val) for val in topic_df_ids[0].split(",") if val != ""]
     search_type = request.args.get("search_type")
     keyword = request.args.get("keyword")
-    n_neighbors = request.args.get("n_neighbors")
-    if n_neighbors != "null":
-        n_neighbors = int(n_neighbors)
-    neighbor_threshold = 0.6
     error_type = request.args.get("error_type")
     use_model = request.args.get("use_model") == "true"
-    scaffold_method = request.args.get("scaffold_method")
-
 
-
-
-    if os.path.isfile(cluster_model_file):
-        pers_model = f"{pers_model}_{cluster}"
-
-    print(f"get_cluster_results using model {pers_model}")
-
-    other_ids = []
-    perf_metric = "avg_diff"
-    sort_ascending = True if example_sort == "ascending" else False
+    if debug:
+        print(f"get_cluster_results using model {pers_model}")
 
+    # Prepare cluster df (topic_df)
     topic_df = None
-
-
-
-
-    with
-
-
-
-
-    with open(f"data/preds_dfs/{pers_model}.pkl", "rb") as f:
-        topic_df = pickle.load(f)
-    if search_type == "cluster":
-        # Display examples with comment, your pred, and other users' pred
-        topic_df = topic_df[(topic_df["topic"] == cluster) | (topic_df["item_id"].isin(topic_df_ids))]
-
-    elif search_type == "neighbors":
-        neighbor_ids = utils.get_match(topic_df_ids, K=n_neighbors, threshold=neighbor_threshold, debug=False)
-        topic_df = topic_df[(topic_df["item_id"].isin(neighbor_ids)) | (topic_df["item_id"].isin(topic_df_ids))]
-    elif search_type == "keyword":
-        topic_df = topic_df[(topic_df["comment"].str.contains(keyword, case=False, regex=False)) | (topic_df["item_id"].isin(topic_df_ids))]
-
+    preds_file = utils.get_preds_file(cur_user, pers_model)
+    with open(preds_file, "rb") as f:
+        topic_df = pickle.load(f)
+    if search_type == "cluster":
+        # Display examples with comment, your pred, and other users' pred
+        topic_df = topic_df[(topic_df["topic"] == cluster) | (topic_df["item_id"].isin(topic_df_ids))]
+    elif search_type == "keyword":
+        topic_df = topic_df[(topic_df["comment"].str.contains(keyword, case=False, regex=False)) | (topic_df["item_id"].isin(topic_df_ids))]
+
     topic_df = topic_df.drop_duplicates()
-
+    if debug:
+        print("len topic_df", len(topic_df))
 
     # Handle empty results
     if len(topic_df) == 0:
@@ -217,18 +166,20 @@ def get_cluster_results():
 
     topic_df_ids = topic_df["item_id"].unique().tolist()
 
-
-
+    # Prepare overview plot for the cluster
+    if use_model:
+        # Display results with the model as a reference point
+        cluster_overview_plot_json, sampled_df = utils.plot_overall_vis_cluster(cur_user, topic_df, error_type=error_type, n_comments=500)
     else:
-        #
-        cluster_overview_plot_json, sampled_df = utils.
+        # Display results without a model
+        cluster_overview_plot_json, sampled_df = utils.plot_overall_vis_cluster_no_model(cur_user, topic_df, n_comments=500)
 
-    cluster_comments = utils.get_cluster_comments(sampled_df,error_type=error_type,
+    cluster_comments = utils.get_cluster_comments(sampled_df,error_type=error_type, use_model=use_model) # New version of cluster comment table
 
     results = {
         "topic_df_ids": topic_df_ids,
         "cluster_overview_plot_json": json.loads(cluster_overview_plot_json),
-        "cluster_comments": cluster_comments,
+        "cluster_comments": cluster_comments.to_json(orient="records"),
     }
     return json.dumps(results)
 
@@ -255,7 +206,7 @@ def get_group_size():
 ########################################
 # ROUTE: /GET_GROUP_MODEL
 @app.route("/get_group_model")
-def get_group_model():
+def get_group_model(debug=DEBUG):
     # Fetch info for initial labeling component
     model_name = request.args.get("model_name")
     user = request.args.get("user")
@@ -275,28 +226,21 @@ def get_group_model():
     grp_ids = grp_df["worker_id"].tolist()
 
     ratings_grp = utils.get_grp_model_labels(
-        comments_df=comments_grouped_full_topic_cat,
         n_label_per_bin=BIN_DISTRIB,
         score_bins=SCORE_BINS,
         grp_ids=grp_ids,
     )
 
-    # print("ratings_grp", ratings_grp)
-
     # Modify model name
     model_name = f"{model_name}_group_gender{sel_gender}_relig{sel_relig}_pol{sel_pol}_race{sel_race_orig}_lgbtq_{sel_lgbtq}"
-
-    label_dir = f"./data/labels/{model_name}"
-    # Create directory for labels if it doesn't yet exist
-    if not os.path.isdir(label_dir):
-        os.mkdir(label_dir)
-    last_label_i = len([name for name in os.listdir(label_dir) if (os.path.isfile(os.path.join(label_dir, name)) and name.endswith('.pkl'))])
+    utils.setup_user_model_dirs(user, model_name)
 
     # Train group model
-    mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name,
+    mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, ratings_grp, user)
 
     duration = time.time() - start
-
+    if debug:
+        print("Time to train/cache:", duration)
 
     context = {
         "group_size": group_size,
@@ -314,11 +258,10 @@ def get_labeling():
     clusters_for_tuning = utils.get_large_clusters(min_n=150)
     clusters_for_tuning_options = [{"value": i, "text": cluster} for i, cluster in enumerate(clusters_for_tuning)] # Format for Svelecte UI element
 
-
-    model_name_suggestion = f"model_{user}"
+    model_name_suggestion = f"my_model"
 
     context = {
-        "personalized_models": utils.
+        "personalized_models": utils.get_user_model_names(user),
         "model_name_suggestion": model_name_suggestion,
        "clusters_for_tuning": clusters_for_tuning_options,
     }
@@ -326,15 +269,16 @@ def get_labeling():
 
 ########################################
 # ROUTE: /GET_COMMENTS_TO_LABEL
-
-BIN_DISTRIB = [
+if DEBUG:
+    BIN_DISTRIB = [1, 2, 4, 2, 1] # 10 comments
+else:
+    BIN_DISTRIB = [2, 4, 8, 4, 2] # 20 comments
 SCORE_BINS = [(0.0, 0.5), (0.5, 1.5), (1.5, 2.5), (2.5, 3.5), (3.5, 4.01)]
 @app.route("/get_comments_to_label")
 def get_comments_to_label():
     n = int(request.args.get("n"))
     # Fetch examples to label
     to_label_ids = utils.create_example_sets(
-        comments_df=comments_grouped_full_topic_cat,
        n_label_per_bin=BIN_DISTRIB,
         score_bins=SCORE_BINS,
         keyword=None
@@ -351,14 +295,11 @@ def get_comments_to_label():
 
 ########################################
 # ROUTE: /GET_COMMENTS_TO_LABEL_TOPIC
-N_LABEL_PER_BIN_TOPIC = 2 # 2 * 5 = 10 comments
 @app.route("/get_comments_to_label_topic")
 def get_comments_to_label_topic():
     # Fetch examples to label
     topic = request.args.get("topic")
     to_label_ids = utils.create_example_sets(
-        comments_df=comments_grouped_full_topic_cat,
-        # n_label_per_bin=N_LABEL_PER_BIN_TOPIC,
         n_label_per_bin=BIN_DISTRIB,
         score_bins=SCORE_BINS,
         keyword=None,
@@ -375,38 +316,33 @@ def get_comments_to_label_topic():
 ########################################
 # ROUTE: /GET_PERSONALIZED_MODEL
 @app.route("/get_personalized_model")
-def get_personalized_model():
+def get_personalized_model(debug=DEBUG):
     model_name = request.args.get("model_name")
     ratings_json = request.args.get("ratings")
     mode = request.args.get("mode")
     user = request.args.get("user")
     ratings = json.loads(ratings_json)
-
-
+    if debug:
+        print(ratings)
+    start = time.time()
 
-
-    # Create directory for labels if it doesn't yet exist
-    if not os.path.isdir(label_dir):
-        os.mkdir(label_dir)
-    last_label_i = len([name for name in os.listdir(label_dir) if (os.path.isfile(os.path.join(label_dir, name)) and name.endswith('.pkl'))])
+    utils.setup_user_model_dirs(user, model_name)
 
     # Handle existing or new model cases
     if mode == "view":
         # Fetch prior model performance
-
-            raise Exception(f"Model {model_name} does not exist")
-        else:
-            mae, mse, rmse, avg_diff, ratings_prev = utils.fetch_existing_data(model_name, last_label_i)
+        mae, mse, rmse, avg_diff, ratings_prev = utils.fetch_existing_data(user, model_name)
 
     elif mode == "train":
         # Train model and cache predictions using new labels
         print("get_personalized_model train")
-        mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name,
-
-
-
+        mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, ratings, user)
+
+        if debug:
+            duration = time.time() - start
+            print("Time to train/cache:", duration)
 
-    perf_plot, mae_status = utils.plot_train_perf_results(model_name, mae)
+    perf_plot, mae_status = utils.plot_train_perf_results(user, model_name, mae)
     perf_plot_json = perf_plot.to_json()
 
     def round_metric(x):
@@ -419,7 +355,6 @@ def get_personalized_model():
         "mse": round_metric(mse),
         "rmse": round_metric(rmse),
         "avg_diff": round_metric(avg_diff),
-        "duration": duration,
         "ratings_prev": ratings_prev,
         "perf_plot_json": json.loads(perf_plot_json),
     }
@@ -429,34 +364,29 @@ def get_personalized_model():
 ########################################
 # ROUTE: /GET_PERSONALIZED_MODEL_TOPIC
 @app.route("/get_personalized_model_topic")
-def get_personalized_model_topic():
+def get_personalized_model_topic(debug=DEBUG):
     model_name = request.args.get("model_name")
     ratings_json = request.args.get("ratings")
     user = request.args.get("user")
     ratings = json.loads(ratings_json)
     topic = request.args.get("topic")
-
+    if debug:
+        print(ratings)
     start = time.time()
 
     # Modify model name
     model_name = f"{model_name}_{topic}"
-
-    label_dir = f"./data/labels/{model_name}"
-    # Create directory for labels if it doesn't yet exist
-    if not os.path.isdir(label_dir):
-        os.mkdir(label_dir)
-    last_label_i = len([name for name in os.listdir(label_dir) if (os.path.isfile(os.path.join(label_dir, name)) and name.endswith('.pkl'))])
+    utils.setup_user_model_dirs(user, model_name)
 
     # Handle existing or new model cases
     # Train model and cache predictions using new labels
-
-
+    if debug:
+        print("get_personalized_model_topic train")
+    mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, ratings, user, topic=topic)
 
-
-
-
-    def round_metric(x):
-        return np.round(abs(x), 3)
+    if debug:
+        duration = time.time() - start
+        print("Time to train/cache:", duration)
 
     results = {
         "success": "success",
@@ -477,17 +407,13 @@ def get_reports():
     if topic_vis_method == "null":
         topic_vis_method = "fp_fn"
 
-    # Load reports for current user from stored
-
-
-
-    if not os.path.isfile(user_file):
+    # Load reports for current user from stored file
+    reports_file = utils.get_reports_file(cur_user, model)
+    if not os.path.isfile(reports_file):
         if scaffold_method == "fixed":
             reports = get_fixed_scaffold()
         elif (scaffold_method == "personal" or scaffold_method == "personal_group" or scaffold_method == "personal_test"):
-            reports = get_personal_scaffold(model, topic_vis_method)
-        elif (scaffold_method == "personal_cluster"):
-            reports = get_personal_cluster_scaffold(model)
+            reports = get_personal_scaffold(cur_user, model, topic_vis_method)
        elif scaffold_method == "prompts":
             reports = get_prompts_scaffold()
         elif scaffold_method == "tutorial":
@@ -505,8 +431,8 @@ def get_reports():
         ]
     else:
         # Load from pickle file
-        with open(
-            reports =
+        with open(reports_file, "rb") as f:
+            reports = json.load(f)
 
     results = {
         "reports": reports,
@@ -572,23 +498,13 @@ def get_tutorial_scaffold():
     },
     ]
 
-def get_personal_cluster_scaffold(model):
-    topics_under_top, topics_over_top = utils.get_personal_clusters(model)
-
-    report_under = [get_empty_report(topic, "System is under-sensitive") for topic in topics_under_top]
-
-    report_over = [get_empty_report(topic, "System is over-sensitive") for topic in topics_over_top]
-    reports = (report_under + report_over)
-    random.shuffle(reports)
-    return reports
-
 def get_topic_errors(df, topic_vis_method, threshold=2):
-    topics = df["
+    topics = df["topic"].unique().tolist()
     topic_errors = {}
     for topic in topics:
-        t_df = df[df["
-        y_true = t_df["pred"].to_numpy()
-        y_pred = t_df["
+        t_df = df[df["topic"] == topic]
+        y_true = t_df["pred"].to_numpy() # Predicted user rating (treated as ground truth)
+        y_pred = t_df["rating_sys"].to_numpy() # System rating (which we're auditing)
         if topic_vis_method == "mae":
             t_err = mean_absolute_error(y_true, y_pred)
         elif topic_vis_method == "mse":
@@ -596,8 +512,8 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
         elif topic_vis_method == "avg_diff":
             t_err = np.mean(y_true - y_pred)
         elif topic_vis_method == "fp_proportion":
-            y_true = [0 if rating < threshold else 1 for rating in
-            y_pred = [0 if rating < threshold else 1 for rating in
+            y_true = [0 if rating < threshold else 1 for rating in y_true]
+            y_pred = [0 if rating < threshold else 1 for rating in y_pred]
            try:
                 tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
             except:
@@ -605,8 +521,8 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
             total = float(len(y_true))
             t_err = fp / total
         elif topic_vis_method == "fn_proportion":
-            y_true = [0 if rating < threshold else 1 for rating in
-            y_pred = [0 if rating < threshold else 1 for rating in
+            y_true = [0 if rating < threshold else 1 for rating in y_true]
+            y_pred = [0 if rating < threshold else 1 for rating in y_pred]
             try:
                 tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
             except:
@@ -617,65 +533,69 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
 
     return topic_errors
 
-def get_personal_scaffold(model, topic_vis_method, n_topics=200, n=5):
+def get_personal_scaffold(cur_user, model, topic_vis_method, n_topics=200, n=5, debug=DEBUG):
     threshold = utils.get_toxic_threshold()
 
     # Get topics with greatest amount of error
-
+    preds_file = utils.get_preds_file(cur_user, model)
+    with open(preds_file, "rb") as f:
         preds_df = pickle.load(f)
-    preds_df_mod = preds_df.
-    preds_df_mod = preds_df_mod[preds_df_mod["
-    preds_df_mod = preds_df_mod[preds_df_mod["topic_id_"] < n_topics]
+    preds_df_mod = preds_df[preds_df["user_id"] == cur_user].sort_values(by=["item_id"]).reset_index()
+    preds_df_mod = preds_df_mod[preds_df_mod["topic_id"] < n_topics]
 
     if topic_vis_method == "median":
-        df = preds_df_mod.groupby(["
+        df = preds_df_mod.groupby(["topic", "user_id"]).median().reset_index()
     elif topic_vis_method == "mean":
-        df = preds_df_mod.groupby(["
+        df = preds_df_mod.groupby(["topic", "user_id"]).mean().reset_index()
     elif topic_vis_method == "fp_fn":
         for error_type in ["fn_proportion", "fp_proportion"]:
             topic_errors = get_topic_errors(preds_df_mod, error_type)
-            preds_df_mod[error_type] = [topic_errors[topic] for topic in preds_df_mod["
-        df = preds_df_mod.groupby(["
+            preds_df_mod[error_type] = [topic_errors[topic] for topic in preds_df_mod["topic"].tolist()]
+        df = preds_df_mod.groupby(["topic", "user_id"]).mean().reset_index()
     else:
         # Get error for each topic
         topic_errors = get_topic_errors(preds_df_mod, topic_vis_method)
-        preds_df_mod[topic_vis_method] = [topic_errors[topic] for topic in preds_df_mod["
-        df = preds_df_mod.groupby(["
+        preds_df_mod[topic_vis_method] = [topic_errors[topic] for topic in preds_df_mod["topic"].tolist()]
+        df = preds_df_mod.groupby(["topic", "user_id"]).mean().reset_index()
 
     # Get system error
-
+    junk_topics = ["53_maiareficco_kallystas_dyisisitmanila_tractorsazi", "-1_dude_bullshit_fight_ain"]
+    df = df[~df["topic"].isin(junk_topics)] # Exclude known "junk topics"
 
     if topic_vis_method == "median" or topic_vis_method == "mean":
-        df["error_magnitude"] = [utils.get_error_magnitude(sys, user, threshold) for sys, user in zip(df["
-        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["
+        df["error_magnitude"] = [utils.get_error_magnitude(sys, user, threshold) for sys, user in zip(df["rating_sys"].tolist(), df["pred"].tolist())]
+        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["rating_sys"].tolist(), df["pred"].tolist())]
 
         df_under = df[df["error_type"] == "System is under-sensitive"]
         df_under = df_under.sort_values(by=["error_magnitude"], ascending=False).head(n) # surface largest errors first
-        report_under = [get_empty_report(row["
+        report_under = [get_empty_report(row["topic"], row["error_type"]) for _, row in df_under.iterrows()]
 
         df_over = df[df["error_type"] == "System is over-sensitive"]
         df_over = df_over.sort_values(by=["error_magnitude"], ascending=False).head(n) # surface largest errors first
-        report_over = [get_empty_report(row["
+        report_over = [get_empty_report(row["topic"], row["error_type"]) for _, row in df_over.iterrows()]
 
         # Set up reports
-        # return [get_empty_report(row["topic_"], row["error_type"]) for index, row in df.iterrows()]
         reports = (report_under + report_over)
         random.shuffle(reports)
     elif topic_vis_method == "fp_fn":
         df_under = df.sort_values(by=["fn_proportion"], ascending=False).head(n)
         df_under = df_under[df_under["fn_proportion"] > 0]
-
+        if debug:
+            print(df_under[["topic", "fn_proportion"]])
+        report_under = [get_empty_report(row["topic"], "System is under-sensitive") for _, row in df_under.iterrows()]
 
         df_over = df.sort_values(by=["fp_proportion"], ascending=False).head(n)
         df_over = df_over[df_over["fp_proportion"] > 0]
-
+        if debug:
+            print(df_over[["topic", "fp_proportion"]])
+        report_over = [get_empty_report(row["topic"], "System is over-sensitive") for _, row in df_over.iterrows()]
 
         reports = (report_under + report_over)
         random.shuffle(reports)
     else:
         df = df.sort_values(by=[topic_vis_method], ascending=False).head(n * 2)
-        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["
-        reports = [get_empty_report(row["
+        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["rating_sys"].tolist(), df["pred"].tolist())]
+        reports = [get_empty_report(row["topic"], row["error_type"]) for _, row in df.iterrows()]
 
     return reports
 
@@ -718,78 +638,88 @@ def get_prompts_scaffold():
     },
     ]
 
+# Filter to eligible reports: those that have been marked complete and include at least one piece of evidence.
+def get_eligible_reports(reports):
+    eligible_reports = []
+    for r in reports:
+        if (r["complete_status"] == True) and (len(r["evidence"]) > 0):
+            eligible_reports.append(r)
+    return eligible_reports
+
+# Submit all reports to AVID
+# Logs the responses
+def submit_reports_to_AVID(reports, cur_user, email, sep_selection, debug=DEBUG):
+    # Set up the connection to AVID
+    root = os.environ.get('AVID_API_URL')
+    api_key = os.environ.get('AVID_API_KEY')
+    key = {"Authorization": api_key}
+
+    reports = get_eligible_reports(reports)
+    if debug:
+        print("Num eligible reports:", len(reports))
+
+    for r in reports:
+        new_report = utils.convert_indie_label_json_to_avid_json(r, cur_user, email, sep_selection)
+        url = root + "submit"
+        response = requests.post(url, json=json.loads(new_report), headers=key) # The loads ensures type compliance
+        uuid = response.json()
+        if debug:
+            print("Report", new_report)
+            print("AVID API response:", response, uuid)
+
 ########################################
 # ROUTE: /SAVE_REPORTS
 @app.route("/save_reports")
-def save_reports():
+def save_reports(debug=DEBUG):
     cur_user = request.args.get("cur_user")
     reports_json = request.args.get("reports")
     reports = json.loads(reports_json)
-
+    model = request.args.get("model")
 
-    # Save reports for current user to
-
-
-
-        pickle.dump(reports, f)
+    # Save reports for current user to file
+    reports_file = utils.get_reports_file(cur_user, model)
+    with open(reports_file, "w", encoding ='utf8') as f:
+        json.dump(reports, f)
 
     results = {
         "status": "success",
     }
+    if debug:
+        print(results)
    return json.dumps(results)
 
 ########################################
-# ROUTE: /
-@app.route("/
-def
-
-
-
-
-    df = utils.get_comments_grouped_full_topic_cat().sample(n=n_examples)
+# ROUTE: /SUBMIT_AVID_REPORT
+@app.route("/submit_avid_report")
+def submit_avid_report():
+    cur_user = request.args.get("cur_user")
+    email = request.args.get("email")
+    sep_selection = request.args.get("sep_selection")
+    reports_json = request.args.get("reports")
 
-
-    df["system_color"] = [utils.get_user_color(sys, threshold) for sys in df["rating"].tolist()] # get cell colors
+    reports = json.loads(reports_json)
 
-
+    # Submit reports to AVID
+    submit_reports_to_AVID(reports, cur_user, email, sep_selection)
 
     results = {
-        "
+        "status": "success",
     }
     return json.dumps(results)
 
 ########################################
-# ROUTE: /
-@app.route("/
-def
-
-
-    users = users.split(",")
-    # print("users", users)
-
-    IGNORE_LIST = ["DemoUser"]
-    report_dir = f"./data/user_reports"
-
+# ROUTE: /GET_EXPLORE_EXAMPLES
+@app.route("/get_explore_examples")
+def get_explore_examples():
+    threshold = utils.get_toxic_threshold()
+    n_examples = int(request.args.get("n_examples"))
 
-    #
-
-
-    for user in users:
-        if user not in IGNORE_LIST:
-            user_results = {}
-            user_results["user"] = user
-            for scaffold_method in ["personal", "personal_group", "prompts"]:
-                # Get results
-                user_file = os.path.join(report_dir, f"{user}_{scaffold_method}.pkl")
-                if os.path.isfile(user_file):
-                    with open(user_file, "rb") as f:
-                        user_results[scaffold_method] = pickle.load(f)
-            results.append(user_results)
-
-    # print("results", results)
+    # Get sample of examples
+    df = utils.get_explore_df(n_examples, threshold)
+    ex_json = df.to_json(orient="records")
 
     results = {
-        "
+        "examples": ex_json,
     }
     return json.dumps(results)
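
The AVID submission path above hinges on get_eligible_reports: only reports that are marked complete and carry at least one piece of evidence are forwarded. A small standalone sketch of that filter, with illustrative report dicts:

def get_eligible_reports(reports):
    # Mirrors server.py: keep reports that are complete and have evidence.
    return [r for r in reports if r["complete_status"] and len(r["evidence"]) > 0]

reports = [
    {"title": "A", "complete_status": True,  "evidence": [{"item_id": 1}]},
    {"title": "B", "complete_status": True,  "evidence": []},                # no evidence, dropped
    {"title": "C", "complete_status": False, "evidence": [{"item_id": 2}]},  # incomplete, dropped
]
print([r["title"] for r in get_eligible_reports(reports)])  # ['A']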