Commit
•
a852b26
1
Parent(s):
19f5813
Initial app
Browse files
README.md
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
---
|
2 |
-
title: Fineweb2 Compare My Annotations
|
3 |
-
emoji:
|
4 |
colorFrom: yellow
|
5 |
colorTo: gray
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.8.0
|
8 |
app_file: app.py
|
9 |
-
pinned:
|
10 |
---
|
11 |
|
12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: "Fineweb2: Compare My Annotations"
|
3 |
+
emoji: π
|
4 |
colorFrom: yellow
|
5 |
colorTo: gray
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.8.0
|
8 |
app_file: app.py
|
9 |
+
pinned: true
|
10 |
---
|
11 |
|
12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argilla as rg
|
2 |
+
import gradio as gr
|
3 |
+
from plotly.subplots import make_subplots
|
4 |
+
import plotly.graph_objects as go
|
5 |
+
import os
|
6 |
+
from typing import List
|
7 |
+
from collections import defaultdict
|
8 |
+
|
9 |
+
# Module-level Argilla client shared by the rest of the app. The server URL
# and API key are read from the ARGILLA_FW2_URL / ARGILLA_FW2_KEY environment
# variables; either lookup yields None when the variable is unset.
client = rg.Argilla(
    api_url=os.environ.get("ARGILLA_FW2_URL"),
    api_key=os.environ.get("ARGILLA_FW2_KEY"),
)
|
13 |
+
|
14 |
+
def get_stats(dataset_idx, username):
    """Count "educational_value" responses for one user and for everyone.

    Args:
        dataset_idx: Index into the module-level ``datasets`` list.
        username: Argilla username whose responses are tallied separately.

    Returns:
        A ``(user_label_counts, all_label_counts)`` tuple of
        ``defaultdict(int)`` objects mapping each response value to the number
        of times it was given. Responses by ``username`` are counted in both
        mappings.

    Raises:
        gr.Error: If ``client.users(username)`` does not return a user.
    """
    dataset = datasets[dataset_idx]

    user = client.users(username)
    if user is None:
        # Without this guard an unknown username surfaces as an opaque
        # AttributeError on `.id`; gr.Error shows a readable message in the UI.
        raise gr.Error(f"No Argilla user named {username!r} was found.")
    user_id = user.id

    user_label_counts = defaultdict(int)
    all_label_counts = defaultdict(int)

    for record in dataset.records:
        for response in record.responses["educational_value"]:
            label = response.value
            # Every response counts towards the overall tally; only the
            # selected user's responses also count towards the user tally.
            all_label_counts[label] += 1
            if response.user_id == user_id:
                user_label_counts[label] += 1

    return user_label_counts, all_label_counts
|
32 |
+
|
33 |
+
|
34 |
+
def build_plot(user_label_counts, all_label_counts):
    """Build side-by-side pie charts of the user's and the team's label usage.

    Args:
        user_label_counts: Mapping of label -> count for the selected user.
        all_label_counts: Mapping of label -> count across all responses.

    Returns:
        A Plotly figure with two pie traces: the user's counts on the left
        and the overall counts on the right. Only the labels listed in
        ``labels`` below are plotted; counts stored under any other key are
        ignored.
    """
    # NOTE(review): the last label looks mis-encoded in this copy of the file
    # (probably an emoji-wrapped string). Confirm it matches the dataset's
    # label text exactly, otherwise its slice will always be 0.
    labels = ['None', 'Minimal', 'Basic', 'Good', 'Excellent', 'β Problematic Content β']

    # .get() reports 0 for labels that were never used without inserting new
    # keys into the callers' mappings, and also works with plain dicts.
    user_counts = [user_label_counts.get(label, 0) for label in labels]
    overall_counts = [all_label_counts.get(label, 0) for label in labels]

    fig = make_subplots(
        rows=1,
        cols=2,
        specs=[[{'type': 'domain'}, {'type': 'domain'}]],
        subplot_titles=['My Label Usage', 'Team Label Usage'],
    )

    fig.add_trace(go.Pie(labels=labels, values=user_counts, name="User Label Counts"), 1, 1)
    fig.add_trace(go.Pie(labels=labels, values=overall_counts, name="Overall Label Counts"), 1, 2)

    # `barmode` was dropped here: it only affects bar traces and this figure
    # contains pie traces exclusively.
    fig.update_layout(title="User vs Overall Label Counts")

    return fig
|
52 |
+
|
53 |
+
|
54 |
+
def update_dashboard(dataset_idx, username):
    """Return the comparison figure for ``username`` on the chosen dataset.

    Passes both arguments to ``get_stats`` and feeds the two resulting count
    mappings straight into ``build_plot``.
    """
    counts = get_stats(dataset_idx, username)
    return build_plot(*counts)
|
59 |
+
|
60 |
+
|
61 |
+
|
62 |
+
# Fetched once when the module is loaded; get_stats() indexes into this list
# using the integer value selected in the dropdown below.
datasets = client.datasets.list()
dataset_choices = [(ds.name, position) for position, ds in enumerate(datasets)]

with gr.Blocks() as demo:
    gr.Markdown("# How do my annotations compare to my team's?")

    # Inputs: dataset selector and username field share one row.
    with gr.Row():
        datasets_dropdown = gr.Dropdown(
            choices=dataset_choices,
            label="Select your dataset",
            value=0,
            visible=True,
        )
        search_box = gr.Textbox(type="text", label="Enter your username:")

    with gr.Row():
        search_button = gr.Button("Search π")

    # Output: the two pie charts produced by update_dashboard().
    with gr.Row():
        plot_output = gr.Plot(label="Team and user annotations")

    search_button.click(
        fn=update_dashboard,
        inputs=[datasets_dropdown, search_box],
        outputs=[plot_output],
    )


if __name__ == "__main__":
    demo.launch()
|
91 |
+
|