davanstrien's picture
davanstrien HF staff
Update app.py
0a87721 verified
raw
history blame
7.63 kB
# app dashboard from https://huggingface.co/spaces/davanstrien/argilla-progress/blob/main/app.py
import os
from typing import List
import argilla as rg
import gradio as gr
import pandas as pd
import plotly.colors as colors
import plotly.graph_objects as go
# Module-level Argilla client. The server URL and API key are read from the
# ARGILLA_API_URL / ARGILLA_API_KEY environment variables (None when unset).
client = rg.Argilla(
    api_url=os.environ.get("ARGILLA_API_URL"),
    api_key=os.environ.get("ARGILLA_API_KEY"),
)
def get_progress(dataset: "rg.Dataset") -> dict:
    """Summarize annotation progress for one dataset.

    Calls ``dataset.progress(with_users_distribution=True)`` and reshapes the
    result into the structure the dashboard consumes.

    Args:
        dataset: Object exposing ``progress(with_users_distribution=True)``
            that returns a mapping with "total", "completed" and "users".

    Returns:
        A dict with:
          - "total": total number of records,
          - "annotated": number of completed records,
          - "progress": completed / total as a percentage (0 when total is 0),
          - "users": mapping of username -> submitted count, summed over the
            user's "completed" and "pending" buckets.
    """
    dataset_progress = dataset.progress(with_users_distribution=True)
    total = dataset_progress["total"]
    completed = dataset_progress["completed"]
    percent = (completed / total) * 100 if total > 0 else 0

    def _submitted(bucket) -> int:
        # A bucket may lack a "submitted" entry (or hold None) when the user
        # has no submitted responses in that state; treat that as zero
        # instead of letting ``None + int`` raise TypeError.
        return bucket.get("submitted") or 0

    users = {
        username: (
            _submitted(user_progress["completed"])
            + _submitted(user_progress["pending"])
        )
        for username, user_progress in dataset_progress["users"].items()
    }

    return {
        "total": total,
        "annotated": completed,
        "progress": percent,
        "users": users,
    }
def create_progress_bar(progress):
    """Build a one-row horizontal stacked bar of completed vs. pending records.

    Args:
        progress: Mapping providing "annotated" and "total" counts.

    Returns:
        A ``go.Figure`` holding one bar trace per state ("Completed",
        "Pending"), a row label to the left of the bar, and a count label
        centred on each segment.
    """
    segment_names = ['Completed', 'Pending']
    # Named segment_colors (not ``colors``) so the module-level
    # ``plotly.colors`` import is not shadowed inside this function.
    segment_colors = ['rgba(38, 24, 74, 0.8)', 'rgba(190, 192, 213, 1)']

    annotated = progress["annotated"]
    rows = [[annotated, progress["total"] - annotated]]
    row_labels = ['Progress']

    fig = go.Figure()
    # One trace per segment; barmode='stack' below places them end to end.
    for idx, (name, color) in enumerate(zip(segment_names, segment_colors)):
        for counts, row_label in zip(rows, row_labels):
            fig.add_trace(
                go.Bar(
                    x=[counts[idx]],
                    y=[row_label],
                    orientation='h',
                    marker=dict(
                        color=color,
                        line=dict(color='rgb(248, 248, 249)', width=1),
                    ),
                    hoverinfo='text',
                    hovertext=f"{name} records: {counts[idx]}",
                )
            )

    # Both axes are hidden; only their domains differ.
    hidden_axis = dict(
        showgrid=False,
        showline=False,
        showticklabels=False,
        zeroline=False,
    )
    fig.update_layout(
        xaxis=dict(hidden_axis, domain=[0.15, 1]),
        yaxis=dict(hidden_axis, domain=[0.15, 0.5]),
        barmode='stack',
        paper_bgcolor='rgb(248, 248, 255)',
        plot_bgcolor='rgb(248, 248, 255)',
        margin=dict(l=120, r=10, t=140, b=80),
        showlegend=False,
    )

    annotations = []
    for row_label, counts in zip(row_labels, rows):
        # Row label, anchored just left of the plotting area.
        annotations.append(
            dict(
                xref='paper',
                yref='y',
                x=0.14,
                y=row_label,
                xanchor='right',
                text=str(row_label),
                font=dict(family='Arial', size=14, color='rgb(67, 67, 67)'),
                showarrow=False,
                align='right',
            )
        )
        # Count label at the midpoint of each stacked segment: the running
        # offset is the total width of the segments already placed.
        offset = 0
        for count in counts:
            annotations.append(
                dict(
                    xref='x',
                    yref='y',
                    x=offset + count / 2,
                    y=row_label,
                    text=str(count),
                    font=dict(family='Arial', size=14, color='rgb(248, 248, 255)'),
                    showarrow=False,
                )
            )
            offset += count

    fig.update_layout(annotations=annotations, height=80)
    return fig
def create_piechart(user_annotations):
    """Build a pie chart of each user's contribution.

    Args:
        user_annotations: Mapping of username -> numeric contribution.

    Returns:
        A ``go.Figure`` containing a single ``go.Pie`` trace whose slices are
        listed from largest to smallest contribution, each coloured from a
        combined Pastel + Set3 qualitative palette (cycled if there are more
        users than palette entries).
    """
    ranked = sorted(user_annotations.items(), key=lambda item: item[1], reverse=True)
    palette = colors.qualitative.Pastel + colors.qualitative.Set3

    labels = [user for user, _ in ranked]
    values = [contribution for _, contribution in ranked]
    slice_colors = [palette[i % len(palette)] for i in range(len(ranked))]

    fig = go.Figure(
        go.Pie(
            labels=labels,
            values=values,
            # The per-user colours were previously computed but never handed
            # to the trace; pass them so the palette actually takes effect.
            marker=dict(colors=slice_colors),
        )
    )
    fig.update_layout(
        title_text="User contributions to the total end dataset",
        height=500,
        margin=dict(l=10, r=10, t=50, b=10),
        paper_bgcolor="#F0F0F0",  # Light gray background
        plot_bgcolor="#F0F0F0",  # Light gray background
    )
    return fig
def get_datasets(client: rg.Argilla) -> List[rg.Dataset]:
    """Return whatever ``client.datasets.list()`` yields for the given client."""
    available = client.datasets.list()
    return available
# Dataset list fetched once when the module is imported. The dropdown choices
# and update_dashboard both refer to datasets by their index in this list.
datasets = get_datasets(client)
from typing import Optional
def update_dashboard(dataset_idx: Optional[int] = None):
    """Recompute the dashboard outputs for the selected dataset.

    Args:
        dataset_idx: Index into the module-level ``datasets`` list, or None
            when nothing is selected.

    Returns:
        ``[None, None, None]`` when ``dataset_idx`` is None or does not refer
        to an existing dataset; otherwise a tuple of
        (progress bar figure, pie chart figure, leaderboard DataFrame), with
        the leaderboard sorted by submitted records in descending order.
    """
    # Also reject indexes that do not exist in ``datasets`` (e.g. the list is
    # empty), which would otherwise raise IndexError inside the callback.
    if dataset_idx is None or not 0 <= dataset_idx < len(datasets):
        return [None, None, None]

    dataset = datasets[dataset_idx]
    progress = get_progress(dataset)

    progress_bar = create_progress_bar(progress)
    piechart = create_piechart(progress["users"])

    # Column names match the headers declared on the leaderboard
    # ``gr.Dataframe`` component ("User", "Submitted records").
    leaderboard_df = pd.DataFrame(
        list(progress["users"].items()), columns=["User", "Submitted records"]
    )
    leaderboard_df = leaderboard_df.sort_values(
        "Submitted records", ascending=False
    ).reset_index(drop=True)

    return progress_bar, piechart, leaderboard_df
# Dashboard layout and event wiring.
# NOTE(review): the source this was recovered from had lost its indentation;
# the nesting of components inside each gr.Row() below is reconstructed from
# statement order — confirm against the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown("# Argilla Dataset Dashboard")

    # Each choice is (display name, index into ``datasets``); the index is the
    # value handed to update_dashboard.
    dataset_choices = [(dataset.name, idx) for idx, dataset in enumerate(datasets)]
    datasets_dropdown = gr.Dropdown(
        choices=dataset_choices,
        label="Select your dataset",
        # Preselect the first dataset only when one exists; with no datasets
        # an initial value of 0 would not correspond to any choice.
        value=0 if dataset_choices else None,
        visible=True
    )

    # NOTE(review): not attached to any event in the visible code; kept so the
    # module-level name remains available.
    def set_selected_dataset(dataset_idx) -> None:
        global selected_dataset
        dataset = datasets[dataset_idx]
        selected_dataset = dataset

    with gr.Row():
        progress_bar_output = gr.Plot(label="Overall Progress")

    gr.Markdown("## Contributor Leaderboard")

    with gr.Row():
        leaderboard_output = gr.Dataframe(
            headers=["User", "Submitted records"]
        )
        piechart_output = gr.Plot(label="User contributions")

    # Populate the outputs on page load. ``every=60*5`` requests a periodic
    # re-run (300, presumably seconds — confirm against the installed Gradio
    # version).
    demo.load(
        update_dashboard,
        inputs=[datasets_dropdown],
        outputs=[progress_bar_output, piechart_output, leaderboard_output],
        every=60*5,
    )

    # Refresh the outputs whenever a different dataset is selected.
    datasets_dropdown.change(
        update_dashboard,
        inputs=[datasets_dropdown],
        outputs=[progress_bar_output, piechart_output, leaderboard_output],
    )
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()