# sparrow-ui/views/dashboard.py

import streamlit as st
import numpy as np
import pandas as pd
import json
import altair as alt
from pathlib import Path
import requests


class Dashboard:
    class Model:
        pageTitle = "Dashboard"
        wordsTitle = "Words"
        inferenceTimeTitle = "Inference Time"
        documentsTitle = "Documents"
        dailyInferenceTitle = "Top Daily Inference"
        accuracyTitle = "Mean Accuracy"

        titleModelEval = "## Evaluation Accuracy"
        titleInferencePerformance = "## Inference Performance"
        titleDatasetInfo = "## Dataset Info"
        titleDataAnnotation = "## Data Annotation"
        titleTrainingPerformance = "## Training Performance"
        titleEvaluationPerformance = "## Evaluation Performance"

        status_file = "docs/status.json"
        annotation_files_dir = "docs/json"
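
    # Record layout (an assumption, inferred from the indexing used below; the
    # APIs don't document it here): the statistics endpoints return positional
    # lists where [0] is the run duration in seconds, [1] is the word count
    # (inference) or an accuracy dict (evaluate), [3] is the model name, and
    # [4] is a timestamp string whose date part precedes the first space.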

    def view(self, model):
        # st.title(model.pageTitle)

        def fetch_statistics(url):
            # Return the endpoint's JSON payload, or an empty list on failure
            # so the dashboard still renders without data.
            response = requests.get(url)
            if response.status_code == 200:
                return response.json()
            print(f"Error: Unable to fetch data from the API (status code {response.status_code})")
            return []

        json_data_inference = fetch_statistics(
            "https://katanaml-org-sparrow-ml.hf.space/api-inference/v1/sparrow-ml/statistics")
        json_data_training = fetch_statistics(
            "https://katanaml-org-sparrow-ml.hf.space/api-training/v1/sparrow-ml/statistics/training")
        json_data_evaluate = fetch_statistics(
            "https://katanaml-org-sparrow-ml.hf.space/api-training/v1/sparrow-ml/statistics/evaluate")
        with st.container():
            col1, col2, col3, col4, col5 = st.columns(5)

            with col1:
                # Total words processed across all inference runs, shown in
                # thousands; the delta compares the three most recent runs
                # against the overall per-run average.
                words_count = sum(row[1] for row in json_data_inference)
                delta_words = 0
                if len(json_data_inference) > 3:
                    avg_word_count = words_count / len(json_data_inference)
                    avg_word_last = (json_data_inference[-1][1] +
                                     json_data_inference[-2][1] +
                                     json_data_inference[-3][1]) / 3
                    if avg_word_last >= avg_word_count:
                        delta_words = round(100 - ((avg_word_count * 100) / avg_word_last), 2)
                    else:
                        delta_words = round(100 - ((avg_word_last * 100) / avg_word_count), 2) * -1
                words_count = words_count / 1000
                st.metric(label=model.wordsTitle, value=str(words_count) + 'K', delta=str(delta_words) + "%")
            with col2:
                # Total number of processed documents; the delta compares the
                # average daily volume over the three most recent days against
                # the overall daily average.
                docs_count = len(json_data_inference)
                delta_docs = 0
                if docs_count > 3:
                    inference_dates = [row[4].split(" ")[0] for row in json_data_inference]
                    inference_dates_unique = list(dict.fromkeys(inference_dates))
                    if len(inference_dates_unique) > 3:
                        # Count inference runs per day
                        inference_dates_dict = {key: 0 for key in inference_dates_unique}
                        for date in inference_dates:
                            inference_dates_dict[date] += 1

                        # Overall daily average vs. the three most recent days
                        avg_value = round(sum(inference_dates_dict.values()) / len(inference_dates_dict), 2)
                        avg_value_last = round(sum(inference_dates_dict[date]
                                                   for date in inference_dates_unique[-3:]) / 3, 2)

                        if avg_value_last > avg_value:
                            delta_docs = round(100 - ((avg_value * 100) / avg_value_last), 2)
                        else:
                            delta_docs = round(100 - ((avg_value_last * 100) / avg_value), 2) * -1
                st.metric(label=model.documentsTitle, value=docs_count, delta=str(delta_docs) + "%")
            with col3:
                # Busiest day: the highest number of inference runs recorded on
                # a single day; the delta shows how far the daily average sits
                # below that peak.
                inference_dates = [row[4].split(" ")[0] for row in json_data_inference]
                inference_dates_unique = list(dict.fromkeys(inference_dates))
                inference_dates_dict = {key: 0 for key in inference_dates_unique}
                for date in inference_dates:
                    inference_dates_dict[date] += 1

                max_value = 0
                avg_delta = 0
                if inference_dates_dict:
                    max_value = max(inference_dates_dict.values())
                    avg_value = round(sum(inference_dates_dict.values()) / len(inference_dates_dict), 2)
                    avg_delta = round(100 - ((avg_value * 100) / max_value), 2)
                st.metric(label=model.dailyInferenceTitle, value=max_value, delta=str(avg_delta) + "%")
            with col4:
                # Average inference time in seconds; the delta compares the
                # three most recent runs against the overall average. Lower is
                # better, hence delta_color="inverse". Guard against an empty
                # result set to avoid division by zero.
                inference_time_avg = 0
                delta_time = 0
                if json_data_inference:
                    inference_time_avg = round(
                        sum(row[0] for row in json_data_inference) / len(json_data_inference), 2)
                    if len(json_data_inference) > 3:
                        avg_time_last = (json_data_inference[-1][0] +
                                         json_data_inference[-2][0] +
                                         json_data_inference[-3][0]) / 3
                        if avg_time_last > inference_time_avg:
                            delta_time = round(100 - ((inference_time_avg * 100) / avg_time_last), 2)
                        else:
                            delta_time = round(100 - ((avg_time_last * 100) / inference_time_avg), 2) * -1
                st.metric(label=model.inferenceTimeTitle, value=str(inference_time_avg) + " s",
                          delta=str(delta_time) + "%", delta_color="inverse")
            with col5:
                # Mean evaluation accuracy across models; the delta compares
                # the three most recently evaluated models against the overall
                # mean. Guard against an empty result set.
                models_unique = []
                models_dict = {}
                for row in json_data_evaluate:
                    if row[3] not in models_unique:
                        models_unique.append(row[3])
                        models_dict[row[3]] = row[1]['mean_accuracy']

                avg_accuracy = 0
                delta_accuracy = 0
                if models_dict:
                    avg_accuracy = round(sum(models_dict.values()) / len(models_dict), 2)
                    if len(models_unique) > 3:
                        # Average accuracy over the last three models
                        avg_accuracy_last = round(sum(models_dict[name]
                                                      for name in models_unique[-3:]) / 3, 2)
                    else:
                        avg_accuracy_last = avg_accuracy
                    if avg_accuracy_last > avg_accuracy:
                        delta_accuracy = round(100 - ((avg_accuracy * 100) / avg_accuracy_last), 2)
                    else:
                        delta_accuracy = round(100 - ((avg_accuracy_last * 100) / avg_accuracy), 2) * -1
                st.metric(label=model.accuracyTitle, value=avg_accuracy, delta=str(delta_accuracy) + "%",
                          delta_color="inverse")

        st.markdown("---")
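
        # Line charts: per-model inference time by run (left) and per-model
        # evaluation accuracy by run (right).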
        with st.container():
            col1, col2 = st.columns(2)

            with col1:
                st.write(model.titleInferencePerformance)

                # Inference time per run, one series per model
                models = [row[3] for row in json_data_inference]
                models_unique = list(dict.fromkeys(models))
                models_dict = {key: [] for key in models_unique}
                for row in json_data_inference:
                    models_dict[row[3]].append(round(row[0]))

                # Wrap each list in pd.Series so models with different run
                # counts are NaN-padded instead of raising a length error
                data = pd.DataFrame({key: pd.Series(values) for key, values in models_dict.items()})
                st.line_chart(data)

            with col2:
                st.write(model.titleModelEval)

                models_unique = []
                models_dict = {}
                for row in json_data_evaluate:
                    if row[3] not in models_unique:
                        models_unique.append(row[3])
                        models_dict[row[3]] = row[1]['accuracies']

                data = pd.DataFrame({key: pd.Series(values) for key, values in models_dict.items()})
                st.line_chart(data)

        st.markdown("---")
        with st.container():
            col1, col2, col3 = st.columns(3)

            with col1:
                with st.container():
                    st.write(model.titleDataAnnotation)

                    total, completed, in_progress = self.calculate_annotation_stats(model)
                    data = pd.DataFrame({"Status": ["Completed", "In Progress"],
                                         "Value": [completed, in_progress]})

                    # Horizontal bar chart of annotation status
                    chart = alt.Chart(data).mark_bar().encode(
                        x='Value:Q',
                        y=alt.Y('Status:N', sort='-x'),
                        color=alt.Color('Status:N', legend=None)
                    )
                    st.altair_chart(chart)

            with col2:
                with st.container():
                    st.write(model.titleDatasetInfo)

                    api_url = "https://katanaml-org-sparrow-data.hf.space/api-dataset/v1/sparrow-data/dataset_info"
                    names = []
                    rows = []

                    # Fetch dataset split names and sizes
                    response = requests.get(api_url)
                    if response.status_code == 200:
                        json_data = response.json()
                        for split in json_data['splits']:
                            names.append(split['name'])
                            rows.append(split['number_of_rows'])
                    else:
                        print(f"Error: Unable to fetch data from the API (status code {response.status_code})")

                    data = pd.DataFrame({"Dataset": names, "Value": rows})

                    # Horizontal bar chart of dataset split sizes
                    chart = alt.Chart(data).mark_bar().encode(
                        x='Value:Q',
                        y=alt.Y('Dataset:N', sort='-x'),
                        color=alt.Color('Dataset:N', legend=None)
                    )
                    st.altair_chart(chart)

            with col3:
                with st.container():
                    st.write(model.titleTrainingPerformance)

                    # Rounded duration of each training run, keyed by run index
                    runs_dict = {i: round(row[0]) for i, row in enumerate(json_data_training)}
                    data = pd.DataFrame({"Runs": list(runs_dict.keys()), "Value": list(runs_dict.values())})

                    # Horizontal bar chart of training run durations
                    chart = alt.Chart(data).mark_bar().encode(
                        x='Value:Q',
                        y=alt.Y('Runs:N', sort='-x'),
                        color=alt.Color('Runs:N', legend=None)
                    )
                    st.altair_chart(chart)

        st.markdown("---")
        with st.container():
            st.write(model.titleEvaluationPerformance)

            # Rounded duration of each evaluation run, keyed by run index
            runs_dict = {i: round(row[0]) for i, row in enumerate(json_data_evaluate)}
            data = pd.DataFrame({"Runs": list(runs_dict.keys()), "Value": list(runs_dict.values())})

            # Horizontal bar chart of evaluation run durations
            chart = alt.Chart(data).mark_bar().encode(
                x='Value:Q',
                y=alt.Y('Runs:N', sort='-x'),
                color=alt.Color('Runs:N', legend=None)
            )
            st.altair_chart(chart)
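
    # Annotation progress: JSON files still at schema version 'v0.1' count as
    # in-progress, anything newer as completed; the totals are also persisted
    # to model.status_file as a side effect.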
    def calculate_annotation_stats(self, model):
        completed = 0
        in_progress = 0

        data_dir_path = Path(model.annotation_files_dir)
        for file_name in data_dir_path.glob("*.json"):
            with open(file_name, "r") as f:
                data = json.load(f)
                v = data['meta']['version']
                if v == 'v0.1':
                    in_progress += 1
                else:
                    completed += 1

        total = completed + in_progress

        status_json = {
            "annotations": [
                {
                    "completed": completed,
                    "in_progress": in_progress,
                    "total": total
                }
            ]
        }

        with open(model.status_file, "w") as f:
            json.dump(status_json, f, indent=2)

        return total, completed, in_progress
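

# Minimal usage sketch (an assumption -- the actual page wiring lives elsewhere
# in sparrow-ui): render the dashboard with its inner view model.
#
#     dashboard = Dashboard()
#     dashboard.view(Dashboard.Model())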