|
from datasets import load_dataset |
|
import streamlit as st |
|
from ast import literal_eval |
|
import pandas as pd |
|
|
|
|
|
# Task identifiers grouped by modality.  ``modalities`` maps each modality
# name to its list of tasks and is what ``modality()`` searches when it
# labels a row by its "pipeline" value.
nlp_tasks = [
    "text-classification",
    "text-generation",
    "text2text-generation",
    "token-classification",
    "fill-mask",
    "question-answering",
    "translation",
    "conversational",
    "sentence-similarity",
    "summarization",
    "multiple-choice",
    "zero-shot-classification",
    "table-question-answering",
]

audio_tasks = [
    "automatic-speech-recognition",
    "audio-classification",
    "text-to-speech",
    "audio-to-audio",
    "voice-activity-detection",
]

cv_tasks = [
    "image-classification",
    "image-segmentation",
    "zero-shot-image-classification",
    "image-to-image",
    "unconditional-image-generation",
    "object-detection",
]

multimodal = [
    "feature-extraction",
    "text-to-image",
    "visual-question-answering",
    "image-to-text",
    "document-question-answering",
]

tabular = [
    "tabular-classification",
    "tabular-regression",
]

# Insertion order matters: lookups walk the modalities in this order.
modalities = dict(
    nlp=nlp_tasks,
    audio=audio_tasks,
    cv=cv_tasks,
    multimodal=multimodal,
    tabular=tabular,
    rl=["reinforcement-learning"],
)
|
|
|
def modality(row):
    """Return the modality name for a row's ``"pipeline"`` value.

    Args:
        row: Mapping-like object (for example a DataFrame row) that can be
            indexed with ``"pipeline"``.

    Returns:
        The key of the module-level ``modalities`` dict whose task list
        contains the pipeline; ``"unk_modality"`` when the pipeline is a
        string that appears in none of the lists; ``None`` when the
        pipeline is not a string at all.
    """
    pipeline = row["pipeline"]
    for name, tasks in modalities.items():
        if pipeline in tasks:
            return name
    # Use isinstance here: comparing ``type(pipeline)`` to the literal
    # "str" is always False, which made the "unk_modality" branch dead.
    if isinstance(pipeline, str):
        return "unk_modality"
    return None
|
|
|
# The leading "@" is required: a bare ``st.cache(...)`` call builds the
# decorator and throws it away, leaving the function uncached.
@st.cache(allow_output_mutation=True)
def process_dataset(version):
    """Load the model-repos-stats dataset and add derived columns.

    Args:
        version: Passed as ``revision`` to ``load_dataset`` for the
            ``"open-source-metrics/model-repos-stats"`` dataset.

    Returns:
        A pandas DataFrame built from the ``"train"`` split with two extra
        columns: ``"modality"`` (result of ``modality()`` applied to each
        row) and ``"length_bins"`` (``"text_length"`` bucketed by
        ``pd.cut`` into the fixed edges below).
    """
    dataset = load_dataset("open-source-metrics/model-repos-stats", revision=version)
    data = dataset["train"].to_pandas()

    # Row-wise lookup of the modality for each row's pipeline value.
    data["modality"] = data.apply(modality, axis=1)

    # Fixed bucket edges for text length.  With pd.cut's defaults, values
    # outside the outer edges end up as NaN.
    length_edges = [0, 200, 1000, 2000, 3000, 4000, 5000, 7500, 10000, 20000, 50000]
    data["length_bins"] = pd.cut(data["text_length"], length_edges)

    return data
|
|
|
def eval_tags(row):
    """Parse the ``"tags"`` entry of *row* into a Python value (normally a list).

    Args:
        row: Mapping-like object that can be indexed with ``"tags"``.

    Returns:
        ``[]`` when the value is a "no tags" placeholder (``None``, ``""``,
        ``"none"``, ``[]`` or ``"{}"``) or when it evaluates to a dict;
        otherwise the result of ``literal_eval`` on the value.  A value
        that does not start with ``"["`` is first wrapped in a one-element
        list, so a bare tag such as ``"pytorch"`` yields ``["pytorch"]``.

    Raises:
        ValueError, SyntaxError: propagated from ``literal_eval`` when the
            value is not a valid Python literal.
    """
    tags = row["tags"]

    # None and "" are checked before indexing: ``tags[0]`` would otherwise
    # raise TypeError / IndexError on them.
    if tags is None or tags == "" or tags == "none" or tags == [] or tags == "{}":
        return []

    # Not already a list literal: wrap it so literal_eval returns a list.
    if tags[0] != "[":
        tags = str([tags])

    val = literal_eval(tags)
    if isinstance(val, dict):
        return []
    return val
|
|
|
def change_pct(old, new):
    """Return the change from *old* to *new* as a percentage of *new*.

    The result is rounded to three decimals.  When *new* is zero the
    division is impossible and the sentinel ``-10000000`` is returned
    instead.
    """
    if new != 0:
        difference = new - old
        return round(100 * difference / new, 3)
    return -10000000
|
|
|
def change_and_delta(old_old, old, new):
    """Return the latest percentage change and how it moved versus the previous one.

    Args:
        old_old: Value two periods back.
        old: Value one period back.
        new: Current value.

    Returns:
        A ``(curr_change, delta)`` tuple of strings.  ``curr_change`` is
        ``change_pct(old, new)`` followed by ``"%"``.  ``delta`` is the
        difference between that and ``change_pct(old_old, old)``, rounded
        to three decimals and followed by ``"%"``, with an explicit ``"+"``
        prefix when positive.
    """
    curr_change = change_pct(old, new)
    prev_change = change_pct(old_old, old)
    delta = round(curr_change - prev_change, 3)

    # Always return the delta as a percent string.  Previously only
    # positive deltas were formatted; zero and negative ones came back as
    # bare numbers with no "%".  Negative numbers already carry their sign.
    sign = "+" if delta > 0 else ""
    return f"{curr_change}%", f"{sign}{delta}%"