Spaces:
Running
Running
import gradio as gr | |
import datasets | |
import huggingface_hub | |
# Shared visual theme for the app: Gradio's "Soft" theme with a green primary hue.
theme = gr.themes.Soft(primary_hue="green")
def check_model(model_id):
    """Check that a Hub model can be looked up and loaded as a pipeline.

    Returns a ``(model_id, payload)`` pair:

    * ``(None, None)`` when the model info lookup fails.
    * ``(model_id, exception)`` when importing ``transformers`` or building
      the pipeline raises; the exception instance is returned, not raised.
    * ``(model_id, pipeline)`` when the pipeline was built successfully.
    """
    try:
        info = huggingface_hub.model_info(model_id)
        task = info.pipeline_tag
    except Exception:
        return None, None

    try:
        # Imported here so that an import failure is reported to the caller
        # the same way as a pipeline construction failure.
        from transformers import pipeline

        loaded = pipeline(task=task, model=model_id)
    except Exception as err:
        return model_id, err
    return model_id, loaded
def check_dataset(dataset_id, dataset_config="default", dataset_split="test"):
    """Validate a dataset id together with its config (subset) and split.

    Args:
        dataset_id: Dataset identifier to look up.
        dataset_config: Name of the dataset config (subset) to validate.
        dataset_split: Name of the split to validate within that config.

    Returns:
        A ``(dataset_id, config, split)`` triple:

        * ``(None, dataset_config, dataset_split)`` when the config names
          cannot be listed (the dataset may not exist or be inaccessible).
        * ``(dataset_id, [config names], dataset_split)`` when
          ``dataset_config`` is not one of the dataset's configs.
        * ``(dataset_id, None, [split names])`` when ``dataset_split`` is not
          one of the splits of the chosen config.
        * ``(dataset_id, dataset_config, dataset_split)`` when all are valid.

    Raises:
        Any exception raised while listing the split names is propagated.
    """
    try:
        configs = datasets.get_dataset_config_names(dataset_id)
    except Exception:
        # Dataset may not exist
        return None, dataset_config, dataset_split

    if dataset_config not in configs:
        # Need to choose dataset subset (config)
        return dataset_id, configs, dataset_split

    # Only the split names are needed for validation, so query them directly
    # instead of loading the whole dataset just to inspect its keys.
    splits = list(datasets.get_dataset_split_names(dataset_id, dataset_config))
    if dataset_split not in splits:
        # Need to choose dataset split
        return dataset_id, None, splits

    return dataset_id, dataset_config, dataset_split
def try_submit(model_id, dataset_id, dataset_config, dataset_split):
    """Validate the chosen model and dataset and refresh the dataset dropdowns.

    Problems are reported to the user through ``gr.Warning``.

    Args:
        model_id: Model identifier entered by the user.
        dataset_id: Dataset identifier entered by the user.
        dataset_config: Currently selected dataset subset (config).
        dataset_split: Currently selected dataset split.

    Returns:
        A ``(config, split)`` pair for the dataset subset and split dropdowns.
        Each item is either a plain value or a ``gr.update(...)`` carrying the
        valid choices when the user has to pick a different config or split.
    """
    # Validate model
    m_id, ppl = check_model(model_id=model_id)
    if m_id is None:
        gr.Warning(f'Model "{model_id}" is not accessible. Please set your HF_TOKEN if it is a private model.')
        return dataset_config, dataset_split
    if isinstance(ppl, Exception):
        gr.Warning(f'Failed to load "{model_id}" model: {ppl}')
        return dataset_config, dataset_split

    # Validate dataset
    d_id, config, split = check_dataset(
        dataset_id=dataset_id,
        dataset_config=dataset_config,
        dataset_split=dataset_split,
    )
    if d_id is None:
        gr.Warning(f'Dataset "{dataset_id}" is not accessible. Please set your HF_TOKEN if it is a private dataset.')
    elif isinstance(config, list):
        gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_config}" config. Please choose a valid config.')
        # gr.update is used rather than the per-component Dropdown.update,
        # which newer Gradio releases no longer provide.
        config = gr.update(choices=config, value=config[0] if config else None)
    elif isinstance(split, list):
        gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_split}" split. Please choose a valid split.')
        # check_dataset reports the config as None in this case although the
        # selected config was valid; keep it so the dropdown is not cleared.
        config = dataset_config
        split = gr.update(choices=split, value=split[0] if split else None)
    return config, split
# Build the page: a row with a model column and a dataset column, followed by
# a row holding the submit button.
with gr.Blocks(theme=theme) as iface:
    with gr.Row():
        # Model selection.
        with gr.Column():
            model_id_input = gr.Textbox(
                placeholder="cardiffnlp/twitter-roberta-base-sentiment-latest",
                label="Hugging Face model id",
            )

            # TODO: Add supported model pairs: Text Classification - text-classification
            # NOTE: this dropdown is not passed to try_submit below.
            model_type = gr.Dropdown(
                choices=[("Auto-detect", 0), ("Text Classification", 1)],
                value=0,
                label="Hugging Face model type",
            )

        # Dataset selection. The subset and split dropdowns accept custom
        # values and are also the outputs that try_submit refreshes.
        with gr.Column():
            dataset_id_input = gr.Textbox(
                placeholder="tweet_eval",
                label="Hugging Face dataset id",
            )

            dataset_config_input = gr.Dropdown(
                choices=["default"],
                value="default",
                allow_custom_value=True,
                label="Hugging Face dataset subset",
            )

            dataset_split_input = gr.Dropdown(
                choices=["test"],
                value="test",
                allow_custom_value=True,
                label="Hugging Face dataset split",
            )

    with gr.Row():
        run_btn = gr.Button("Validate and submit evaluation task", variant="primary")

    # Clicking the button validates the inputs; the returned pair is written
    # back into the subset and split dropdowns.
    run_btn.click(
        fn=try_submit,
        inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
        outputs=[dataset_config_input, dataset_split_input],
    )

# Allow at most 20 queued requests, then start the app.
iface.queue(max_size=20)
iface.launch()