import gradio as gr
import datasets
import huggingface_hub

theme = gr.themes.Soft(
    primary_hue="green",
)


def check_model(model_id):
    """Check that a Hugging Face model exists and can be loaded as a pipeline.

    Args:
        model_id: Model repository id on the Hugging Face Hub.

    Returns:
        A ``(model_id, result)`` tuple:

        * ``(None, None)`` when the model info cannot be fetched.
        * ``(model_id, pipeline)`` when the pipeline was created.
        * ``(model_id, exception)`` when creating the pipeline failed; the
          exception is returned (not raised) so the caller can report it.
    """
    try:
        task = huggingface_hub.model_info(model_id).pipeline_tag
    except Exception:
        # Any failure to fetch the model info is reported as "not accessible".
        return None, None

    try:
        # Imported lazily so transformers is only loaded when a model is
        # actually validated.
        from transformers import pipeline

        ppl = pipeline(task=task, model=model_id)
        return model_id, ppl
    except Exception as e:
        return model_id, e


def check_dataset(dataset_id, dataset_config="default", dataset_split="test"):
    """Check that a dataset, its config (subset) and its split are usable.

    Args:
        dataset_id: Dataset repository id on the Hugging Face Hub.
        dataset_config: Requested config (subset) name.
        dataset_split: Requested split name.

    Returns:
        A ``(dataset_id, config, split)`` tuple:

        * ``(None, dataset_config, dataset_split)`` when the config names
          cannot be fetched.
        * ``(dataset_id, [configs...], dataset_split)`` when the requested
          config is not one of the available configs.
        * ``(dataset_id, None, [splits...])`` when the requested split is not
          one of the loaded splits.
        * ``(dataset_id, None, None)`` when the loaded object is neither a
          ``DatasetDict`` nor a ``Dataset``.
        * ``(dataset_id, dataset_config, dataset_split)`` otherwise.

    Raises:
        Exception: whatever ``datasets.load_dataset`` raises; it is not
            caught here.
    """
    try:
        configs = datasets.get_dataset_config_names(dataset_id)
    except Exception:
        # Dataset may not exist
        return None, dataset_config, dataset_split

    if dataset_config not in configs:
        # Need to choose dataset subset (config)
        return dataset_id, configs, dataset_split

    ds = datasets.load_dataset(dataset_id, dataset_config)

    if isinstance(ds, datasets.DatasetDict):
        # Need to choose dataset split
        if dataset_split not in ds.keys():
            return dataset_id, None, list(ds.keys())
    elif not isinstance(ds, datasets.Dataset):
        # Unknown type
        return dataset_id, None, None
    return dataset_id, dataset_config, dataset_split


def try_submit(model_id, dataset_id, dataset_config, dataset_split):
    """Validate the model and dataset selection from the UI.

    Problems are reported through ``gr.Warning``. When the config or split is
    invalid, the matching dropdown is updated with the valid choices.

    Args:
        model_id: Model id typed by the user.
        dataset_id: Dataset id typed by the user.
        dataset_config: Currently selected dataset config (subset).
        dataset_split: Currently selected dataset split.

    Returns:
        A ``(config, split)`` pair for the two dropdown outputs: either the
        submitted value (left unchanged) or an update carrying new choices.
    """
    # Validate model
    m_id, ppl = check_model(model_id=model_id)
    if m_id is None:
        gr.Warning(f'Model "{model_id}" is not accessible. Please set your HF_TOKEN if it is a private model.')
        return dataset_config, dataset_split
    if isinstance(ppl, Exception):
        gr.Warning(f'Failed to load "{model_id}" model: {ppl}')
        return dataset_config, dataset_split

    # Validate dataset
    try:
        d_id, config, split = check_dataset(
            dataset_id=dataset_id,
            dataset_config=dataset_config,
            dataset_split=dataset_split,
        )
    except Exception as e:
        # Loading the dataset can fail after the config check passed; report
        # it the same way model loading failures are reported.
        gr.Warning(f'Failed to load dataset "{dataset_id}": {e}')
        return dataset_config, dataset_split

    if d_id is None:
        gr.Warning(f'Dataset "{dataset_id}" is not accessible. Please set your HF_TOKEN if it is a private dataset.')
    elif isinstance(config, list):
        gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_config}" config. Please choose a valid config.')
        # gr.update is the generic form of gr.Dropdown.update.
        config = gr.update(choices=config, value=config[0] if config else None)
    elif isinstance(split, list):
        gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_split}" split. Please choose a valid split.')
        split = gr.update(choices=split, value=split[0] if split else None)

    # check_dataset uses None as a placeholder; returning None to a dropdown
    # would clear the user's selection, so keep the submitted values instead.
    if config is None:
        config = dataset_config
    if split is None:
        split = dataset_split

    return config, split


with gr.Blocks(theme=theme) as iface:
    with gr.Row():
        with gr.Column():
            model_id_input = gr.Textbox(
                label="Hugging Face model id",
                placeholder="cardiffnlp/twitter-roberta-base-sentiment-latest",
            )

            # TODO: Add supported model pairs: Text Classification - text-classification
            model_type = gr.Dropdown(
                label="Hugging Face model type",
                choices=[
                    ("Auto-detect", 0),
                    ("Text Classification", 1),
                ],
                value=0,
            )

        with gr.Column():
            dataset_id_input = gr.Textbox(
                label="Hugging Face dataset id",
                placeholder="tweet_eval",
            )

            # Custom values are allowed because the valid configs and splits
            # are only known after the dataset has been checked.
            dataset_config_input = gr.Dropdown(
                label="Hugging Face dataset subset",
                choices=[
                    "default",
                ],
                allow_custom_value=True,
                value="default",
            )

            dataset_split_input = gr.Dropdown(
                label="Hugging Face dataset split",
                choices=[
                    "test",
                ],
                allow_custom_value=True,
                value="test",
            )

    with gr.Row():
        run_btn = gr.Button("Validate and submit evaluation task", variant="primary")
        run_btn.click(
            try_submit,
            inputs=[
                model_id_input,
                dataset_id_input,
                dataset_config_input,
                dataset_split_input,
            ],
            outputs=[
                dataset_config_input,
                dataset_split_input,
            ],
        )

iface.queue(max_size=20)
iface.launch()