# Residue of the web page this file was captured from (not part of the program):
#   Spaces: Runtime error
#   Runtime error
import json
import re
from pathlib import Path

import requests
import streamlit as st
import yaml
from huggingface_hub import hf_hub_download
from streamlit_tags import st_tags

# Finds the metadata block delimited by `---` markers; group 1 captures the
# text between the markers (without the surrounding line breaks).
# Exact same regex as in the Hub server. Please keep in sync.
REGEX_YAML_BLOCK = re.compile(r"---[\n\r]+([\S\s]*?)[\n\r]+---[\n\r]")
# Lookup table from language code to display name, loaded once at import time.
# `main` uses it to show how the entered language codes will be interpreted.
# JSON files are UTF-8, so do not rely on the platform's default encoding.
with open("languages.json", encoding="utf-8") as f:
    lang2name = json.load(f)
def try_parse_yaml(yaml_block):
    """Parse a block of metadata YAML and report parse failures in the UI.

    Args:
        yaml_block: The YAML text to parse.

    Returns:
        The object produced by ``yaml.load`` with ``yaml.SafeLoader``, or
        ``None`` when parsing raised ``yaml.YAMLError``. In the failure case
        the problem has already been displayed with ``st.error``.
    """
    try:
        return yaml.load(yaml_block, yaml.SafeLoader)
    except yaml.YAMLError as e:
        print("Error while parsing the metadata YAML:")

        # Errors without a position mark get a generic message.
        if not hasattr(e, "problem_mark"):
            st.error(
                "Something went wrong while parsing the metadata. "
                "Make sure it's written according to the YAML spec!"
            )
            return None

        # Position first, then the problem, then the context when there is one.
        details = str(e.problem_mark) + "\n " + str(e.problem)
        if e.context is not None:
            details += " " + str(e.context)
        st.error(details + "\nPlease correct the README.md and retry.")
        return None
def _as_list(value):
    """Normalize a metadata field to a list.

    ``None`` becomes ``[]``, a list is returned unchanged, and any other value
    (for example a single string) is wrapped in a one-element list.
    """
    if value is None:
        return []
    return value if isinstance(value, list) else [value]


def main():
    """Render the Streamlit page that loads and edits a model's metadata.

    Step 1 asks for a model id, downloads its ``README.md`` with
    ``hf_hub_download``, extracts the ``---``-delimited YAML block with
    ``REGEX_YAML_BLOCK`` and parses it with ``try_parse_yaml``. Step 2 shows
    editors for the languages, the training datasets and the model name.

    Problems detected along the way (README not found, no metadata block,
    unparsable or non-mapping metadata) are reported with ``st.error`` and
    the run is ended with ``st.stop()``.
    """
    st.markdown("## 1. Load your model's metadata")
    st.markdown("Enter your model's path below.")
    # Strip once so the same clean id is used for the emptiness check, the
    # download and the default model name.
    model_id = st.text_input("", placeholder="<username>/<model>").strip()
    if not model_id:
        st.stop()

    try:
        readme_path = hf_hub_download(model_id, filename="README.md")
    except requests.exceptions.HTTPError:
        st.error(
            f"ERROR: https://huggingface.co/{model_id}/blob/main/README.md "
            "not found, make sure you've entered a correct model path!"
        )
        st.stop()

    # Read explicitly as UTF-8 instead of the platform's default encoding.
    content = Path(readme_path).read_text(encoding="utf-8")
    match = REGEX_YAML_BLOCK.search(content)
    if not match:
        st.error(
            "ERROR: Couldn't find the metadata section inside your model's `README.md`. Do you have some basic metadata "
            "enclosed in `---` as described in [the Hub documentation](https://huggingface.co/docs/hub/model-repos#model-card-metadata)?"
        )
        st.stop()
    meta_yaml = match.group(1)

    metadata = try_parse_yaml(meta_yaml)
    if metadata is None:
        st.stop()
    if not isinstance(metadata, dict):
        # Valid YAML that is a bare list or scalar cannot be edited as
        # key/value metadata below.
        st.error(
            "ERROR: The metadata section of your model's `README.md` must be "
            "a YAML mapping of `key: value` pairs."
        )
        st.stop()

    st.success("Successfully loaded the metadata!")
    with st.expander("Inspect the parsed metadata for debugging"):
        st.json(metadata)

    st.markdown("## 2. Edit the data")

    ############################
    # LANGUAGES
    ############################
    st.markdown("### Language(s)")
    st.markdown(
        "For each spoken language that your model handles, enter an "
        "[ISO 639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) language code, or "
        "find an appropriate alternative from "
        "[our list here](https://huggingface.co/spaces/huggingface/hf-speech-bench/blob/main/languages.json). "
        "When in doubt, use the most generic language code, e.g. `en` instead of `en-GB` and `en-US`."
    )
    st.markdown("*Example*: `cs, hsb, pl`")
    # The field may be missing, a single code or a list of codes.
    metadata["language"] = _as_list(metadata.get("language"))
    languages = st_tags(
        label="", text="add more if needed, and press enter", value=metadata["language"]
    )
    # Codes without an entry in the lookup table are shown unchanged.
    lang_names = [lang2name.get(lang, lang) for lang in languages]
    st.markdown("These languages will be parsed by the leaderboard as: ")
    st.code(", ".join(lang_names))

    ############################
    # TRAIN DATASETS
    ############################
    st.markdown("### Training dataset(s)")
    st.markdown("List the datasets that your model was trained on.")
    st.markdown("*Example*: `librispeech_asr, mozilla-foundation/common_voice_8_0`")
    # Same normalization as for the languages: a single dataset written as a
    # plain string becomes a one-element list.
    metadata["datasets"] = _as_list(metadata.get("datasets"))
    train_datasets = st_tags(
        label="", text="add more if needed, and press enter", value=metadata["datasets"]
    )
    if "common_voice" in train_datasets:
        st.warning(
            "WARNING: `common_voice` is deprecated, please replace it with its equivalent: "
            "`mozilla-foundation/common_voice_6_1`"
        )

    ############################
    # MODEL NAME
    ############################
    st.markdown("### Model name")
    st.markdown("Enter a descriptive name for your model.")
    st.markdown("*Example*: `XLS-R Wav2Vec2 LM Spanish by Jane Doe`")
    # Model cards store this under the hyphenated `model-index` key; looking
    # up `model_index` would never find a name already present in the README.
    model_index = metadata.get("model-index")
    if (
        not isinstance(model_index, list)
        or not model_index
        or not isinstance(model_index[0], dict)
    ):
        model_index = metadata["model-index"] = [{}]
    # Check the first entry itself (not the key string) and only fall back to
    # the repository name when the README gives no name.
    if not model_index[0].get("name"):
        model_index[0]["name"] = model_id.split("/")[-1]
    # The entered name is not consumed further within this function yet.
    model_name = st.text_input("", value=model_index[0]["name"])

    ############################
    # EVAL DATASETS
    ############################
    st.markdown("### Evaluation metrics")
# Build the page only when this file is executed as the main script, not when
# it is imported.
if __name__ == "__main__":
    main()