Spaces:

huggingface
/

speech-bench-metrics-editor

Runtime error

App Files Files Community

speech-bench-metrics-editor / app.py

speech-test

Initial commit

47e279a over 2 years ago

raw

history blame

5.21 kB

	import json
	import re
	from pathlib import Path

	import requests
	import streamlit as st
	import yaml
	from huggingface_hub import hf_hub_download
	from streamlit_tags import st_tags

	# Exact same regex as in the Hub server. Please keep in sync.
	# Group 1 captures the text enclosed between two `---` fence lines.
	REGEX_YAML_BLOCK = re.compile(r"---[\n\r]+([\S\s]*?)[\n\r]+---[\n\r]")

	# Loaded once at import time; main() uses this mapping to turn language codes
	# into display names. JSON is UTF-8 by specification, so decode explicitly
	# instead of relying on the platform's locale-dependent default encoding.
	with open("languages.json", encoding="utf-8") as f:
	    lang2name = json.load(f)


	def try_parse_yaml(yaml_block):
	    """Parse a YAML metadata block with the safe loader.

	    Args:
	        yaml_block: YAML text to parse.

	    Returns:
	        Whatever ``yaml.load`` produces for the block, or ``None`` when a
	        ``yaml.YAMLError`` is raised. In the error case a message is printed
	        to stdout and a description is shown through ``st.error``.
	    """
	    try:
	        parsed = yaml.load(yaml_block, yaml.SafeLoader)
	    except yaml.YAMLError as err:
	        print("Error while parsing the metadata YAML:")

	        # Errors without position information only get a generic message.
	        if not hasattr(err, "problem_mark"):
	            st.error(
	                "Something went wrong while parsing the metadata. "
	                "Make sure it's written according to the YAML spec!"
	            )
	            return None

	        # Position and problem are always reported; context only if present.
	        if err.context is not None:
	            details = (
	                str(err.problem_mark)
	                + "\n "
	                + str(err.problem)
	                + " "
	                + str(err.context)
	            )
	        else:
	            details = str(err.problem_mark) + "\n " + str(err.problem)
	        st.error(details + "\nPlease correct the README.md and retry.")
	        return None
	    else:
	        return parsed


	def main():
	    """Render the Streamlit page that loads and edits a model's card metadata.

	    Flow:
	      1. Ask for a model id and download its ``README.md`` with
	         ``hf_hub_download``.
	      2. Extract the block matched by ``REGEX_YAML_BLOCK`` and parse it with
	         ``try_parse_yaml``.
	      3. Show tag/text widgets for languages, training datasets and the
	         model name, pre-filled from the parsed metadata.

	    Whenever a step cannot proceed (empty input, download failure, no
	    metadata block, parse failure) the script run is halted via ``st.stop()``
	    after showing an error where applicable.
	    """
	    st.markdown("## 1. Load your model's metadata")
	    st.markdown("Enter your model's path below.")
	    # Strip once so stray whitespace never reaches the download call or URL.
	    model_id = st.text_input("", placeholder="<username>/<model>").strip()
	    if not model_id:
	        st.stop()
	    try:
	        readme_path = hf_hub_download(model_id, filename="README.md")
	    except requests.exceptions.HTTPError:
	        st.error(
	            f"ERROR: https://huggingface.co/{model_id}/blob/main/README.md "
	            f"not found, make sure you've entered a correct model path!"
	        )
	        st.stop()

	    # Decode explicitly: the default for read_text() depends on the locale,
	    # and the README is expected to be UTF-8.
	    content = Path(readme_path).read_text(encoding="utf-8")
	    match = REGEX_YAML_BLOCK.search(content)
	    if match:
	        meta_yaml = match.group(1)
	    else:
	        st.error(
	            "ERROR: Couldn't find the metadata section inside your model's `README.md`. Do you have some basic metadata "
	            "enclosed in `---` as described in [the Hub documentation](https://huggingface.co/docs/hub/model-repos#model-card-metadata)?"
	        )
	        st.stop()

	    metadata = try_parse_yaml(meta_yaml)
	    if metadata is None:
	        st.stop()
	    else:
	        st.success("Successfully loaded the metadata!")
	        with st.expander("Inspect the parsed metadata for debugging"):
	            st.json(metadata)

	    st.markdown("## 2. Edit the data")

	    ############################
	    # LANGUAGES
	    ############################
	    st.markdown("### Language(s)")
	    st.markdown(
	        "For each spoken language that your model handles, enter an "
	        "[ISO 639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) language code, or "
	        "find an appropriate alternative from "
	        "[our list here](https://huggingface.co/spaces/huggingface/hf-speech-bench/blob/main/languages.json). "
	        "When in doubt, use the most generic language code, e.g. `en` instead of `en-GB` and `en-US`."
	    )
	    st.markdown("Example: `cs, hsb, pl`")
	    # The field may be missing or a single scalar; the tag widget gets a list.
	    language_meta = metadata.get("language", [])
	    if not isinstance(language_meta, list):
	        language_meta = [language_meta]
	    metadata["language"] = language_meta

	    languages = st_tags(
	        label="", text="add more if needed, and press enter", value=metadata["language"]
	    )
	    # Unknown codes are shown as-is.
	    lang_names = [lang2name.get(lang, lang) for lang in languages]
	    st.markdown("These languages will be parsed by the leaderboard as: ")
	    st.code(", ".join(lang_names))

	    ############################
	    # TRAIN DATASETS
	    ############################
	    st.markdown("### Training dataset(s)")
	    st.markdown("List the datasets that your model was trained on.")
	    st.markdown("Example: `librispeech_asr, mozilla-foundation/common_voice_8_0`")

	    # Same normalisation as for languages: missing -> [], scalar -> [scalar].
	    if "datasets" not in metadata:
	        metadata["datasets"] = []
	    elif not isinstance(metadata["datasets"], list):
	        metadata["datasets"] = [metadata["datasets"]]

	    train_datasets = st_tags(
	        label="", text="add more if needed, and press enter", value=metadata["datasets"]
	    )
	    if "common_voice" in train_datasets:
	        st.warning(
	            "WARNING: `common_voice` is deprecated, please replace it with its equivalent: "
	            "`mozilla-foundation/common_voice_6_1`"
	        )

	    ############################
	    # MODEL NAME
	    ############################
	    st.markdown("### Model name")
	    st.markdown("Enter a descriptive name for your model.")
	    st.markdown("Example: `XLS-R Wav2Vec2 LM Spanish by Jane Doe`")

	    # NOTE(review): Hub model cards appear to use the hyphenated `model-index`
	    # key; this code reads/writes `model_index` -- confirm which is intended.
	    if "model_index" not in metadata:
	        metadata["model_index"] = [{}]
	    # Look the name up in the metadata entry itself. The previous check
	    # indexed a list literal and so tested a substring of "model_index",
	    # which always overwrote an existing name with the repo name.
	    if "name" not in metadata["model_index"][0]:
	        metadata["model_index"][0]["name"] = model_id.split("/")[-1]
	    # The edited value is not consumed anywhere yet in this function.
	    model_name = st.text_input("", value=metadata["model_index"][0]["name"])

	    ############################
	    # EVAL DATASETS
	    ############################
	    st.markdown("### Evaluation metrics")


	# Entry point: build the page only when this file is run as the main script.
	if __name__ == "__main__":
	    main()