Check-my-progress-Audio-Course

Running

File size: 7,045 Bytes

3d7abcf
 
 
d071597
 
3d7abcf
d071597
 
b838740
d071597
 
 
 
 
 
3d7abcf
 
 
b838740
e04bd55
3d7abcf
e04bd55
3d7abcf
 
e04bd55
3d7abcf
 
e04bd55
 
 
 
 
b3439f4
 
e04bd55
 
 
 
b3439f4
 
 
 
 
 
 
 
 
 
 
 
3d7abcf
b3439f4
 
3d7abcf
e04bd55
 
 
 
 
b3439f4
e04bd55
b3439f4
e04bd55
 
 
 
 
b3439f4
e04bd55
 
 
 
b3439f4
 
e04bd55
 
 
 
 
 
 
 
 
 
 
b3439f4
3d7abcf
 
 
 
 
 
 
 
 
 
 
 
 
 
e04bd55
d071597
e04bd55
d071597
 
 
b3439f4
 
e04bd55
 
 
 
b3439f4
e04bd55
d071597
e04bd55
 
d071597
b3439f4
3d7abcf
 
e04bd55
3d7abcf
d071597
 
3d7abcf
e04bd55
 
3d7abcf
 
 
 
 
d071597
 
 
3d7abcf
 
 
 
 
e04bd55
b3439f4
e04bd55
3d7abcf
 
 
 
 
b3439f4
 
 
3d7abcf
 
 
 
 
d071597
 
 
3d7abcf
 
 
 
b3439f4
d071597
3d7abcf
b838740
e04bd55
 
 
 
 
 
 
 
b3439f4
e04bd55
 
b3439f4
e04bd55
 
 
 
 
 
b3439f4
 
 
 
 
 
 
 
 
 
e04bd55
 
b3439f4
e04bd55
 
 
b3439f4
3d7abcf
b3439f4
3d7abcf
d071597
3d7abcf
b3439f4
3d7abcf
 
d071597
3d7abcf
d071597
 
3d7abcf
dd19921
3d7abcf
dd19921
3d7abcf
 
d071597
3d7abcf
d071597
3d7abcf

import gradio as gr
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.repocard import metadata_load
import requests
import re
import pandas as pd
from huggingface_hub import ModelCard


def pass_emoji(passed):
    """
    Translate a pass flag into a status emoji.

    Only the boolean ``True`` itself (identity check) produces the check
    mark; any other value, including truthy non-booleans, produces the cross.

    :param passed: pass flag of a unit
    :return: "✅" when ``passed is True``, otherwise "❌"
    """
    return "✅" if passed is True else "❌"

# Module-wide Hugging Face Hub client; get_user_models uses it to list a user's models.
api = HfApi()


def get_user_models(hf_username, task):
    """
    List the user's models for a given task.

    For tasks tied to a course dataset, only models whose card metadata
    declares exactly that dataset (``datasets == [dataset]``) are kept.
    For "text-to-speech" every model of the user with that task is returned.

    :param hf_username: User HF username
    :param task: task tag, one of "audio-classification",
        "automatic-speech-recognition" or "text-to-speech"
    :return: list of model ids; an empty list for an unsupported task
    """
    # Dataset the models of each task must declare; "" means no dataset filter.
    required_datasets = {
        "audio-classification": "marsyas/gtzan",
        "automatic-speech-recognition": "PolyAI/minds14",
        "text-to-speech": "",
    }

    if task not in required_datasets:
        # The dataset used to stay unbound here and the function crashed with
        # UnboundLocalError further down; return early, before querying the Hub.
        print("Unsupported task")
        return []

    dataset = required_datasets[task]
    models = api.list_models(author=hf_username, filter=[task])
    user_model_ids = [x.modelId for x in models]

    if dataset == "":
        return user_model_ids

    dataset_specific_models = []
    for model in user_model_ids:
        meta = get_metadata(model)
        if meta is None:
            continue
        try:
            declared_datasets = meta["datasets"]
        except (KeyError, TypeError):
            # Card metadata has no usable "datasets" entry: not a match.
            continue
        if declared_datasets == [dataset]:
            dataset_specific_models.append(model)
    return dataset_specific_models


def calculate_best_result(user_models, task):
    """
    Calculate the best result of a unit for a given task.

    Accuracy (audio classification) is maximised, WER (speech recognition)
    is minimised. Models without readable metadata or without a parsable
    metric are skipped instead of aborting the whole scan.

    :param user_models: model ids of a user
    :param task: "audio-classification" or "automatic-speech-recognition"
    :return: tuple ``(best_result, best_model)``; when no model qualifies the
        starting sentinel (-100 for accuracy, 100 for WER) and "" are returned
    :raises ValueError: if the task has no metric to compare
    """
    if task == "audio-classification":
        best_result = -100
        larger_is_better = True
    elif task == "automatic-speech-recognition":
        best_result = 100
        larger_is_better = False
    else:
        # Previously surfaced as an UnboundLocalError on the first use.
        raise ValueError(f"Unsupported task for metric comparison: {task!r}")

    best_model = ""

    for model in user_models:
        meta = get_metadata(model)
        if meta is None:
            continue

        metric = parse_metrics(model, task)
        if metric is None:
            # No metric in the card; comparing None with a number would raise.
            continue

        if larger_is_better:
            improved = metric > best_result
        else:
            improved = metric < best_result
        if not improved:
            continue

        best_result = metric
        try:
            best_model = meta["model-index"][0]["name"]
        except (KeyError, IndexError, TypeError):
            # Card has no model-index name; fall back to the repository id.
            best_model = model

    return best_result, best_model


def get_metadata(model_id):
    """
    Fetch the metadata stored in a model's README.md (contains evaluation data)
    :param model_id: id of the model repository on the Hub
    :return: the value produced by ``metadata_load`` for the downloaded
        README, or None when an HTTP error is raised
    """
    try:
        card_metadata = metadata_load(hf_hub_download(model_id, filename="README.md"))
    except requests.exceptions.HTTPError:
        # e.g. 404: README.md not found in the repository
        card_metadata = None
    return card_metadata


def extract_metric(model_card_content, task):
    """
    Extract the metric value from the model's model card.

    The first occurrence of the task's metric line is used: "Accuracy: <x.y>"
    for audio classification, "Wer: <x.y>" for speech recognition.

    :param model_card_content: model card content
    :param task: "audio-classification" or "automatic-speech-recognition"
    :return: the metric as a float, or None when the task has no metric
        pattern or the card does not contain one
    """
    metric_patterns = {
        "audio-classification": r"Accuracy: (\d+\.\d+)",
        "automatic-speech-recognition": r"Wer: (\d+\.\d+)",
    }

    pattern = metric_patterns.get(task)
    if pattern is None:
        # Unknown task: previously `pattern` stayed unbound and this crashed.
        return None

    found = re.search(pattern, model_card_content)
    if found is None:
        return None
    return float(found.group(1))


def parse_metrics(model, task):
    """
    Load a model's card and pull the task metric out of its text
    :param model: model id
    :param task: task name, forwarded to extract_metric
    :return: whatever extract_metric returns for the card content
    """
    card_text = ModelCard.load(model).content
    return extract_metric(card_text, task)


def certification(hf_username):
  results_certification = [
      {
          "unit": "Unit 4: Audio Classification",
          "task": "audio-classification",
          "baseline_metric": 0.87,
          "best_result": 0,
          "best_model_id": "",
          "passed_": False
      },
  {
          "unit": "Unit 5: Automatic Speech Recognition",
          "task": "automatic-speech-recognition",
          "baseline_metric": 0.37,
          "best_result": 0,
          "best_model_id": "",
          "passed_": False
  },
  {
          "unit": "Unit 6: Text-to-Speech",
          "task": "text-to-speech",
          "baseline_metric": 0,
          "best_result": 0,
          "best_model_id": "",
          "passed_": False
  },
  {
          "unit": "Unit 7: TBD",
          "task": "TBD",
          "baseline_metric": 0.99,
          "best_result": 0,
          "best_model_id": "",
          "passed_": False
  },
  ]

  for unit in results_certification:
    unit["passed"] = pass_emoji(unit["passed_"])

    match unit["task"]:
      case "audio-classification":
        try:
          user_ac_models = get_user_models(hf_username, task = "audio-classification")
          best_result, best_model_id = calculate_best_result(user_ac_models, task = "audio-classification")
          unit["best_result"] = best_result
          unit["best_model_id"] = best_model_id
          if unit["best_result"] >= unit["baseline_metric"]:
            unit["passed_"] = True
            unit["passed"] = pass_emoji(unit["passed_"])
        except: print("Either no relevant models found, or no metrics in the model card for audio classificaiton")
      case "automatic-speech-recognition":
        try:
          user_asr_models = get_user_models(hf_username, task = "automatic-speech-recognition")
          best_result, best_model_id = calculate_best_result(user_asr_models, task = "automatic-speech-recognition")
          unit["best_result"] = best_result
          unit["best_model_id"] = best_model_id
          if unit["best_result"] <= unit["baseline_metric"]:
            unit["passed_"] = True
            unit["passed"] = pass_emoji(unit["passed_"])
        except: print("Either no relevant models found, or no metrics in the model card for automatic speech recognition")
      case "text-to-speech":
        try:
          user_tts_models = get_user_models(hf_username, task = "text-to-speech")
          if user_tts_models: 
            unit["best_result"] = 0
            unit["best_model_id"] = user_tts_models[0]
            unit["passed_"] = True
            unit["passed"] = pass_emoji(unit["passed_"])
        except: print("Either no relevant models found, or no metrics in the model card for automatic speech recognition")
        print("Evaluation for this unit is work in progress")
      case _:
        print("Unknown task")

  print(results_certification)

  df = pd.DataFrame(results_certification)
  df = df[['passed', 'unit', 'task', 'baseline_metric', 'best_result', 'best_model_id']]
  return df
    
# Gradio UI: a username box, a button, and the per-unit progress table.
with gr.Blocks() as demo:
    gr.Markdown("""
    # 🏆 Check your progress in the Audio Course 🏆
    
    - To get a certificate of completion, you must **pass 3 out of 4 assignments before July 31st 2023**.
    - To get an honors certificate, you must **pass 4 out of 4 assignments before July 31st 2023**.

    To pass an assignment, your model's metric should reach the baseline metric: equal to or higher than the baseline for accuracy, equal to or lower than the baseline for word error rate (WER).
    
    Make sure that you have uploaded your model(s) to Hub and type your Hugging Face Username here to check if you pass (in my case MariaK)
    """)
    
    hf_username = gr.Textbox(placeholder="MariaK", label="Your Hugging Face Username")
    check_progress_button = gr.Button(value="Check my progress")
    # Start with an empty table. The previous initial value called
    # certification() with the Textbox component itself instead of a username,
    # running the whole check at build time without any user to look up.
    output = gr.components.Dataframe(
        headers=["passed", "unit", "task", "baseline_metric", "best_result", "best_model_id"]
    )
    check_progress_button.click(fn=certification, inputs=hf_username, outputs=output)

demo.launch()