Sasha committed on
Commit 4474a2c (1 parent: 03a66e4)

adding some hacky task type detection

Files changed (1): app.py +17 -8
app.py CHANGED
@@ -23,7 +23,9 @@ top_datasets= ['glue', 'super_glue', 'wikitext', 'imdb', 'squad', 'squad_es', \
     'sick', 'xsum', 'wikiann', 'yelp_polarity', 'hellaswag', 'piqa', \
     'race', 'winogrande']
 
-tasks= ['text-classification', 'question-answering-extractive', 'automatic-speech-recognition']
+tasks= ['text classification', 'question answering', 'automatic speech recognition', 'natural language inference', \
+    'machine translation', 'sentiment analysis', 'text simplification', 'named entity recognition', \
+    'reading comprehension']
 metrics= ['matthews_correlation', 'perplexity', 'meteor', 'code_eval', 'super_glue', 'rouge', 'mauve', 'cer', 'accuracy', 'recall', 'bleurt', 'sari', 'precision', 'mean_iou', 'squad', 'mahalanobis', 'chrf', 'mae', 'squad_v2', 'seqeval', 'cuad', 'wiki_split', 'google_bleu', 'competition_math', 'pearsonr', 'xtreme_s', 'comet', 'gleu', 'spearmanr', 'f1', 'frugalscore', 'bertscore', 'indic_glue', 'mse', 'xnli', 'ter', 'coval', 'wer', 'bleu', 'glue', 'sacrebleu']
 
 with st.sidebar.expander("Datasets", expanded=True):
@@ -50,11 +52,12 @@ st.markdown("### Description")
 st.markdown(dataset_builder.info.description)
 st.markdown("For more information about this dataset, check out [its website](https://huggingface.co/datasets/"+dataset_name+")")
 
+
 st.markdown("### Dataset-Specific Metrics")
 if dataset_name in metrics:
     st.markdown("Great news! Your dataset has a dedicated metric for it! You can use it like this:")
     code = ''' from datasets import load_metric
-    metric = load_metric('''+dataset+''', '''+config+''')'''
+    metric = load_metric('''+dataset_name+''', '''+dataset_config+''')'''
     st.code(code, language='python')
     dedicated_metric = True
 else:
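
When the dataset has a dedicated metric, the snippet rendered above is ordinary datasets-library code. A minimal sketch of the rendered output, assuming the hypothetical pair dataset_name='glue' and dataset_config='cola':

from datasets import load_metric

# Hypothetical example values: 'glue' and 'cola' stand in for the app's
# dataset_name and dataset_config.
metric = load_metric('glue', 'cola')
results = metric.compute(predictions=[0, 1, 0, 1], references=[0, 1, 0, 1])
print(results)  # {'matthews_correlation': 1.0} for a perfect match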
@@ -65,14 +68,20 @@ st.markdown("### Task-Specific Metrics")
 
 try:
     task = dataset_builder.info.task_templates[0].task
-    st.markdown("The task associated to it is: " + task)
+except:
+    for t in tasks:
+        if t in str(dataset_builder.info.description).lower():
+            task = t
+        else:
+            task = None
+
+if task is not None:
+    st.markdown("The task associated with your dataset is: " + task.replace('-',' '))
     if task == 'automatic-speech-recognition':
         st.markdown('Automatic Speech Recognition has some dedicated metrics such as:')
         st.markdown('[Word Error Rate](https://huggingface.co/metrics/wer)')
         st.markdown('[Character Error Rate](https://huggingface.co/metrics/cer)')
-    else:
-        st.markdown("The task for your dataset doesn't have any dedicated metrics, but you can still use general ones!")
-except:
+else:
     st.markdown("The task for your dataset doesn't have any dedicated metrics, but you can still use general ones!")
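
As committed, the fallback loop resets task to None on every entry of tasks that is missing from the description, so a match only survives if it comes last in the list. A first-match variant is sketched below; the description string is a hypothetical stand-in for dataset_builder.info.description:

# Standalone sketch of the description heuristic (hypothetical inputs).
tasks = ['text classification', 'question answering', 'automatic speech recognition']
description = "A benchmark for question answering over Wikipedia articles."

task = None                  # default when no task name matches
for t in tasks:
    if t in description.lower():
        task = t             # keep the first task name found
        break                # stop so later misses cannot reset it

print(task)  # question answering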
 
@@ -106,14 +115,14 @@ try:
     #proportion = [0.85, 0.15]
     stdev_dataset= statistics.stdev(proportion)
     if stdev_dataset <= balanced_stdev:
-        st.markdown("Since your dataset is well-balanced, you can look at using:")
+        st.markdown("Since your dataset is well-balanced (with a standard deviation of " + str(round(stdev_dataset,2)) +"), you can look at using:")
         st.markdown('[Accuracy](https://huggingface.co/metrics/accuracy)')
         accuracy_code = '''from datasets import load_metric
         metric = load_metric("accuracy")'''
         st.code(accuracy_code, language='python')
 
     else:
-        st.markdown("Since your dataset is not well-balanced, you can look at using:")
+        st.markdown("Since your dataset is not well-balanced (with a standard deviation of " + str(round(stdev_dataset,2)) +"), you can look at using:")
         st.markdown('[F1 Score](https://huggingface.co/metrics/f1)')
         accuracy_code = '''from datasets import load_metric
         metric = load_metric("accuracy")'''
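
The balance check treats the label distribution as balanced when the standard deviation of the class proportions falls under a cutoff; note that the F1 branch above still renders a snippet loading "accuracy". A standalone sketch with hypothetical proportions that loads f1 in the imbalanced case:

import statistics
from datasets import load_metric

# Hypothetical inputs: the app derives proportion from the dataset's label
# counts and defines the balanced_stdev cutoff elsewhere.
proportion = [0.85, 0.15]
balanced_stdev = 0.2

stdev_dataset = statistics.stdev(proportion)  # 0.49 here
if stdev_dataset <= balanced_stdev:
    metric = load_metric("accuracy")  # balanced: accuracy is informative
else:
    metric = load_metric("f1")        # imbalanced: F1 reflects minority-class errors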
 