add evaluation

Browse files

Files changed (13) hide show

README.md +24 -4
eval.py +25 -7
full_eval.sh +15 -0
log_mozilla-foundation_common_voice_8_0_it_test_predictions.txt +0 -0
log_mozilla-foundation_common_voice_8_0_it_test_predictions_greedy.txt +0 -0
log_mozilla-foundation_common_voice_8_0_it_test_targets.txt +0 -0
log_speech-recognition-community-v2_dev_data_it_validation_predictions.txt +0 -0
log_speech-recognition-community-v2_dev_data_it_validation_predictions_greedy.txt +0 -0
log_speech-recognition-community-v2_dev_data_it_validation_targets.txt +0 -0
mozilla-foundation_common_voice_8_0_it_test_eval_results.txt +2 -2
mozilla-foundation_common_voice_8_0_it_test_eval_results_greedy.txt +2 -2
speech-recognition-community-v2_dev_data_it_validation_eval_results.txt +2 -0
speech-recognition-community-v2_dev_data_it_validation_eval_results_greedy.txt +2 -0

README.md CHANGED Viewed

@@ -22,16 +22,36 @@ model-index:
     metrics:
        - name: Test WER
          type: wer
-         value: 9.07
        - name: Test CER
          type: cer
-         value: 2.21
        - name: Test WER (+LM)
          type: wer
-         value: 7.05
        - name: Test CER (+LM)
          type: cer
-         value: 1.86
 ---
 # XLS-R-1B-ITALIAN

     metrics:
        - name: Test WER
          type: wer
+         value: 10.06
        - name: Test CER
          type: cer
+         value: 2.57
        - name: Test WER (+LM)
          type: wer
+         value: 7.59
        - name: Test CER (+LM)
          type: cer
+         value: 2.11
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: Robust Speech Event - Dev Data
+      type: speech-recognition-community-v2/dev_data
+      args: it
+    metrics:
+       - name: Test WER
+         type: wer
+         value: 23.38
+       - name: Test CER
+         type: cer
+         value: 9.41
+       - name: Test WER (+LM)
+         type: wer
+         value: 15.84
+       - name: Test CER (+LM)
+         type: cer
+         value: 8.93
 ---
 # XLS-R-1B-ITALIAN

eval.py CHANGED Viewed

@@ -1,12 +1,11 @@
 #!/usr/bin/env python3
 from datasets import load_dataset, load_metric, Audio, Dataset
-from transformers import pipeline, AutoFeatureExtractor, AutoTokenizer
 import re
 import torch
 import argparse
 from typing import Dict
 def log_results(result: Dataset, args: Dict[str, str]):
     """ DO NOT CHANGE. This function computes and logs the result metrics. """
@@ -68,17 +67,30 @@ def main(args):
     # dataset = dataset.select(range(10))
     # load processor
-    feature_extractor = AutoFeatureExtractor.from_pretrained(args.model_id)
-    sampling_rate = feature_extractor.sampling_rate
     # resample audio
-    dataset = dataset.cast_column("audio", Audio(sampling_rate=sampling_rate))
     # load eval pipeline
     if args.device is None:
         args.device = 0 if torch.cuda.is_available() else -1
-    asr = pipeline("automatic-speech-recognition", model=args.model_id, device=args.device)
     # build normalizer config
     tokenizer = AutoTokenizer.from_pretrained(args.model_id)
     tokens = [x for x in tokenizer.convert_ids_to_tokens(range(0, tokenizer.vocab_size))]
@@ -106,6 +118,9 @@ def main(args):
     # run inference on all examples
     result = dataset.map(map_to_pred, remove_columns=dataset.column_names)
     # compute and log_results
     # do not change function below
     log_results(result, args)
@@ -135,6 +150,9 @@ if __name__ == "__main__":
     parser.add_argument(
         "--log_outputs", action='store_true', help="If defined, write outputs to log file for analysis."
     )
     parser.add_argument(
         "--device",
         type=int,

 #!/usr/bin/env python3
 from datasets import load_dataset, load_metric, Audio, Dataset
+from transformers import pipeline, AutoFeatureExtractor, AutoTokenizer, AutoConfig, AutoModelForCTC, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM
 import re
 import torch
 import argparse
 from typing import Dict
 def log_results(result: Dataset, args: Dict[str, str]):
     """ DO NOT CHANGE. This function computes and logs the result metrics. """
     # dataset = dataset.select(range(10))
     # load processor
+    if args.greedy:
+        processor = Wav2Vec2Processor.from_pretrained(args.model_id)
+        decoder = None
+    else:
+        processor = Wav2Vec2ProcessorWithLM.from_pretrained(args.model_id)
+        decoder = processor.decoder
+    feature_extractor = processor.feature_extractor
+    tokenizer = processor.tokenizer
     # resample audio
+    dataset = dataset.cast_column("audio", Audio(sampling_rate=feature_extractor.sampling_rate))
     # load eval pipeline
     if args.device is None:
         args.device = 0 if torch.cuda.is_available() else -1
+    config = AutoConfig.from_pretrained(args.model_id)
+    model = AutoModelForCTC.from_pretrained(args.model_id)
+    #asr = pipeline("automatic-speech-recognition", model=args.model_id, device=args.device)
+    asr = pipeline("automatic-speech-recognition", config=config, model=model, tokenizer=tokenizer,
+                   feature_extractor=feature_extractor, decoder=decoder, device=args.device)
     # build normalizer config
     tokenizer = AutoTokenizer.from_pretrained(args.model_id)
     tokens = [x for x in tokenizer.convert_ids_to_tokens(range(0, tokenizer.vocab_size))]
     # run inference on all examples
     result = dataset.map(map_to_pred, remove_columns=dataset.column_names)
+    # filtering out empty targets
+    result = result.filter(lambda example: example["target"] != "")
     # compute and log_results
     # do not change function below
     log_results(result, args)
     parser.add_argument(
         "--log_outputs", action='store_true', help="If defined, write outputs to log file for analysis."
     )
+    parser.add_argument(
+        "--greedy", action='store_true', help="If defined, the LM will be ignored during inference."
+    )
     parser.add_argument(
         "--device",
         type=int,

full_eval.sh ADDED Viewed

	@@ -0,0 +1,15 @@

+# CV 8 - TEST
+python eval.py --model_id jonatasgrosman/wav2vec2-xls-r-1b-italian --dataset mozilla-foundation/common_voice_8_0 --config it --split test --log_outputs --greedy
+mv log_mozilla-foundation_common_voice_8_0_it_test_predictions.txt log_mozilla-foundation_common_voice_8_0_it_test_predictions_greedy.txt
+mv mozilla-foundation_common_voice_8_0_it_test_eval_results.txt mozilla-foundation_common_voice_8_0_it_test_eval_results_greedy.txt
+python eval.py --model_id jonatasgrosman/wav2vec2-xls-r-1b-italian --dataset mozilla-foundation/common_voice_8_0 --config it --split test --log_outputs
+# HF EVENT - DEV
+python eval.py --model_id jonatasgrosman/wav2vec2-xls-r-1b-italian --dataset speech-recognition-community-v2/dev_data --config it --split validation --chunk_length_s 5.0 --stride_length_s 1.0 --log_outputs --greedy
+mv log_speech-recognition-community-v2_dev_data_it_validation_predictions.txt log_speech-recognition-community-v2_dev_data_it_validation_predictions_greedy.txt
+mv speech-recognition-community-v2_dev_data_it_validation_eval_results.txt speech-recognition-community-v2_dev_data_it_validation_eval_results_greedy.txt
+python eval.py --model_id jonatasgrosman/wav2vec2-xls-r-1b-italian --dataset speech-recognition-community-v2/dev_data --config it --split validation --chunk_length_s 5.0 --stride_length_s 1.0 --log_outputs

log_mozilla-foundation_common_voice_8_0_it_test_predictions.txt CHANGED Viewed