ivangtorre committed on
Commit
5016634
1 Parent(s): 51a557e

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +5 -3
README.md CHANGED
@@ -73,9 +73,11 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
73
  import torch
74
  from jiwer import cer
75
  import torch.nn.functional as F
 
 
76
 
77
-
78
- librispeech_eval = load_dataset("ivangtorre/second_americas_nlp_2022", split="validation")
79
 
80
  model = Wav2Vec2ForCTC.from_pretrained("ivangtorre/wav2vec2-xlsr-300m-quechua")
81
  processor = Wav2Vec2Processor.from_pretrained("ivangtorre/wav2vec2-xlsr-300m-quechua")
@@ -90,7 +92,7 @@ def map_to_pred(batch):
90
  batch["transcription"] = processor.batch_decode(predicted_ids)
91
  return batch
92
 
93
- result = librispeech_eval.map(map_to_pred, batched=True, batch_size=1)
94
 
95
  print("CER:", cer(result["source_processed"], result["transcription"]))
96
  ```
 
73
  import torch
74
  from jiwer import cer
75
  import torch.nn.functional as F
76
+ from datasets import load_dataset
77
+ import soundfile as sf
78
 
79
+ americasnlp = load_dataset("ivangtorre/second_americas_nlp_2022", "quechua", split="dev")
80
+ quechua = americasnlp.filter(lambda language: language['subset']=='quechua')
81
 
82
  model = Wav2Vec2ForCTC.from_pretrained("ivangtorre/wav2vec2-xlsr-300m-quechua")
83
  processor = Wav2Vec2Processor.from_pretrained("ivangtorre/wav2vec2-xlsr-300m-quechua")
 
92
  batch["transcription"] = processor.batch_decode(predicted_ids)
93
  return batch
94
 
95
+ result = quechua.map(map_to_pred, batched=True, batch_size=1)
96
 
97
  print("CER:", cer(result["source_processed"], result["transcription"]))
98
  ```