moro23 commited on
Commit
deeb5cc
·
1 Parent(s): e406f4c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -10
app.py CHANGED
@@ -7,12 +7,12 @@ from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
7
  nltk.download("punkt")
8
 
9
  ##
10
- token_value = "hf_********************"  # SECURITY: leaked HF access token redacted — revoke this credential immediately
11
  #Loading the pre-trained model and the tokenizer
12
- model_name = "moro23/wav2vec-large-xls-r-300-ha-colab_4"
13
  #tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_name, use_auth_token=token_value)
14
- tokenizer = Wav2Vec2Processor.from_pretrained(model_name, use_auth_token=token_value)
15
- model = Wav2Vec2ForCTC.from_pretrained(model_name, use_auth_token=token_value)
16
 
17
  def load_data(input_file):
18
 
@@ -23,7 +23,7 @@ def load_data(input_file):
23
  speech = speech[:,0] + speech[:,1]
24
  #Resampling the audio at 16KHz
25
  if sample_rate !=16000:
26
- speech = librosa.resample(speech, sample_rate,16000)
27
  return speech
28
 
29
  def correct_casing(input_sentence):
@@ -35,15 +35,15 @@ def asr_transcript(input_file):
35
 
36
  speech = load_data(input_file)
37
  #Tokenize
38
- input_values = tokenizer(speech, return_tensors="pt").input_values
39
  #Take logits
40
- logits = model(input_values).logits
41
  #Take argmax
42
- predicted_ids = torch.argmax(logits, dim=-1)
43
  #Get the words from predicted word ids
44
- transcription = tokenizer.decode(predicted_ids[0])
45
  #Correcting the letter casing
46
  transcription = correct_casing(transcription.lower())
47
  return transcription
48
 
49
- gr.Interface(asr_transcript, inputs = gr.inputs.Audio(source="microphone", type="filepath", optional=True, label="Speaker"), outputs = gr.outputs.Textbox(label="Output Text"), title="ASR For Hausa", description = "This application displays transcribed text for given audio input", theme="grass").launch(share=True)
 
7
  nltk.download("punkt")
8
 
9
  ##
10
+ #token_value = "hf_********************"  # SECURITY: leaked HF access token redacted — revoke this credential immediately
11
  #Loading the pre-trained model and the tokenizer
12
+ #model_name = "moro23/wav2vec-large-xls-r-300-ha-colab_4"
13
  #tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_name, use_auth_token=token_value)
14
+ #tokenizer = Wav2Vec2Processor.from_pretrained(model_name, use_auth_token=token_value)
15
+ #model = Wav2Vec2ForCTC.from_pretrained(model_name, use_auth_token=token_value)
16
 
17
  def load_data(input_file):
18
 
 
23
  speech = speech[:,0] + speech[:,1]
24
  #Resampling the audio at 16KHz
25
  if sample_rate !=16000:
26
+ speech = librosa.resample(speech, sample_rate, 16000)
27
  return speech
28
 
29
  def correct_casing(input_sentence):
 
35
 
36
  speech = load_data(input_file)
37
  #Tokenize
38
+ input_dict = tokenizer(speech, return_tensors="pt", padding=True)
39
  #Take logits
40
+ logits = model(input_dict.input_values.to("cuda")).logits
41
  #Take argmax
42
+ predicted_ids = torch.argmax(logits, dim=-1)[0]
43
  #Get the words from predicted word ids
44
+ transcription = tokenizer.decode(predicted_ids)
45
  #Correcting the letter casing
46
  transcription = correct_casing(transcription.lower())
47
  return transcription
48
 
49
+ gr.Interface(asr_transcript, inputs = gr.Audio(source="microphone", type="filepath", optional=True, label="Speaker"), outputs = gr.Textbox(label="Output Text"), title="ASR For Hausa", description = "This application displays transcribed text for given audio input").launch()