SeyedAli committed on
Commit
98a69c9
1 Parent(s): 29b0243

Update app.txt

Files changed (1): app.txt (+24 -4)
app.txt CHANGED
@@ -1,10 +1,30 @@
+import torch
 import torchaudio
-import librosa
 from transformers import Wav2Vec2ForCTC,Wav2Vec2Processor,pipeline
+
 processor = Wav2Vec2Processor.from_pretrained(model_name_or_path)
 model = Wav2Vec2ForCTC.from_pretrained("m3hrdadfi/wav2vec2-large-xlsr-persian")
-def ASR(Audio):
-    audiofile=torchaudio.load(Audio,16000)
 
+def speech_file_to_array_fn(path, sampling_rate):
+    speech_array, _sampling_rate = torchaudio.load(path)
+    resampler = torchaudio.transforms.Resample(_sampling_rate)
+    speech = resampler(speech_array).squeeze().numpy()
+    return speech
+
+def predict(path, sampling_rate):
+    speech = speech_file_to_array_fn(path, sampling_rate)
+    inputs = feature_extractor(speech, sampling_rate=sampling_rate, return_tensors="pt", padding=True)
+    inputs = {key: inputs[key].to(device) for key in inputs}
+
+    with torch.no_grad():
+        logits = model(**inputs).logits
+
+    scores = F.softmax(logits, dim=1).detach().cpu().numpy()[0]
+    outputs = [{"Label": config.id2label[i], "Score": f"{round(score * 100, 3):.1f}%"} for i, score in enumerate(scores)]
+    return outputs
+
+def SER(Audio):
+    return predict(Audio,16000)
+
-iface = gr.Interface(fn=ASR, inputs="audio", outputs="text")
+iface = gr.Interface(fn=SER, inputs="audio", outputs="text")
 iface.launch(share=False)
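
Note: as committed, the new app.txt cannot run on its own. It references names that are never defined (model_name_or_path, feature_extractor, device, F, config, and gr), the predict body follows a classification pattern (softmax over logits plus config.id2label) even though Wav2Vec2ForCTC returns per-frame vocabulary logits meant for CTC decoding, and the handler is named SER while the loaded checkpoint is a Persian ASR model. Below is a minimal, self-contained sketch of what the app plausibly intends, assuming the goal is transcription with the m3hrdadfi/wav2vec2-large-xlsr-persian checkpoint; the greedy CTC decode, the transcribe name, and the gr.Audio(type="filepath") input are assumptions on my part, not part of the commit.

# Hypothetical completed app.txt -- a sketch, not the committed code.
import torch
import torchaudio
import gradio as gr
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

# Assumption: the processor comes from the same checkpoint as the model;
# the commit never assigns model_name_or_path.
model_name_or_path = "m3hrdadfi/wav2vec2-large-xlsr-persian"
device = "cuda" if torch.cuda.is_available() else "cpu"

processor = Wav2Vec2Processor.from_pretrained(model_name_or_path)
model = Wav2Vec2ForCTC.from_pretrained(model_name_or_path).to(device)

def speech_file_to_array_fn(path, sampling_rate):
    # Load the file and resample from its native rate to the model's rate.
    # squeeze() assumes a mono recording, as the committed code does.
    speech_array, orig_rate = torchaudio.load(path)
    resampler = torchaudio.transforms.Resample(orig_rate, sampling_rate)
    return resampler(speech_array).squeeze().numpy()

def predict(path, sampling_rate):
    speech = speech_file_to_array_fn(path, sampling_rate)
    inputs = processor(speech, sampling_rate=sampling_rate, return_tensors="pt", padding=True)
    inputs = {key: inputs[key].to(device) for key in inputs}

    with torch.no_grad():
        logits = model(**inputs).logits  # shape: (batch, frames, vocab)

    # Greedy CTC decode: pick the most likely token per frame, then let the
    # tokenizer collapse repeated tokens and blanks into text.
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(predicted_ids)[0]

def transcribe(audio_path):
    return predict(audio_path, 16000)

# type="filepath" so the handler receives a path that torchaudio.load can open.
iface = gr.Interface(fn=transcribe, inputs=gr.Audio(type="filepath"), outputs="text")
iface.launch(share=False)

If emotion recognition is actually the goal, as the SER name suggests, the checkpoint would instead need a classification head (for example transformers' Wav2Vec2ForSequenceClassification), whose config.id2label would then make the committed scores/outputs lines meaningful.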