AhmadMahmood447 committed on
Commit
333ea2d
1 Parent(s): 134ca21

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -0
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
+ import gradio as gr
4
+ import torchaudio
5
+
6
# Use the first CUDA GPU in half precision when one is available;
# otherwise stay on the CPU in full precision.
if torch.cuda.is_available():
    device, torch_dtype = "cuda:0", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

# Hub identifier of the checkpoint shared by the model and its processor.
model_id = "distil-whisper/distil-large-v3"

# Load the weights from safetensors in the chosen dtype and move them to the
# selected device in one step.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    use_safetensors=True,
).to(device)

# The processor supplies both the tokenizer and the feature extractor
# that the pipeline below is built from.
processor = AutoProcessor.from_pretrained(model_id)

# Speech-recognition pipeline used by the request handler. Audio is processed
# in 25-second chunks, 16 chunks per batch, with at most 128 new tokens
# generated per chunk.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
    chunk_length_s=25,
    batch_size=16,
    max_new_tokens=128,
)
29
+
30
def speech_to_text(audio_file):
    """Transcribe an uploaded audio file with the module-level ``pipe``.

    Args:
        audio_file: Value delivered by the Gradio ``"file"`` input. Depending
            on the Gradio version this is a filesystem path or a temp-file
            wrapper exposing the path as ``.name``; ``None`` when nothing
            was uploaded.

    Returns:
        The transcribed text. If loading or transcription fails, a string
        starting with ``"Error: "`` describing the failure is returned so
        the UI shows it instead of an empty output box.
    """
    import os

    if audio_file is None:
        return "Error: no audio file provided"

    # Plain paths are used as-is; file-wrapper objects carry the path in
    # ``.name``. (Checked in this order because ``Path.name`` would
    # otherwise strip the directory part.)
    if isinstance(audio_file, (str, os.PathLike)):
        audio_path = audio_file
    else:
        audio_path = getattr(audio_file, "name", audio_file)

    try:
        waveform, sample_rate = torchaudio.load(audio_path)

        # Down-mix multi-channel audio to mono rather than silently
        # discarding every channel but the first.
        if waveform.size(0) > 1:
            waveform = waveform.mean(dim=0, keepdim=True)

        # A bare array handed to the pipeline is assumed to already be at
        # the feature extractor's sampling rate, so convert explicitly.
        target_rate = pipe.feature_extractor.sampling_rate
        if sample_rate != target_rate:
            resample = torchaudio.transforms.Resample(sample_rate, target_rate)
            waveform = resample(waveform)

        print("pass to pipe")
        result = pipe(waveform[0].numpy())
        print("result", result)
        return result["text"]
    except Exception as e:
        # Top-level request boundary: log the failure and report it to the
        # user instead of returning None.
        message = f"Error: {str(e)}"
        print(message)
        return message
43
+
44
+
45
# Web UI: one file-upload input wired to speech_to_text, with the returned
# transcript shown as plain text.
iface = gr.Interface(
    fn=speech_to_text,
    inputs="file",
    outputs="text",
    title="Speech-to-Text",
)

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()