Spaces:
Running
on
Zero
Running
on
Zero
ASR Transcriber optimisation for CPU
Browse files
- Model changed to openai/whisper-small
- Enabled BetterTransformer (pipe.model.to_bettertransformer())
app.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import gradio as gr
|
2 |
-
import time
|
3 |
import os
|
4 |
from pydub import AudioSegment
|
5 |
|
@@ -12,27 +11,41 @@ def audio_converter(audio_file:str):
|
|
12 |
return audio_wav_filename
|
13 |
|
14 |
def asr_transcriber(audio_file):
|
15 |
-
from transformers import pipeline
|
16 |
import torch
|
17 |
-
import
|
18 |
|
19 |
audio_file_wav = audio_converter(audio_file)
|
20 |
-
|
21 |
# Check for CUDA availability (GPU)
|
22 |
if torch.cuda.is_available():
|
23 |
device_id = torch.device('cuda')
|
24 |
else:
|
25 |
device_id = torch.device('cpu')
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
# Initialize the ASR pipeline
|
28 |
pipe = pipeline(
|
29 |
"automatic-speech-recognition",
|
30 |
-
model=
|
31 |
-
torch_dtype=
|
32 |
-
device=device_id
|
33 |
)
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
|
|
36 |
language = None
|
37 |
task = "transcribe"
|
38 |
|
@@ -51,7 +64,7 @@ with gr.Blocks() as transcriberUI:
|
|
51 |
"""
|
52 |
# Ola Xara & Solange!
|
53 |
Clicar no botao abaixo para selecionar o Audio a ser transcrito!
|
54 |
-
Ambiente
|
55 |
""")
|
56 |
inp = gr.File(label="Arquivo de Audio", show_label=True, file_count="single", file_types=["m4a"])
|
57 |
transcribe = gr.Textbox(label="Transcricao", show_label=True, show_copy_button=True)
|
|
|
1 |
import gradio as gr
|
|
|
2 |
import os
|
3 |
from pydub import AudioSegment
|
4 |
|
|
|
11 |
return audio_wav_filename
|
12 |
|
13 |
def asr_transcriber(audio_file):
|
14 |
+
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
15 |
import torch
|
16 |
+
import optimum
|
17 |
|
18 |
audio_file_wav = audio_converter(audio_file)
|
19 |
+
|
20 |
# Check for CUDA availability (GPU)
|
21 |
if torch.cuda.is_available():
|
22 |
device_id = torch.device('cuda')
|
23 |
else:
|
24 |
device_id = torch.device('cpu')
|
25 |
+
|
26 |
+
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
27 |
+
|
28 |
+
#device_id = "mps" for Mac only
|
29 |
+
#torch_dtype = float16
|
30 |
+
flash = False
|
31 |
+
ts = True
|
32 |
+
|
33 |
+
#Try to optimize when CPU and float32
|
34 |
+
model_id = "openai/whisper-small"
|
35 |
+
|
36 |
# Initialize the ASR pipeline
|
37 |
pipe = pipeline(
|
38 |
"automatic-speech-recognition",
|
39 |
+
model=model_id,
|
40 |
+
torch_dtype=torch_dtype,
|
41 |
+
device=device_id,
|
42 |
)
|
43 |
+
|
44 |
+
if device_id == "mps":
|
45 |
+
torch.mps.empty_cache()
|
46 |
+
elif not flash:
|
47 |
+
pipe.model = pipe.model.to_bettertransformer()
|
48 |
+
|
49 |
language = None
|
50 |
task = "transcribe"
|
51 |
|
|
|
64 |
"""
|
65 |
# Ola Xara & Solange!
|
66 |
Clicar no botao abaixo para selecionar o Audio a ser transcrito!
|
67 |
+
Ambiente de Teste: pode demorar um pouco. Nao fiquem nervosos :-)
|
68 |
""")
|
69 |
inp = gr.File(label="Arquivo de Audio", show_label=True, file_count="single", file_types=["m4a"])
|
70 |
transcribe = gr.Textbox(label="Transcricao", show_label=True, show_copy_button=True)
|