Spaces:
Sleeping
Sleeping
File size: 2,464 Bytes
6b9c021 c3ed104 6b9c021 ea2f95f 81a412a c27a48e 6b9c021 ea2f95f ce037b1 6b9c021 ea2f95f 863d084 a0291f0 ea2f95f a0291f0 a61a05c c27a48e ff8667c a4340f7 c27a48e a4340f7 c27a48e dc74cbe 5721619 6b9c021 a4340f7 6b9c021 ea2f95f 863d084 ea2f95f 6b9c021 a4340f7 863d084 a4340f7 6b9c021 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import os
import gradio as gr
import torch
from transformers import pipeline
# Title shown on both Gradio interfaces.
title = "Transcribe speech in several languages"

# Run on the first CUDA GPU when one is available, otherwise on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# German-specific wav2vec2 model. It is given the same device as the Whisper
# pipeline so that both models use the GPU when one is present.
asr_pipe_audio2Text_Ge = pipeline(
    task="automatic-speech-recognition",
    model="jonatasgrosman/wav2vec2-large-xlsr-53-german",
    device=device,
)

# Whisper model, used for "Auto Detect" transcription and for translation.
asr_pipe_whisper = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-medium",
    device=device,
)
def transcribeFile(inputlang, audio_path: str) -> str:
    """Transcribe an audio file with the model selected by ``inputlang``.

    Args:
        inputlang: Source-language choice. ``"Auto Detect"`` sends the audio
            to the Whisper pipeline with the "transcribe" task; ``"German"``
            sends it to the German wav2vec2 pipeline.
        audio_path: Path of the audio file to transcribe.

    Returns:
        The ``"text"`` entry of the selected pipeline's result.

    Raises:
        ValueError: If ``inputlang`` is not one of the supported choices.
            (Previously this case failed with an unbound-local error.)
    """
    # Both pipelines are called with the same chunking arguments: 10 s chunks,
    # a (4, 2) s stride and a batch size of 32.
    chunking = {
        "chunk_length_s": 10,
        "stride_length_s": (4, 2),
        "batch_size": 32,
    }

    if inputlang == "Auto Detect":
        transcription = asr_pipe_whisper(
            audio_path,
            generate_kwargs={"task": "transcribe"},
            **chunking,
        )
    elif inputlang == "German":
        transcription = asr_pipe_audio2Text_Ge(audio_path, **chunking)
    else:
        raise ValueError(
            f"Unsupported source language {inputlang!r}; "
            "expected 'Auto Detect' or 'German'."
        )

    return transcription["text"]
def translateAudio(audio_path):
    """Run the Whisper pipeline on ``audio_path`` with the "translate" task.

    The pipeline result is returned unchanged; callers read its ``"text"``
    entry. Generation is capped at 256 new tokens.
    """
    whisper_options = {"task": "translate"}
    return asr_pipe_whisper(
        audio_path,
        max_new_tokens=256,
        generate_kwargs=whisper_options,
    )
def transcribeFileMulti(inputlang, audio_path: str) -> str:
    """Transcribe a recording and append its translation.

    Args:
        inputlang: Source-language choice. ``"Auto Detect"`` (the default
            offered by the microphone interface) and ``"English"`` use the
            Whisper pipeline; ``"German"`` uses the German wav2vec2 pipeline.
        audio_path: Path of the recorded audio file.

    Returns:
        The transcription text immediately followed by the translation text
        returned by ``translateAudio``.

    Raises:
        ValueError: If ``inputlang`` is not one of the supported choices.
    """
    # The microphone interface offers "Auto Detect" and "German". The old
    # check only matched "English", so the default selection left
    # ``transcription`` unbound. "English" is still accepted for callers
    # that pass it.
    if inputlang in ("Auto Detect", "English"):
        transcription = asr_pipe_whisper(audio_path)
    elif inputlang == "German":
        transcription = asr_pipe_audio2Text_Ge(audio_path)
    else:
        raise ValueError(
            f"Unsupported source language {inputlang!r}; "
            "expected 'Auto Detect', 'English' or 'German'."
        )

    translation = translateAudio(audio_path)

    # NOTE: the two texts are joined without a separator, as before.
    return transcription["text"] + translation["text"]
# Interface for uploaded audio files: a language selector plus a file upload,
# with the transcription shown as plain text.
_upload_language = gr.Radio(
    ["Auto Detect", "German"],
    value="Auto Detect",
    label="Source Language",
    info="Select the language of the speech you want to transcribe",
)
_upload_audio = gr.Audio(
    source="upload",
    type="filepath",
    label="Upload audio file",
)
app1 = gr.Interface(
    fn=transcribeFile,
    inputs=[_upload_language, _upload_audio],
    outputs="text",
    title=title,
)
# Interface for microphone recordings: a language selector plus a microphone
# input, with the result of transcribeFileMulti shown as plain text.
_mic_language = gr.Radio(
    ["Auto Detect", "German"],
    value="Auto Detect",
    label="Source Language",
    info="Select the language of the speech you want to transcribe",
)
_mic_audio = gr.Audio(source="microphone", type="filepath")
app2 = gr.Interface(
    fn=transcribeFileMulti,
    inputs=[_mic_language, _mic_audio],
    outputs="text",
    title=title,
)
# Combine both interfaces into one tabbed app; tab labels are matched to the
# interfaces by position.
_tab_interfaces = [app1, app2]
_tab_labels = ["Audio File", "Microphone"]
demo = gr.TabbedInterface(_tab_interfaces, _tab_labels)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|