import torch
import gradio as gr
import pytube as pt
from transformers import pipeline
from huggingface_hub import model_info
import time
import unicodedata
# from gradio.themes.utils.theme_dropdown import create_theme_dropdown
MODEL_NAME = "SakshiRathi77/wav2vec2-large-xlsr-300m-hi-kagglex"
lang = "hi"
# my_theme = gr.Theme.from_hub('freddyaboulton/dracula_revamped')
device = 0 if torch.cuda.is_available() else "cpu"
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    device=device,
)
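
# The ASR pipeline accepts an audio filepath and returns a dict whose "text"
# field holds the transcription; both handlers below rely on that shape.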
def transcribe(microphone, file_upload):
    """Transcribe a recorded or uploaded audio clip with the ASR pipeline."""
    warn_output = ""
    if (microphone is not None) and (file_upload is not None):
        warn_output = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
        )
    elif (microphone is None) and (file_upload is None):
        return "ERROR: You have to either use the microphone or upload an audio file"
    file = microphone if microphone is not None else file_upload
    text = pipe(file)["text"]
    return warn_output + text
def rt_transcribe(audio, state=""):
    """Transcribe a streamed microphone chunk and append it to the running state."""
    time.sleep(2)
    text = pipe(audio)["text"]
    state += unicodedata.normalize("NFC", text) + " "
    return state, state
demo = gr.Blocks()
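
# Two interfaces share the pipeline: "mf_transcribe" handles recorded or uploaded
# clips, while "rt_transcribe" transcribes streaming microphone audio; both are
# combined in a TabbedInterface inside the Blocks context at the bottom.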
examples = [["examples/example1.mp3"], ["examples/example2.mp3"], ["examples/example3.mp3"]]
title ="""
HindiSpeechPro: WAV2VEC-Powered ASR Interface
"""
description = """
<p>
<center>
Welcome to the HindiSpeechPro, a cutting-edge interface powered by a fine-tuned version of facebook/wav2vec2-xls-r-300m on the common_voice dataset.
<img src="https://huggingface.co/spaces/SakshiRathi77/SakshiRathi77-Wav2Vec2-hi-kagglex/resolve/main/Images/main_image2.png" alt="logo" ;>
</center>
</p>
"""
# article = "<p style='text-align: center'><a href='https://github.com/SakshiRathi77/ASR' target='_blank'>Source Code on Github</a></p><p style='text-align: center'><a href='https://huggingface.co/blog/fine-tune-xlsr-wav2vec2' target='_blank'>Reference</a></p><p style='text-align: center'><a href='https://forms.gle/hjfc3F1P7m3weQVAA' target='_blank'><img src='https://e7.pngegg.com/pngimages/794/310/png-clipart-customer-review-feedback-user-service-others-miscellaneous-text-thumbnail.png' alt='Feedback Form' ;></a></p>"
mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
        gr.Audio(source="upload", type="filepath"),
    ],
    outputs="text",
    # theme="huggingface",
    title=title,
    description=description,
    allow_flagging="never",
    examples=examples,
)
rt_transcribe = gr.Interface(
    fn=rt_transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath", streaming=True),
        "state",
    ],
    outputs=["textbox", "state"],
    # theme="huggingface",
    title=title,
    description=description,
    allow_flagging="never",
    live=True,
)
with demo:
    gr.TabbedInterface(
        [mf_transcribe, rt_transcribe], ["Transcribe Audio", "Transcribe Realtime Voice"]
    )
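
# Note: share=True requests a temporary public gradio.live link; on a hosted
# Hugging Face Space the app is already served publicly, so the flag mainly
# matters when running locally.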
demo.launch(share=True)