Spaces:
Sleeping
Sleeping
File size: 3,331 Bytes
69fe4c0 ebff6ea 598487f 69fe4c0 ebff6ea 69fe4c0 598487f 69fe4c0 598487f 69fe4c0 598487f 69fe4c0 598487f b502f30 ebff6ea 69fe4c0 598487f 69fe4c0 86a86db 69fe4c0 bc29b0f 69fe4c0 ebff6ea 69fe4c0 598487f 69fe4c0 598487f 69fe4c0 598487f 69fe4c0 598487f 69fe4c0 598487f 69fe4c0 598487f 69fe4c0 ebff6ea 69fe4c0 ed7d1fd 69fe4c0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
import json
import requests
from datetime import datetime
import time
import traceback
API_URL = "https://api-inference.huggingface.co/models/"
def date_now():
    """Return the current local time as a 'YYYY-MM-DD HH:MM:SS' string."""
    return f"{datetime.now():%Y-%m-%d %H:%M:%S}"
def record_opt(msg):
    """Prefix *msg* with the current timestamp and terminate it with a newline."""
    stamp = date_now()
    return "{} {}\n".format(stamp, msg)
def speech_recognize(audio, model_name, access_token, opt):
    """Transcribe an audio file via the Hugging Face Inference API.

    Generator yielding two ``(text, opt)`` tuples: first an immediate
    progress message so the UI updates right away, then the transcription
    result (or a formatted traceback on failure) plus updated history.

    Args:
        audio: Filesystem path to the recorded audio file.
        model_name: Model id appended to API_URL, e.g. "openai/whisper-large-v3".
        access_token: Hugging Face access token used as a Bearer credential.
        opt: Accumulated operation-history text to append log lines to.
    """
    opt += record_opt("Transcription starts ...")
    yield "Transcribing, please wait..", opt
    start = time.monotonic()
    with open(audio, "rb") as f:
        data = f.read()
    try:
        url = API_URL + model_name
        print(f">>> url is {url}")
        headers = {"Authorization": f"Bearer {access_token}"}
        # POST the raw audio bytes; a timeout keeps a hung connection from
        # blocking the generator forever (the original request had none).
        response = requests.post(url, headers=headers, data=data, timeout=120)
        result = response.json()
        print(f">>> text is {result}")
        # KeyError here (e.g. an {"error": ...} payload) falls through to
        # the handler below and is reported in the UI.
        text = result['text']
    except Exception:
        # Only Exception — a bare except would also swallow
        # KeyboardInterrupt/SystemExit. Surface the traceback to the user
        # instead of crashing the app.
        text = f"Transcription failed:\n{traceback.format_exc()}"
    cost = time.monotonic() - start
    opt += record_opt(f"Transcription ends, time consuming{cost:.3f}s")
    yield text, opt
import gradio as gr
# Build the demo UI: an audio recorder plus model/token inputs on the left,
# transcription output and an event-history log on the right.
with gr.Blocks() as demo:
    gr.HTML("""<h2 align="center">Automatic Speech Recognition (OpenAI Whisper with Inference API)</h2>""")
    with gr.Row():
        gr.Markdown(
            """🤗 Call the huggingface API and use the OpenAI Whisper model for speech recognition, which can also be called speech to text(Speech to Text, STT)
            👉 The purpose is to practice using the Gradio Audio component and explore using the Huggingface Inference API
            > 💡Tip: You need to fill in the Huggingface access token to call the Huggingface Inference API
            """
        )
    with gr.Row():
        with gr.Column():
            # Microphone recording; type="filepath" hands speech_recognize a
            # path to the recorded file rather than raw sample data.
            audio = gr.Audio(source="microphone", type="filepath")
            model_name = gr.Dropdown(
                label="Select model",
                choices=[
                    "openai/whisper-large-v3",
                    "openai/whisper-large-v2",
                    "openai/whisper-large",
                    "openai/whisper-medium",
                    "openai/whisper-small",
                    "openai/whisper-base",
                    "openai/whisper-tiny",
                ],
                value="openai/whisper-large-v3",
            )
            access_token = gr.Textbox(label="Huggingface access token")
        with gr.Column():
            output = gr.Textbox(label="Transcription results")
            operation = gr.Textbox(label="Component operation history")
    # Each Audio lifecycle event appends a timestamped line to the history
    # box; the lambdas take and return the `operation` textbox value.
    audio.start_recording(
        lambda x: x + record_opt("Start recording ..."),
        inputs=operation, outputs=operation
    )
    audio.play(
        lambda x: x + record_opt("Play recording"),
        inputs=operation, outputs=operation
    )
    audio.pause(
        lambda x: x + record_opt("Pause playback"),
        inputs=operation, outputs=operation
    )
    audio.stop(
        lambda x: x + record_opt("Stop play"),
        inputs=operation, outputs=operation
    )
    audio.end(
        lambda x: x + record_opt("Finished playing"),
        inputs=operation, outputs=operation
    )
    # When recording stops, run the (generator) transcription function; its
    # two yields update the output and history boxes in sequence.
    audio.stop_recording(speech_recognize, inputs=[audio, model_name, access_token, operation], outputs=[output, operation])
# Queue requests so long transcriptions don't block other users.
# NOTE(review): `source=` and `concurrency_count=` are Gradio 3.x API; both
# changed in Gradio 4 (`sources=`, per-event concurrency) — confirm the
# pinned gradio version before upgrading.
demo.queue(max_size=4, concurrency_count=4)
demo.launch()
|