"""Gradio demo that transcribes recorded or uploaded audio with Google Speech Recognition."""

import speech_recognition as sr
import gradio as gr
import numpy as np

_INT16_MAX = 32767
_INT16_MIN = -32768
_NO_AUDIO_MESSAGE = "No audio received. Please record or upload audio first."


def _to_pcm16_bytes(samples):
    """Convert an array of audio samples to mono 16-bit little-endian PCM bytes.

    Args:
        samples: Array-like of shape ``(n,)`` or ``(n, channels)``. Floating
            point input is treated as normalised to ``[-1.0, 1.0]``; signed
            integer input other than int16 is rescaled from its own full
            range to the 16-bit range.

    Returns:
        The samples as raw ``bytes``, two bytes per frame.
    """
    data = np.asarray(samples)
    scaled = data.astype(np.float64)

    if np.issubdtype(data.dtype, np.floating):
        scaled = np.clip(scaled, -1.0, 1.0) * _INT16_MAX
    elif np.issubdtype(data.dtype, np.signedinteger) and data.dtype != np.int16:
        scaled *= _INT16_MAX / np.iinfo(data.dtype).max

    if scaled.ndim > 1:
        # Multi-channel audio arrives as (samples, channels); average the
        # channels so the byte stream is not misread as interleaved mono.
        scaled = scaled.mean(axis=1)

    clipped = np.clip(np.rint(scaled), _INT16_MIN, _INT16_MAX)
    return clipped.astype("<i2").tobytes()


def recognize_speech(audio_data):
    """Transcribe audio captured by the Gradio ``Audio`` component.

    Args:
        audio_data: ``(sample_rate, samples)`` tuple as produced by
            ``gr.Audio(type="numpy")``, or ``None`` when nothing was
            recorded or uploaded.

    Returns:
        A human-readable string: the recognized text on success, otherwise
        a message describing why no transcription is available.
    """
    if audio_data is None:
        return _NO_AUDIO_MESSAGE

    sample_rate, samples = audio_data
    frame_data = _to_pcm16_bytes(samples)
    if not frame_data:
        return _NO_AUDIO_MESSAGE

    # AudioData expects raw PCM bytes; sample_width=2 matches the 16-bit
    # conversion performed above.
    audio = sr.AudioData(frame_data, sample_rate=int(sample_rate), sample_width=2)
    recognizer = sr.Recognizer()

    try:
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "Speech Recognition could not understand audio."
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"
    return f"Recognized Speech: {text}"


audio_input = gr.Audio(type="numpy")
iface = gr.Interface(fn=recognize_speech, inputs=audio_input, outputs="text")

if __name__ == "__main__":
    iface.launch()