# Hugging Face Space: speech demo (ASR -> language ID -> TTS)
import gradio as gr

from asr import transcribe_audio  # Import your ASR function
from lid import detect_language  # Import your Language Detection function
from tts import synthesize  # Import the correct TTS function
def process_audio(audio_data):
    """Run the full speech pipeline: ASR -> language ID -> text -> TTS.

    Parameters
    ----------
    audio_data :
        Audio as delivered by ``gr.Audio(type="numpy")`` — presumably a
        ``(sample_rate, ndarray)`` tuple; TODO confirm against asr/lid.

    Returns
    -------
    tuple
        ``(generated_text, speech_output)`` — the text shown in the UI and
        the synthesized audio for the output ``gr.Audio`` component.
    """
    # Step 1: Perform ASR (audio -> text).
    transcription = transcribe_audio(audio_data)
    # Step 2: Detect the spoken language from the same audio.
    language = detect_language(audio_data)
    # Step 3: Placeholder "generation" — replace with real model inference later.
    generated_text = f"Detected Language: {language}\n\nTranscription: {transcription}"
    # Step 4: Convert the generated text into speech; synthesize returns a
    # second value we don't need here.
    speech_output, _ = synthesize(text=generated_text, lang=language, speed=1.0)
    return generated_text, speech_output
# Define the Gradio interface: audio in -> (text, speech) out.
interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="numpy"),  # 'source' kwarg removed (dropped in Gradio 4.x)
    outputs=[gr.Textbox(label="Generated Text"), gr.Audio(label="Generated Speech")],
    live=True,  # re-run the pipeline as the input changes
)

if __name__ == "__main__":
    interface.launch()