# NOTE(review): the three lines below were Hugging Face Spaces page residue
# ("Spaces: / Sleeping / Sleeping") pasted into the source; kept as a comment
# so the file remains valid Python.
# Spaces: Sleeping
# Third-party
import gradio as gr
import spaces
import torch

# Local
from audio_processing import process_audio, print_results
# Report the compute device at startup so the Space logs show whether a GPU
# was actually granted. Query availability once instead of twice.
cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")
if cuda_available:
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
else:
    print("No CUDA GPUs available. Running on CPU.")
def transcribe_audio(audio_file, translate, model_size):
    """Transcribe an audio file and format the results as a readable report.

    Args:
        audio_file: Path to the uploaded audio file (Gradio passes a filepath).
        translate: When True, append a translated line for each segment.
        model_size: Whisper model size name, e.g. "tiny" ... "large-v3".

    Returns:
        A plain-text report: detected language changes first, then one entry
        per diarized segment with timestamps, language, and speaker label.
    """
    language_segments, final_segments = process_audio(
        audio_file, translate=translate, model_size=model_size
    )

    # Collect pieces in a list and join once — repeated `str +=` is quadratic.
    parts = ["Detected language changes:\n\n"]
    for segment in language_segments:
        parts.append(f"Language: {segment['language']}\n")
        parts.append(f"Time: {segment['start']:.2f}s - {segment['end']:.2f}s\n\n")

    parts.append(
        f"Transcription with language detection and speaker diarization (using {model_size} model):\n\n"
    )
    for segment in final_segments:
        parts.append(
            f"[{segment['start']:.2f}s - {segment['end']:.2f}s] ({segment['language']}) {segment['speaker']}:\n"
        )
        parts.append(f"Original: {segment['text']}\n")
        if translate:
            parts.append(f"Translated: {segment['translated']}\n")
        parts.append("\n")

    return "".join(parts)
# Gradio UI: audio upload + options in, plain-text transcription report out.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        # type="filepath" hands transcribe_audio a path string, not raw samples.
        gr.Audio(type="filepath"),
        gr.Checkbox(label="Enable Translation"),
        gr.Dropdown(
            choices=["tiny", "base", "small", "medium", "large", "large-v2", "large-v3"],
            label="Whisper Model Size",
            value="small",
        ),
    ],
    outputs="text",
    title="WhisperX Audio Transcription and Translation",
)

iface.launch()