"""Gradio demo: ClearVoice speech super-resolution to 48 kHz.

The single processing function optionally runs speech enhancement first and
then speech super-resolution, writes the result to a WAV file, and returns the
path of that file so Gradio can serve it.
"""

import os
import random
import tempfile
import uuid

import torch
import soundfile as sf
import gradio as gr
import spaces
from clearvoice import ClearVoice

# Sample rate used when writing the super-resolved output.
OUTPUT_SAMPLE_RATE = 48000


@spaces.GPU
def fn_clearvoice_sr(input_wav, apply_se):
    """Run (optional) speech enhancement followed by speech super-resolution.

    Args:
        input_wav: Path of the audio file to process (Gradio passes a
            filepath because the input component uses ``type="filepath"``).
        apply_se: When truthy, the input is first passed through the
            ``MossFormer2_SE_48K`` speech-enhancement model and the enhanced
            file is used as the input of the super-resolution model.

    Returns:
        Path of a newly written WAV file (48 kHz) holding the first row of
        the super-resolution model output.

    Raises:
        gr.Error: If no input audio was supplied.
    """
    if input_wav is None:
        # Gradio passes None when the user submits without providing audio.
        raise gr.Error("Please provide an input audio file.")

    # Models are created per call, inside the GPU-decorated function, exactly
    # as before; hoisting them to module level is deliberately not attempted.
    sr_model = ClearVoice(
        task='speech_super_resolution',
        model_names=['MossFormer2_SR_48K'],
    )

    enhanced_path = None
    try:
        if apply_se:
            # Build a unique intermediate name that always ends in ".wav".
            # The previous `replace('.wav', ...)` left the name unchanged for
            # inputs with any other extension, and a 0..1000 random suffix can
            # collide between requests.
            stem = os.path.splitext(os.path.basename(input_wav))[0]
            enhanced_path = os.path.join(
                tempfile.gettempdir(),
                f"{stem}_{uuid.uuid4().hex}.wav",
            )
            se_model = ClearVoice(
                task='speech_enhancement',
                model_names=['MossFormer2_SE_48K'],
            )
            # NOTE(review): assumes ClearVoice writes the enhanced audio to
            # `output_path` when it names a .wav file -- confirm against the
            # installed clearvoice version.
            se_model(
                input_path=input_wav,
                online_write=True,
                output_path=enhanced_path,
            )
            input_wav = enhanced_path

        result = sr_model(input_path=input_wav, online_write=False)
    finally:
        # Best-effort cleanup of the intermediate file; only a regular file
        # is removed, anything else is left untouched.
        if enhanced_path is not None and os.path.isfile(enhanced_path):
            os.remove(enhanced_path)

    # The call may return either the audio directly or a dict of results;
    # in the dict case the first entry is used.
    if isinstance(result, dict):
        output_wav = next(iter(result.values()))
    else:
        output_wav = result

    # Write to a per-call temporary file so that concurrent requests do not
    # overwrite each other's output.
    with tempfile.NamedTemporaryFile(
        prefix='enhanced_high_res_', suffix='.wav', delete=False
    ) as handle:
        output_path = handle.name
    sf.write(output_path, output_wav[0, :], OUTPUT_SAMPLE_RATE)
    return output_path


demo = gr.Blocks()

sr_demo = gr.Interface(
    fn=fn_clearvoice_sr,
    inputs=[
        gr.Audio(label="Input Audio", type="filepath"),
        gr.Checkbox(label="Apply Speech Enhancement", value=True),
    ],
    outputs=[
        gr.Audio(label="Output Audio", type="filepath"),
    ],
    title="ClearVoice: Speech Super Resolution",
    description=(
        "ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice)) is AI-powered and transform low-resolution audio (effective sampling rate ≥ 16 kHz) into crystal-clear, high-resolution audio at 48 kHz. It supports most of audio types. "
        "To try it, simply upload your audio, or click one of the examples. "
    ),
    # NOTE(review): in the file as received these two literals span several
    # physical lines, which is not valid Python; the line breaks are written
    # as "\n" escapes here. Any markup around the titles (e.g. links) is not
    # visible in the received text -- confirm against the upstream file.
    article=(
        "\n\nFRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement\n\n"
        "\n\nMossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation\n\n"
    ),
    examples=[
        ["examples/mandarin_speech_16kHz.wav", True],
        ["examples/LJSpeech-001-0001-22k.wav", True],
        ["examples/LibriTTS_986_129388_24k.wav", True],
        ["examples/english_speech_48kHz.wav", True],
    ],
    cache_examples=True,
)

with demo:
    gr.TabbedInterface([sr_demo], ["Task 4: Speech Super Resolution"])

if __name__ == "__main__":
    # Launch only when executed as a script so the module can be imported
    # without starting a server.
    demo.launch()