import os
import time
import json
import random
import functools

import gradio as gr
import torch
import torchaudio
import numpy as np
from scipy.io import wavfile
import scipy.signal as sps
from denoiser.demucs import Demucs
from pydub import AudioSegment

# Checkpoint holding the state dict for the Demucs(hidden=64) denoiser.
modelpath = './denoiser/master64.th'

# File the enhanced audio is written to; this path is also what is returned
# to Gradio for playback.
ENHANCED_PATH = 'enhanced.wav'


@functools.lru_cache(maxsize=1)
def _load_model():
    """Build the Demucs denoiser and load its weights from ``modelpath``.

    The result is cached so the checkpoint is read from disk only on the
    first request instead of on every call to :func:`transcribe`.

    Returns:
        The ``Demucs`` model on CPU, switched to evaluation mode.
    """
    model = Demucs(hidden=64)
    state_dict = torch.load(modelpath, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()
    return model


def transcribe(file_upload, microphone):
    """Denoise one audio file and return the path of the enhanced WAV.

    Whichever argument is not ``None`` is used as the input file; when both
    are given, ``microphone`` wins.

    NOTE(review): Gradio passes component values positionally in the order
    of ``inputs``. With the interface defined below (microphone component
    first, upload component second) ``file_upload`` actually receives the
    microphone recording and ``microphone`` receives the uploaded file.
    The selection logic still picks whichever one is present; confirm which
    source should take priority before renaming or reordering anything.

    Args:
        file_upload: Path to an audio file, or ``None``.
        microphone: Path to an audio file, or ``None``.

    Returns:
        The string ``"enhanced.wav"``, the path of the denoised audio.

    Raises:
        ValueError: If both arguments are ``None``.
    """
    file = microphone if microphone is not None else file_upload
    if file is None:
        raise ValueError(
            "No audio provided: record from the microphone or upload a file."
        )

    demucs = _load_model()

    # NOTE(review): the waveform is fed to the model as loaded, with no
    # resampling or channel downmix; confirm the model accepts the sample
    # rate and channel count of the incoming files.
    x, sr = torchaudio.load(file)

    # Inference only: skip autograd bookkeeping so the output tensor is not
    # attached to a graph when it is saved.
    with torch.no_grad():
        # x[None] adds a leading dimension for the model call; [0] drops it.
        out = demucs(x[None])[0]

    # Scale down only when the peak magnitude exceeds 1.
    out = out / max(out.abs().max().item(), 1)
    torchaudio.save(ENHANCED_PATH, out, sr)

    # Only the denoised audio needs its bitrate lowered before speech
    # recognition.
    enhanced = AudioSegment.from_wav(ENHANCED_PATH)
    enhanced.export(ENHANCED_PATH, format="wav", bitrate="256k")
    return ENHANCED_PATH


demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(
            source="microphone",
            type="filepath",
            optional=True,
            label="語音質檢麥克風實時錄音",
        ),
        gr.inputs.Audio(
            source="upload",
            type="filepath",
            optional=True,
            label="語音質檢原始音檔",
        ),
    ],
    outputs=gr.outputs.Audio(type="filepath", label="Output"),
    # The title text begins with a line break in the original source; it is
    # written as an escape here so the string literal is valid.
    title="\n語音質檢噪音去除 (語音增強):Meta Denoiser",
    description=(
        "為了提升語音識別的效果,可以在識別前先進行噪音去除"
    ),
    allow_flagging="never",
    examples=[
        ["exampleAudio/15s_2020-03-27_sep1.wav"],
        ["exampleAudio/13s_2020-03-27_sep2.wav"],
        ["exampleAudio/30s_2020-04-23_sep1.wav"],
        ["exampleAudio/15s_2020-04-23_sep2.wav"],
    ],
)

demo.launch(enable_queue=True)