File size: 1,792 Bytes
23111c6 32c2d1f 23111c6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import os
import subprocess

import gradio as gr
import numpy as np
import soundfile as sf
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# import torch
# Sample rate (Hz) used throughout the script: the input audio is resampled to
# this rate when it differs, and the separated outputs are written at this rate.
SAMPLE_RATE = 8000
def get_sample_rate(audio_file_path):
    """Return the sample rate (in Hz) of an audio file.

    The value is taken from the file's metadata via ``soundfile.info`` so the
    audio samples do not have to be decoded into memory just to learn the rate.

    Args:
        audio_file_path: Path to an audio file that ``soundfile`` can open.

    Returns:
        The sample rate reported for the file.
    """
    return sf.info(audio_file_path).samplerate
def change_sample_rate(input_audio_file_path, output_audio_file_path, sample_rate):
    """Resample an audio file with ffmpeg.

    Equivalent to running
    ``ffmpeg -y -i <input> -ar <sample_rate> <output>``.

    The command is passed as an argument list (no shell), so paths containing
    spaces or shell metacharacters are handled correctly. ``-y`` makes ffmpeg
    overwrite an existing output file instead of prompting or aborting, which
    keeps repeated runs of the script non-interactive.

    Args:
        input_audio_file_path: Path of the audio file to read.
        output_audio_file_path: Path of the resampled file to write.
        sample_rate: Target sample rate in Hz.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    command = [
        'ffmpeg',
        '-y',
        '-i', str(input_audio_file_path),
        '-ar', str(sample_rate),
        str(output_audio_file_path),
    ]
    # check=True surfaces a failed conversion here rather than later, when the
    # missing output file would be opened.
    subprocess.run(command, check=True)
def audio_is_stereo(audio_file_path):
    """Tell whether an audio file has exactly two channels.

    The channel count is read from the file's metadata via ``soundfile.info``
    so the audio samples do not have to be decoded into memory.

    Args:
        audio_file_path: Path to an audio file that ``soundfile`` can open.

    Returns:
        True if the file reports two channels, False otherwise.
    """
    return sf.info(audio_file_path).channels == 2
def set_mono(input_audio_file_path, output_audio_file_path):
    """Downmix an audio file to a single channel with ffmpeg.

    Equivalent to running ``ffmpeg -y -i <input> -ac 1 <output>``.

    The command is passed as an argument list (no shell), so paths containing
    spaces or shell metacharacters are handled correctly. ``-y`` makes ffmpeg
    overwrite an existing output file instead of prompting or aborting, which
    keeps repeated runs of the script non-interactive.

    Args:
        input_audio_file_path: Path of the audio file to read.
        output_audio_file_path: Path of the mono file to write.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    command = [
        'ffmpeg',
        '-y',
        '-i', str(input_audio_file_path),
        '-ac', '1',
        str(output_audio_file_path),
    ]
    # check=True surfaces a failed conversion here rather than later, when the
    # missing output file would be opened.
    subprocess.run(command, check=True)
# Fetch the demo recording. ``-O`` pins the destination file name so a re-run
# overwrites it instead of letting wget save ``vocals.wav.1`` beside a stale
# copy, and ``check=True`` stops the script when the download fails.
# The variable is called ``source_audio`` (not ``input``) so the builtin
# ``input`` is not shadowed.
source_audio = "vocals.wav"
subprocess.run(
    [
        'wget',
        '-O', source_audio,
        'https://maximofn.com/wp-content/uploads/2023/10/vocals.wav',
    ],
    check=True,
)

input_8k = "vocals_8k.wav"
input_8k_mono = "vocals_8k_mono.wav"

# Resample to SAMPLE_RATE only when the download is at a different rate;
# otherwise keep working on the downloaded file directly.
sr = get_sample_rate(source_audio)
if sr != SAMPLE_RATE:
    change_sample_rate(source_audio, input_8k, SAMPLE_RATE)
else:
    input_8k = source_audio

# Downmix to one channel only when the file is stereo; ``input_8k_mono`` ends
# up naming the file that is handed to the separation pipeline.
if audio_is_stereo(input_8k):
    set_mono(input_8k, input_8k_mono)
else:
    input_8k_mono = input_8k
# Inference is pinned to the CPU; the torch-based CUDA auto-detection is
# currently disabled (it would need the commented-out ``import torch``).
device = 'cpu'
separation = pipeline(
    Tasks.speech_separation,
    model='damo/speech_mossformer_separation_temporal_8k',
    device=device,
)

# Run the separation pipeline on the preprocessed audio file.
print("Separating...")
result = separation(input_8k_mono)
print("Separated!")

# Write every entry of ``output_pcm_list`` to its own WAV file, interpreting
# the raw buffer as 16-bit integer samples at SAMPLE_RATE.
print("Saving...")
for speaker_index, pcm_bytes in enumerate(result['output_pcm_list']):
    samples = np.frombuffer(pcm_bytes, dtype=np.int16)
    sf.write(f'output_spk{speaker_index}.wav', samples, SAMPLE_RATE)
print("Saved!")
# Build the web UI: a Blocks app holding a single textbox whose initial value
# is "Subtify".
with gr.Blocks() as demo:
    gr.Textbox(value="Subtify")

# Start serving the app.
demo.launch()
|