Spaces:
Build error
Build error
from svoice.separate import * | |
import scipy.io.wavfile as wav | |
import gradio as gr | |
import os | |
import torch | |
import soundfile as sf | |
from transformers import pipeline | |
from glob import glob | |
# Load the voice-separation model once at start-up. `load_model` is
# presumably provided by the `svoice.separate` star import - confirm.
load_model()

# Prefer the GPU for speech recognition when torch can see one.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

BASE_PATH = os.path.dirname(os.path.abspath(__file__))

# Working directories used by separator(): the mixture is written under
# 'input' and the separated tracks are collected from 'separated'.
os.makedirs('input', exist_ok=True)
os.makedirs('separated', exist_ok=True)

print("Loading ASR model...")
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base",
    # The pipeline is given a device index: 0 when running on CUDA, -1 otherwise.
    device=-1 if device != "cuda" else 0,
)
print("ASR model loaded!")
def transcribe_audio(audiopath):
    """Transcribe the speech in an audio file with the module-level ASR pipeline.

    Args:
        audiopath: Path of an audio file that ``soundfile`` can read.

    Returns:
        The recognised text (the ``'text'`` entry of the pipeline result).
    """
    audio_input, sr = sf.read(audiopath, dtype="float32")

    # soundfile yields a (frames, channels) array for multi-channel files,
    # while the ASR pipeline expects single-channel audio: down-mix to mono.
    if audio_input.ndim > 1:
        audio_input = audio_input.mean(axis=1)

    # Hand the sampling rate over together with the samples. A bare array is
    # assumed to already be at the model's rate, and `sampling_rate=` is not
    # a call argument of the pipeline, so the rate would otherwise be lost.
    # NOTE(review): the pipeline resamples via torchaudio when the file's
    # rate differs from the model's - confirm torchaudio is installed.
    result = pipe(
        {"raw": audio_input, "sampling_rate": sr},
        max_new_tokens=500,
    )
    return result['text']
def separator(audio, rec_audio, example, max_speakers=7):
    """Separate the voices of a mixture and transcribe every separated track.

    Exactly one source is used, in this priority order: the uploaded
    ``audio``, the microphone recording ``rec_audio``, then the ``example``
    file path.

    Args:
        audio: ``(sample_rate, samples)`` tuple from the upload widget, or
            ``None`` when nothing was uploaded.
        rec_audio: ``(sample_rate, samples)`` tuple from the microphone
            widget, or ``None`` when nothing was recorded.
        example: Path of an example mixture, or ``None``.
        max_speakers: Number of speaker slots the caller displays. The
            result always holds exactly ``2 * max_speakers`` entries.

    Returns:
        A list of ``max_speakers`` audio file paths (padded with ``None``)
        followed by ``max_speakers`` transcripts (padded with ``""``), so
        that entry ``i`` and entry ``max_speakers + i`` describe the same
        speaker.

    Raises:
        ValueError: If no upload, recording or example was provided.
    """
    # Local import keeps this function self-contained; shutil replaces the
    # former shell `cp` call.
    import shutil

    mixture_path = 'input/original.wav'

    # Drop leftovers from the previous request so stale tracks are never
    # returned for the current one.
    for pattern in ('input/*', 'separated/*'):
        for stale in glob(pattern):
            os.remove(stale)

    if audio is not None:
        sample_rate, samples = audio[0], audio[1]
        wav.write(mixture_path, sample_rate, samples)
    elif rec_audio is not None:
        sample_rate, samples = rec_audio[0], rec_audio[1]
        wav.write(mixture_path, sample_rate, samples)
    elif example:
        # Plain file copy: portable, no shell involved, and it raises if
        # the example file is missing instead of failing silently.
        shutil.copyfile(example, mixture_path)
    else:
        raise ValueError(
            "No input audio: upload a file, record from the microphone, "
            "or select an example."
        )

    separate_demo(mix_dir="./input")

    separated_files = sorted(
        path
        for path in glob(os.path.join('separated', "*.wav"))
        if "original.wav" not in path
    )[:max_speakers]

    transcripts = []
    for index, path in enumerate(separated_files, start=1):
        print(f"Transcribing separated audio {index} ...")
        text = transcribe_audio(path)
        print("Text:", text)
        transcripts.append(text)

    # The UI wires a fixed number of audio outputs followed by the same
    # number of text outputs; pad both halves so every value lands in the
    # slot of its own speaker regardless of how many tracks were produced.
    missing = max_speakers - len(separated_files)
    return separated_files + [None] * missing + transcripts + [""] * missing
def set_example_audio(example: list) -> dict:
    """Return a Gradio Audio update whose value is the first item of *example*."""
    first_item = example[0]
    return gr.Audio.update(value=first_item)
# Number of speaker slots (one audio player plus one transcript box each)
# shown in the interface.
NUM_SPEAKER_SLOTS = 7

with gr.Blocks() as demo:
    gr.Markdown('''
<center>
<h1>Multiple Voice Separation with Transcription DEMO</h1>
<div style="display:flex;align-items:center;justify-content:center;"><iframe src="https://streamable.com/e/0x8osl?autoplay=1&nocontrols=1" frameborder="0" allow="autoplay"></iframe></div>
<p>
This is a demo for the multiple voice separation algorithm. The algorithm is trained on the LibriMix7 dataset and can be used to separate multiple voices from a single audio file.
</p>
</center>
''')

    # Two alternative inputs: an uploaded file or a microphone recording.
    with gr.Row():
        input_audio = gr.Audio(label="Input audio", type="numpy")
        rec_audio = gr.Audio(label="Record Using Microphone", type="numpy", source="microphone")

    # One audio player and one transcript box per speaker, two speakers per
    # row (the last row holds a single speaker when the count is odd).
    outputs_audio = []
    outputs_text = []
    for row_start in range(1, NUM_SPEAKER_SLOTS + 1, 2):
        with gr.Row():
            row_end = min(row_start + 2, NUM_SPEAKER_SLOTS + 1)
            for speaker in range(row_start, row_end):
                outputs_audio.append(
                    gr.Audio(label=f'Speaker {speaker}', interactive=False)
                )
                outputs_text.append(
                    gr.Text(label=f'Speaker {speaker}', interactive=False)
                )

    button = gr.Button("Separate")

    examples = [
        "samples/mixture1.wav",
        "samples/mixture2.wav",
        "samples/mixture3.wav",
    ]
    # gr.Radio is the current component; the gr.inputs namespace is deprecated.
    example_selector = gr.Radio(examples, label="Example Audio")

    # Outputs are ordered as all audio slots followed by all text slots.
    button.click(
        separator,
        inputs=[input_audio, rec_audio, example_selector],
        outputs=outputs_audio + outputs_text,
    )

demo.launch()