Maximofn committed on
Commit
32c2d1f
1 Parent(s): 6ff02d0

draft of separate audios

Browse files
Files changed (1) hide show
  1. app.py +53 -0
app.py CHANGED
@@ -1,4 +1,57 @@
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
 
4
  with gr.Blocks() as demo:
 
1
  import gradio as gr
2
+ from modelscope.pipelines import pipeline
3
+ from modelscope.utils.constant import Tasks
4
+ import soundfile as sf
5
+ import numpy as np
6
+ import os
7
+ # import torch
8
+
9
+ SAMPLE_RATE = 8000
10
+
11
def get_sample_rate(audio_file_path):
    """Return the sample rate of the audio file at ``audio_file_path``.

    The rate is taken from the file's metadata via ``sf.info`` so the audio
    samples themselves do not have to be loaded into memory just to learn it.

    Args:
        audio_file_path: Path of an audio file readable by ``soundfile``.

    Returns:
        The sample rate reported by ``soundfile`` for that file.
    """
    return sf.info(audio_file_path).samplerate
14
+
15
def change_sample_rate(input_audio_file_path, output_audio_file_path, sample_rate):
    """Resample an audio file with ffmpeg and write the result to a new file.

    Equivalent to running
    ``ffmpeg -y -i <input> -ar <sample_rate> <output>``.

    Args:
        input_audio_file_path: Path of the audio file to resample.
        output_audio_file_path: Path where the resampled audio is written.
        sample_rate: Target sample rate passed to ffmpeg's ``-ar`` option.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    import subprocess

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters from being split or interpreted as commands.
    # ``-y`` overwrites an output left over from a previous run instead of
    # stopping on ffmpeg's interactive "overwrite?" prompt.
    subprocess.run(
        [
            'ffmpeg', '-y',
            '-i', input_audio_file_path,
            '-ar', str(sample_rate),
            output_audio_file_path,
        ],
        check=True,
    )
18
+
19
def audio_is_stereo(audio_file_path):
    """Return True when the audio file has exactly two channels.

    The channel count is taken from the file's metadata via ``sf.info`` so the
    audio samples do not have to be loaded just to count channels.

    Args:
        audio_file_path: Path of an audio file readable by ``soundfile``.

    Returns:
        ``True`` if the file reports two channels, ``False`` otherwise.
    """
    return sf.info(audio_file_path).channels == 2
22
+
23
def set_mono(input_audio_file_path, output_audio_file_path):
    """Downmix an audio file to a single channel with ffmpeg.

    Equivalent to running ``ffmpeg -y -i <input> -ac 1 <output>``.

    Args:
        input_audio_file_path: Path of the audio file to downmix.
        output_audio_file_path: Path where the mono audio is written.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    import subprocess

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters from being split or interpreted as commands.
    # ``-y`` overwrites an output left over from a previous run instead of
    # stopping on ffmpeg's interactive "overwrite?" prompt.
    subprocess.run(
        [
            'ffmpeg', '-y',
            '-i', input_audio_file_path,
            '-ac', '1',
            output_audio_file_path,
        ],
        check=True,
    )
25
+
26
# --- Draft pipeline: download a sample, normalise it, separate the speakers ---

# NOTE(review): ``input`` shadows the builtin of the same name. The name is
# kept because code further down the file may refer to it.
input = "vocals.wav"
input_8k = "vocals_8k.wav"
input_8k_mono = "vocals_8k_mono.wav"

# Download the sample recording only when it is not already on disk, so
# restarting the app does not fetch it again (wget would otherwise save
# duplicates such as "vocals.wav.1").
if not os.path.exists(input):
    wget_status = os.system('wget https://maximofn.com/wp-content/uploads/2023/10/vocals.wav')
    if wget_status != 0:
        # Fail here with a clear message rather than later when reading the file.
        raise RuntimeError(f"Could not download {input} (wget exit status {wget_status})")

sr = get_sample_rate(input)

# Resample to SAMPLE_RATE when needed; otherwise use the original file as is.
# (The model name below ends in "8k" - presumably it expects 8 kHz input.)
if sr != SAMPLE_RATE:
    change_sample_rate(input, input_8k, SAMPLE_RATE)
else:
    input_8k = input

# Downmix two-channel audio to mono; otherwise reuse the file from the step above.
if audio_is_stereo(input_8k):
    set_mono(input_8k, input_8k_mono)
else:
    input_8k_mono = input_8k

# Inference runs on the CPU for now; automatic GPU selection is not enabled.
device = 'cpu'
separation = pipeline(Tasks.speech_separation, model='damo/speech_mossformer_separation_temporal_8k', device=device)
print("Separating...")
result = separation(input_8k_mono)
print("Separated!")

print("Saving...")
# Each entry of 'output_pcm_list' is a raw byte buffer, interpreted here as
# 16-bit integer samples and written to its own WAV file at SAMPLE_RATE.
for i, signal in enumerate(result['output_pcm_list']):
    save_file = f'output_spk{i}.wav'
    sf.write(save_file, np.frombuffer(signal, dtype=np.int16), SAMPLE_RATE)
print("Saved!")
55
 
56
 
57
  with gr.Blocks() as demo: