Update separe.py
Browse files
separe.py
CHANGED
@@ -60,6 +60,12 @@ def main(args):
|
|
60 |
input_name = input.split(".")[0]
|
61 |
output_name = output.split(".")[0]
|
62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
# Set input files with 8k sample rate and mono
|
64 |
input_8k = f"{input_name}_8k.wav"
|
65 |
input_8k_mono = f"{input_name}_8k_mono.wav"
|
@@ -67,14 +73,12 @@ def main(args):
|
|
67 |
# Check if input has 8k sample rate, if not, change it
|
68 |
sr = get_sample_rate(input)
|
69 |
if sr != SAMPLE_RATE:
|
70 |
-
print("Changing sample rate...")
|
71 |
change_sample_rate(input, input_8k, SAMPLE_RATE)
|
72 |
else:
|
73 |
input_8k = input
|
74 |
|
75 |
# Check if input is stereo, if yes, set it to mono
|
76 |
if audio_is_stereo(input_8k):
|
77 |
-
print("Setting mono...")
|
78 |
set_mono(input_8k, input_8k_mono)
|
79 |
else:
|
80 |
input_8k_mono = input_8k
|
@@ -82,16 +86,16 @@ def main(args):
|
|
82 |
# Separate audio voices
|
83 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
84 |
separation = pipeline(Tasks.speech_separation, model='damo/speech_mossformer_separation_temporal_8k', device=device)
|
85 |
-
print("Separating...")
|
86 |
result = separation(input_8k_mono)
|
87 |
-
print("Separated!")
|
88 |
|
89 |
# Save separated audio voices
|
90 |
-
print("Saving...")
|
91 |
for i, signal in enumerate(result['output_pcm_list']):
|
92 |
-
save_file = f'{
|
93 |
sf.write(save_file, np.frombuffer(signal, dtype=np.int16), SAMPLE_RATE)
|
94 |
-
|
|
|
|
|
|
|
95 |
|
96 |
if __name__ == '__main__':
|
97 |
argparser = argparse.ArgumentParser(description='Separate speech from a stereo audio file')
|
|
|
60 |
input_name = input.split(".")[0]
|
61 |
output_name = output.split(".")[0]
|
62 |
|
63 |
+
# Split the input and output paths into folder and file name (assumes exactly one "/" in each path)
|
64 |
+
input_folder = input_name.split("/")[0]
|
65 |
+
output_folder = output_name.split("/")[0]
|
66 |
+
input_file_name = input_name.split("/")[1]
|
67 |
+
output_file_name = output_name.split("/")[1]
|
68 |
+
|
69 |
# Set input files with 8k sample rate and mono
|
70 |
input_8k = f"{input_name}_8k.wav"
|
71 |
input_8k_mono = f"{input_name}_8k_mono.wav"
|
|
|
73 |
# Check if input has 8k sample rate, if not, change it
|
74 |
sr = get_sample_rate(input)
|
75 |
if sr != SAMPLE_RATE:
|
|
|
76 |
change_sample_rate(input, input_8k, SAMPLE_RATE)
|
77 |
else:
|
78 |
input_8k = input
|
79 |
|
80 |
# Check if input is stereo, if yes, set it to mono
|
81 |
if audio_is_stereo(input_8k):
|
|
|
82 |
set_mono(input_8k, input_8k_mono)
|
83 |
else:
|
84 |
input_8k_mono = input_8k
|
|
|
86 |
# Separate audio voices
|
87 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
88 |
separation = pipeline(Tasks.speech_separation, model='damo/speech_mossformer_separation_temporal_8k', device=device)
|
|
|
89 |
result = separation(input_8k_mono)
|
|
|
90 |
|
91 |
# Save separated audio voices
|
|
|
92 |
for i, signal in enumerate(result['output_pcm_list']):
|
93 |
+
save_file = f'{output_folder}/{output_file_name}_speaker{i:003d}.wav'
|
94 |
sf.write(save_file, np.frombuffer(signal, dtype=np.int16), SAMPLE_RATE)
|
95 |
+
|
96 |
+
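# Remove temporary files
# NOTE(review): when no resampling or mono conversion was needed, input_8k / input_8k_mono alias the original input (or each other), so this deletes the input file and/or the second os.remove fails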
|
97 |
+
os.remove(input_8k)
|
98 |
+
os.remove(input_8k_mono)
|
99 |
|
100 |
if __name__ == '__main__':
|
101 |
argparser = argparse.ArgumentParser(description='Separate speech from a stereo audio file')
|