Spaces:
Runtime error
Runtime error
Commit
·
e385e48
1
Parent(s):
0951938
exception
Browse files
- src/__pycache__/predict.cpython-310.pyc +0 -0
- src/predict.py +25 -21
- src/se_extractor.py +2 -4
src/__pycache__/predict.cpython-310.pyc
CHANGED
Binary files a/src/__pycache__/predict.cpython-310.pyc and b/src/__pycache__/predict.cpython-310.pyc differ
|
|
src/predict.py
CHANGED
@@ -266,27 +266,31 @@ class Predictor:
|
|
266 |
"voice_clone_2":f"https://demovidelyusergenerations.s3.amazonaws.com/{gen_id}-voice-clone-2.mp3"
|
267 |
}
|
268 |
if method_type == 'voice_clone_with_emotions':
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
|
|
|
|
|
|
|
|
290 |
if method_type == 'voice_clone_with_multi_lang':
|
291 |
print("Inside multilang")
|
292 |
#voice clone with multi-lingugal
|
|
|
266 |
"voice_clone_2":f"https://demovidelyusergenerations.s3.amazonaws.com/{gen_id}-voice-clone-2.mp3"
|
267 |
}
|
268 |
if method_type == 'voice_clone_with_emotions':
|
269 |
+
try:
|
270 |
+
print("INSIDE emotions")
|
271 |
+
base_speaker_tts,tone_color_converter = self.base_speaker_tts,self.tone_color_converter
|
272 |
+
reference_speaker = local_file_path
|
273 |
+
print("here 1")
|
274 |
+
target_se, audio_name = se_extractor.get_se(reference_speaker, tone_color_converter, target_dir=openvoice_dir, vad=False)
|
275 |
+
print("here 2")
|
276 |
+
src_path = os.path.join(results_dir,f"{gen_id}-tmp-emotions.wav")
|
277 |
+
openvoice_output = os.path.join(results_dir,f"{gen_id}-4.wav")
|
278 |
+
base_speaker_tts.tts(passage,src_path,speaker='default',language='English',speed=1.0,use_emotions=True)
|
279 |
+
source_se = torch.load(f'{self.ckpt_base}/en_style_se.pth').to(self.device)
|
280 |
+
tone_color_converter.convert(audio_src_path=src_path,src_se=source_se,tgt_se=target_se,output_path=openvoice_output,message='')
|
281 |
+
if process_audio:
|
282 |
+
(new_sr, wav1) = self._fn(openvoice_output,"Midpoint",32,0.5)
|
283 |
+
sf.write(openvoice_output,wav1,new_sr)
|
284 |
+
|
285 |
+
mp3_final_output_1 = str(openvoice_output).replace('wav','mp3')
|
286 |
+
self.convert_wav_to_mp3(openvoice_output,mp3_final_output_1)
|
287 |
+
print(mp3_final_output_1)
|
288 |
+
self.upload_file_to_s3(mp3_final_output_1,'demovidelyusergenerations',f"{gen_id}-voice-with-emotions.mp3")
|
289 |
+
shutil.rmtree(os.path.join(output_dir,gen_id))
|
290 |
+
return {"voice_clone_with_emotions":f"https://demovidelyusergenerations.s3.amazonaws.com/{gen_id}-voice-with-emotions.mp3"
|
291 |
+
}
|
292 |
+
except Exception as e:
|
293 |
+
return {"error":f"Unexpected error{e}"}
|
294 |
if method_type == 'voice_clone_with_multi_lang':
|
295 |
print("Inside multilang")
|
296 |
#voice clone with multi-lingugal
|
src/se_extractor.py
CHANGED
@@ -7,13 +7,11 @@ from pydub import AudioSegment
|
|
7 |
from faster_whisper import WhisperModel
|
8 |
from whisper_timestamped.transcribe import get_audio_tensor, get_vad_segments
|
9 |
|
10 |
-
model_size = "medium"
|
11 |
# Run on GPU with FP16
|
12 |
model = None
|
13 |
def split_audio_whisper(audio_path, target_dir='processed',needs_offset=True):
|
14 |
-
|
15 |
-
|
16 |
-
model = WhisperModel(model_size, device="cuda", compute_type="float16")
|
17 |
audio = AudioSegment.from_file(audio_path)
|
18 |
max_len = len(audio)
|
19 |
|
|
|
7 |
from faster_whisper import WhisperModel
|
8 |
from whisper_timestamped.transcribe import get_audio_tensor, get_vad_segments
|
9 |
|
|
|
10 |
# Run on GPU with FP16
|
11 |
model = None
|
12 |
def split_audio_whisper(audio_path, target_dir='processed',needs_offset=True):
|
13 |
+
print("in whisper split")
|
14 |
+
model = WhisperModel('medium', device="cuda:0", compute_type="float16")
|
|
|
15 |
audio = AudioSegment.from_file(audio_path)
|
16 |
max_len = len(audio)
|
17 |
|