StormblessedKal committed
Commit e385e48 · 1 Parent(s): 0951938
src/__pycache__/predict.cpython-310.pyc CHANGED
Binary files a/src/__pycache__/predict.cpython-310.pyc and b/src/__pycache__/predict.cpython-310.pyc differ
 
src/predict.py CHANGED
@@ -266,27 +266,31 @@ class Predictor:
             "voice_clone_2":f"https://demovidelyusergenerations.s3.amazonaws.com/{gen_id}-voice-clone-2.mp3"
             }
         if method_type == 'voice_clone_with_emotions':
-            print("INSIDE emotions")
-            base_speaker_tts,tone_color_converter = self.base_speaker_tts,self.tone_color_converter
-            reference_speaker = local_file_path
-            target_se, audio_name = se_extractor.get_se(reference_speaker, tone_color_converter, target_dir=openvoice_dir, vad=False)
-            src_path = os.path.join(results_dir,f"{gen_id}-tmp-emotions.wav")
-            openvoice_output = os.path.join(results_dir,f"{gen_id}-4.wav")
-            base_speaker_tts.tts(passage,src_path,speaker='default',language='English',speed=1.0,use_emotions=True)
-            source_se = torch.load(f'{self.ckpt_base}/en_style_se.pth').to(self.device)
-            tone_color_converter.convert(audio_src_path=src_path,src_se=source_se,tgt_se=target_se,output_path=openvoice_output,message='')
-            if process_audio:
-                (new_sr, wav1) = self._fn(openvoice_output,"Midpoint",32,0.5)
-                sf.write(openvoice_output,wav1,new_sr)
-
-            mp3_final_output_1 = str(openvoice_output).replace('wav','mp3')
-            self.convert_wav_to_mp3(openvoice_output,mp3_final_output_1)
-            print(mp3_final_output_1)
-            self.upload_file_to_s3(mp3_final_output_1,'demovidelyusergenerations',f"{gen_id}-voice-with-emotions.mp3")
-            shutil.rmtree(os.path.join(output_dir,gen_id))
-            return {"voice_clone_with_emotions":f"https://demovidelyusergenerations.s3.amazonaws.com/{gen_id}-voice-with-emotions.mp3"
-            }
-
+            try:
+                print("INSIDE emotions")
+                base_speaker_tts,tone_color_converter = self.base_speaker_tts,self.tone_color_converter
+                reference_speaker = local_file_path
+                print("here 1")
+                target_se, audio_name = se_extractor.get_se(reference_speaker, tone_color_converter, target_dir=openvoice_dir, vad=False)
+                print("here 2")
+                src_path = os.path.join(results_dir,f"{gen_id}-tmp-emotions.wav")
+                openvoice_output = os.path.join(results_dir,f"{gen_id}-4.wav")
+                base_speaker_tts.tts(passage,src_path,speaker='default',language='English',speed=1.0,use_emotions=True)
+                source_se = torch.load(f'{self.ckpt_base}/en_style_se.pth').to(self.device)
+                tone_color_converter.convert(audio_src_path=src_path,src_se=source_se,tgt_se=target_se,output_path=openvoice_output,message='')
+                if process_audio:
+                    (new_sr, wav1) = self._fn(openvoice_output,"Midpoint",32,0.5)
+                    sf.write(openvoice_output,wav1,new_sr)
+
+                mp3_final_output_1 = str(openvoice_output).replace('wav','mp3')
+                self.convert_wav_to_mp3(openvoice_output,mp3_final_output_1)
+                print(mp3_final_output_1)
+                self.upload_file_to_s3(mp3_final_output_1,'demovidelyusergenerations',f"{gen_id}-voice-with-emotions.mp3")
+                shutil.rmtree(os.path.join(output_dir,gen_id))
+                return {"voice_clone_with_emotions":f"https://demovidelyusergenerations.s3.amazonaws.com/{gen_id}-voice-with-emotions.mp3"
+                }
+            except Exception as e:
+                return {"error":f"Unexpected error: {e}"}
         if method_type == 'voice_clone_with_multi_lang':
             print("Inside multilang")
             #voice clone with multi-lingual
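
For reference, the change above applies a broad error-handling pattern: the entire emotions branch now runs inside try/except, so a failure anywhere in the pipeline reaches the caller as a structured payload rather than an unhandled exception. A minimal sketch of that pattern in isolation; synthesize_with_emotions and the local names in it are hypothetical stand-ins, not functions from this repository:

import traceback

def synthesize_with_emotions(gen_id: str) -> dict:
    try:
        # TTS, tone-color conversion, post-processing, and the S3 upload
        # would run here; any of these steps may raise.
        result_key = f"{gen_id}-voice-with-emotions.mp3"
        return {"voice_clone_with_emotions": f"https://demovidelyusergenerations.s3.amazonaws.com/{result_key}"}
    except Exception as e:
        traceback.print_exc()  # keep the stack trace in the server logs
        return {"error": f"Unexpected error: {e}"}

One side effect worth noting: if an exception fires before the shutil.rmtree cleanup at the end of the try block, the working directory for that generation is left behind.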
src/se_extractor.py CHANGED
@@ -7,13 +7,11 @@ from pydub import AudioSegment
 from faster_whisper import WhisperModel
 from whisper_timestamped.transcribe import get_audio_tensor, get_vad_segments
 
-model_size = "medium"
 # Run on GPU with FP16
 model = None
 def split_audio_whisper(audio_path, target_dir='processed',needs_offset=True):
-    global model
-    if model is None:
-        model = WhisperModel(model_size, device="cuda", compute_type="float16")
+    print("in whisper split")
+    model = WhisperModel('medium', device="cuda:0", compute_type="float16")
     audio = AudioSegment.from_file(audio_path)
     max_len = len(audio)
 
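
The rewritten split_audio_whisper also changes loading behavior: the old code built one WhisperModel lazily and cached it in the module-level model global, while the new code constructs a fresh model on every call, pinning it to cuda:0 but paying the checkpoint-load cost each time. A minimal sketch of a cached variant that keeps the one-time load while still targeting cuda:0, assuming faster-whisper is installed and a CUDA device is available:

from functools import lru_cache

from faster_whisper import WhisperModel

@lru_cache(maxsize=1)
def get_whisper_model() -> WhisperModel:
    # Constructed once on first use, then reused by every later call.
    return WhisperModel("medium", device="cuda:0", compute_type="float16")

Calling get_whisper_model() inside split_audio_whisper would restore the caching without reintroducing the global statement.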