Spaces:

mazalaai
/

tts

Sleeping

App Files Community

MAZALA2024 committed on Oct 21, 2024

Commit

b2461b4

verified ·

1 Parent(s): 3ae57b7

Update voice_processing.py

Browse files

Files changed (1) hide show

voice_processing.py +24 -28

voice_processing.py CHANGED Viewed

@@ -34,11 +34,12 @@ limitation = os.getenv("SYSTEM") == "spaces"
 config = Config()
-# Edge TTS
-tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
-tts_voices = ["mn-MN-BataaNeural", "mn-MN-YesuiNeural"]  # Specific voices
-# RVC models
 model_root = "weights"
 models = [d for d in os.listdir(model_root) if os.path.isdir(f"{model_root}/{d}")]
 models.sort()
@@ -46,7 +47,12 @@ models.sort()
 def get_unique_filename(extension):
     return f"{uuid.uuid4()}.{extension}"
 def model_data(model_name):
     pth_path = [
         f"{model_root}/{model_name}/{f}"
         for f in os.listdir(f"{model_root}/{model_name}")
@@ -92,7 +98,8 @@ def model_data(model_name):
         index_file = index_files[0]
         print(f"Index file found: {index_file}")
-    return tgt_sr, net_g, vc, version, index_file, if_f0
 def load_hubert():
     models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
@@ -108,21 +115,14 @@ def load_hubert():
     return hubert_model.eval()
 def get_model_names():
-    model_root = "weights"  # Assuming this is where your models are stored
     return [d for d in os.listdir(model_root) if os.path.isdir(f"{model_root}/{d}")]
-# Add this helper function to ensure a new event loop is created if none exists
-def run_async_in_thread(fn, *args):
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    result = loop.run_until_complete(fn(*args))
-    loop.close()
-    return result
 def parallel_tts(tasks):
-    with ThreadPoolExecutor() as executor:
-        futures = [executor.submit(run_async_in_thread, tts, *task) for task in tasks]
-        results = [future.result() for future in futures]
     return results
 async def tts(
@@ -133,7 +133,7 @@ async def tts(
     use_uploaded_voice,
     uploaded_voice,
 ):
-    # Default values for parameters used in EdgeTTS
     speed = 0  # Default speech speed
     f0_up_key = 0  # Default pitch adjustment
     f0_method = "rmvpe"  # Default pitch extraction method
@@ -160,7 +160,7 @@ async def tts(
             # EdgeTTS processing
             if limitation and len(tts_text) > 12000:
                 return (
-                    f"Text characters should be at most 12000 in this huggingface space, but got {len(tts_text)} characters.",
                     None,
                     None,
                 )
@@ -179,14 +179,15 @@ async def tts(
         # Common processing after loading the audio
         duration = len(audio) / sr
         print(f"Audio duration: {duration}s")
-        if limitation and duration >= 20000:
             return (
-                f"Audio should be less than 20 seconds in this huggingface space, but got {duration}s.",
                 None,
                 None,
             )
         f0_up_key = int(f0_up_key)
         tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
         # Setup for RMVPE or other pitch extraction methods
@@ -229,7 +230,7 @@ async def tts(
     except EOFError:
         info = (
-            "output not valid. This may occur when input text and speaker do not match."
         )
         print(info)
         return info, None, None
@@ -238,11 +239,6 @@ async def tts(
         print(traceback_info)
         return str(e), None, None
-voice_mapping = {
-    "Mongolian Male": "mn-MN-BataaNeural",
-    "Mongolian Female": "mn-MN-YesuiNeural"
-}
 hubert_model = load_hubert()
-rmvpe_model = RMVPE("rmvpe.pt", config.is_half, config.device)

 config = Config()
+# Edge TTS voices
+loop = asyncio.get_event_loop()
+tts_voice_list = loop.run_until_complete(edge_tts.list_voices())
+tts_voices = ["mn-MN-BataaNeural", "mn-MN-YesuiNeural"]
+# RVC models directory
 model_root = "weights"
 models = [d for d in os.listdir(model_root) if os.path.isdir(f"{model_root}/{d}")]
 models.sort()
 def get_unique_filename(extension):
     return f"{uuid.uuid4()}.{extension}"
+model_cache = {}
 def model_data(model_name):
+    if model_name in model_cache:
+        return model_cache[model_name]
     pth_path = [
         f"{model_root}/{model_name}/{f}"
         for f in os.listdir(f"{model_root}/{model_name}")
         index_file = index_files[0]
         print(f"Index file found: {index_file}")
+    model_cache[model_name] = (tgt_sr, net_g, vc, version, index_file, if_f0)
+    return model_cache[model_name]
 def load_hubert():
     models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
     return hubert_model.eval()
 def get_model_names():
     return [d for d in os.listdir(model_root) if os.path.isdir(f"{model_root}/{d}")]
+# Initialize a global ThreadPoolExecutor
+executor = ThreadPoolExecutor(max_workers=20)  # Adjust based on your server
 def parallel_tts(tasks):
+    futures = [executor.submit(run_async_in_thread, tts, *task) for task in tasks]
+    results = [future.result() for future in futures]
     return results
 async def tts(
     use_uploaded_voice,
     uploaded_voice,
 ):
+    # Default values for parameters
     speed = 0  # Default speech speed
     f0_up_key = 0  # Default pitch adjustment
     f0_method = "rmvpe"  # Default pitch extraction method
             # EdgeTTS processing
             if limitation and len(tts_text) > 12000:
                 return (
+                    f"Text characters should be at most 12000 in this Hugging Face Space, but got {len(tts_text)} characters.",
                     None,
                     None,
                 )
         # Common processing after loading the audio
         duration = len(audio) / sr
         print(f"Audio duration: {duration}s")
+        if limitation and duration >= 20:
             return (
+                f"Audio should be less than 20 seconds in this Hugging Face Space, but got {duration}s.",
                 None,
                 None,
             )
         f0_up_key = int(f0_up_key)
+        # Load the model using cached data
         tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
         # Setup for RMVPE or other pitch extraction methods
     except EOFError:
         info = (
+            "Output not valid. This may occur when input text and speaker do not match."
         )
         print(info)
         return info, None, None
         print(traceback_info)
         return str(e), None, None
+# Initialize the global models
 hubert_model = load_hubert()
+rmvpe_model = RMVPE("rmvpe.pt", config.is_half, config.device)