Spaces:

mrfakename
/

E2-F5-TTS

Running on Zero

mrfakename committed on Nov 1

Commit

5a9adbc

•

1 Parent(s): fb41309

Sync from GitHub repo

This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.

Files changed (2) hide show

src/f5_tts/train/datasets/prepare_csv_wavs.py CHANGED Viewed

@@ -54,8 +54,7 @@ def prepare_csv_wavs_dir(input_dir):
 def get_audio_duration(audio_path):
     audio, sample_rate = torchaudio.load(audio_path)
-    num_channels = audio.shape[0]
-    return audio.shape[1] / (sample_rate * num_channels)
 def read_audio_text_pairs(csv_file_path):

 def get_audio_duration(audio_path):
     audio, sample_rate = torchaudio.load(audio_path)
+    return audio.shape[1] / sample_rate
 def read_audio_text_pairs(csv_file_path):

src/f5_tts/train/finetune_gradio.py CHANGED Viewed

@@ -172,10 +172,9 @@ def load_settings(project_name):
 # Load metadata
 def get_audio_duration(audio_path):
-    """Calculate the duration of an audio file."""
     audio, sample_rate = torchaudio.load(audio_path)
-    num_channels = audio.shape[0]
-    return audio.shape[1] / (sample_rate * num_channels)
 def clear_text(text):
@@ -383,13 +382,17 @@ def start_training(
     stream=False,
     logger="wandb",
 ):
-    global training_process, tts_api, stop_signal
-    if tts_api is not None:
-        del tts_api
         gc.collect()
         torch.cuda.empty_cache()
         tts_api = None
     path_project = os.path.join(path_data, dataset_name)
@@ -1557,7 +1560,7 @@ If you encounter a memory error, try reducing the batch size per GPU to a smalle
                 last_per_steps = gr.Number(label="Last per Steps", value=100)
             with gr.Row():
-                mixed_precision = gr.Radio(label="mixed_precision", choices=["none", "fp16", "fpb16"], value="none")
                 cd_logger = gr.Radio(label="logger", choices=["wandb", "tensorboard"], value="wandb")
                 start_button = gr.Button("Start Training")
                 stop_button = gr.Button("Stop Training", interactive=False)

 # Load metadata
 def get_audio_duration(audio_path):
+    """Calculate the duration mono of an audio file."""
     audio, sample_rate = torchaudio.load(audio_path)
+    return audio.shape[1] / sample_rate
 def clear_text(text):
     stream=False,
     logger="wandb",
 ):
+    global training_process, tts_api, stop_signal, pipe
+    if tts_api is not None or pipe is not None:
+        if tts_api is not None:
+            del tts_api
+        if pipe is not None:
+            del pipe
         gc.collect()
         torch.cuda.empty_cache()
         tts_api = None
+        pipe = None
     path_project = os.path.join(path_data, dataset_name)
                 last_per_steps = gr.Number(label="Last per Steps", value=100)
             with gr.Row():
+                mixed_precision = gr.Radio(label="mixed_precision", choices=["none", "fp16", "bf16"], value="none")
                 cd_logger = gr.Radio(label="logger", choices=["wandb", "tensorboard"], value="wandb")
                 start_button = gr.Button("Start Training")
                 stop_button = gr.Button("Stop Training", interactive=False)