Spaces:

fffiloni
/

instant-TTS-Bark-cloning

Paused

App Files Files Community

fffiloni committed on Aug 22, 2023

Commit

0cdadc9

•

1 Parent(s): 0bd14d2

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -2

app.py CHANGED Viewed

@@ -71,8 +71,48 @@ def infer(prompt, input_wav_file):
     for item in contents:
         print(item)
-    return "output.wav", f"bark_voices/{file_name}/{contents[1]}"
 css = """
 #col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
 """
@@ -95,6 +135,7 @@ with gr.Blocks(css=css) as demo:
         )
         submit_btn = gr.Button("Submit")
         cloned_out = gr.Audio(
             label="Text to speech output"
@@ -112,7 +153,29 @@ with gr.Blocks(css=css) as demo:
         ],
         outputs = [
             cloned_out,
-            npz_file
         ]
     )

     for item in contents:
         print(item)
+    return "output.wav", f"bark_voices/{file_name}/{contents[1]}", gr.update(visible=False), gr.update(visible=True)
+def infer_with_npz(prompt, input_wav_file):
+    # Path to your WAV file
+    source_path = input_wav_file
+    # Extract the file name without the extension
+    file_name = os.path.splitext(os.path.basename(source_path))[0]
+    # List all the files and subdirectories in the given directory
+    contents = os.listdir(f"bark_voices/{file_name}")
+    # Print the contents
+    for item in contents:
+        print(item)
+    os.remove(contents[0])
+    # cloning a speaker.
+    text = prompt
+    # It assumes that you have a speaker file in `bark_voices/speaker_n/speaker.npz`
+    output_dict = model.synthesize(
+        text,
+        config,
+        speaker_id=f"{file_name}",
+        voice_dirs="bark_voices/"
+    )
+    print(output_dict)
+    sample_rate = 24000  # Replace with the actual sample rate
+    wavfile.write(
+        'output.wav',
+        sample_rate,
+        output_dict['wav']
+    )
+    # Print again the contents
+    for item in contents:
+        print(item)
+    return 'output.wav'
+def uploaded_audio():
+    return gr.update(visible=True), gr.update(visible=False)
 css = """
 #col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
 """
         )
         submit_btn = gr.Button("Submit")
+        submit_with_npz_btn = gr.Button("Submit 2", visible=False)
         cloned_out = gr.Audio(
             label="Text to speech output"
         ],
         outputs = [
             cloned_out,
+            npz_file,
+            submit_btn,
+            submit_with_npz_btn
+        ]
+    )
+    submit_with_npz_btn.click(
+        fn = infer_with_npz,
+        inputs = [
+            prompt,
+            audio_in
+        ],
+        outputs = [
+            cloned_out
+        ]
+    )
+    audio_in.upload(
+        fn=uploaded_audio,
+        inputs=[],
+        outputs=[
+            submit_btn,
+            submit_with_npz_btn
         ]
     )