rvc-models

Runtime error

App Files Files Community

ArkanDash committed on May 18, 2023

Commit

d3873a6

1 Parent(s): 4d12c76

feat(app): added support for direct upload on Google Colab

Browse files

Files changed (6) hide show

.gitattributes +2 -31
app-full.py +50 -49
app.py +15 -23
requirements-full.txt +49 -0
weights/ayaka-jp/cover.png +0 -0
weights/nilou-jp/cover.png +0 -0

.gitattributes CHANGED Viewed

@@ -32,35 +32,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
-weights/alice/added_IVF141_Flat_nprobe_4.index filter=lfs diff=lfs merge=lfs -text
-weights/ayaka-jp/added_IVF1830_Flat_nprobe_9.index filter=lfs diff=lfs merge=lfs -text
-weights/nilou-zh/added_IVF1939_Flat_nprobe_9.index filter=lfs diff=lfs merge=lfs -text
-weights/teio/added_IVF3421_Flat_nprobe_11.index filter=lfs diff=lfs merge=lfs -text
-weights/ayaka-jp/added_IVF415_Flat_nprobe_6.index filter=lfs diff=lfs merge=lfs -text
-weights/klee-jp/added_IVF282_Flat_nprobe_5.index filter=lfs diff=lfs merge=lfs -text
-weights/klee-jp/cover.png filter=lfs diff=lfs merge=lfs -text
-weights/nahida-jp/added_IVF265_Flat_nprobe_5.index filter=lfs diff=lfs merge=lfs -text
-weights/nahida-jp/cover.png filter=lfs diff=lfs merge=lfs -text
-weights/hutao-jp/added_IVF265_Flat_nprobe_5.index filter=lfs diff=lfs merge=lfs -text
-weights/hutao-jp/cover.png filter=lfs diff=lfs merge=lfs -text
-weights/raiden-jp/added_IVF783_Flat_nprobe_7.index filter=lfs diff=lfs merge=lfs -text
-weights/raiden-jp/cover.png filter=lfs diff=lfs merge=lfs -text
-weights/kazuha-jp/added_IVF677_Flat_nprobe_7.index filter=lfs diff=lfs merge=lfs -text
-weights/kazuha-jp/cover.png filter=lfs diff=lfs merge=lfs -text
-weights/childe-jp/added_IVF428_Flat_nprobe_6.index filter=lfs diff=lfs merge=lfs -text
-weights/childe-jp/cover.png filter=lfs diff=lfs merge=lfs -text
-weights/noah/added_IVF467_Flat_nprobe_6.index filter=lfs diff=lfs merge=lfs -text
-weights/noah/cover.jpeg filter=lfs diff=lfs merge=lfs -text
-weights/rie/added_IVF325_Flat_nprobe_5.index filter=lfs diff=lfs merge=lfs -text
-weights/rie/cover.png filter=lfs diff=lfs merge=lfs -text
-weights/wanderer-jp/added_IVF128_Flat_nprobe_4.index filter=lfs diff=lfs merge=lfs -text
-weights/wanderer-jp/cover.png filter=lfs diff=lfs merge=lfs -text
-weights/xiao-jp/cover.png filter=lfs diff=lfs merge=lfs -text
-weights/xiao-jp/added_IVF233_Flat_nprobe_5.index filter=lfs diff=lfs merge=lfs -text
-weights/anji/added_IVF198_Flat_nprobe_4.index filter=lfs diff=lfs merge=lfs -text
-weights/anji/cover.png filter=lfs diff=lfs merge=lfs -text
-weights/ariana/added_IVF133_Flat_nprobe_4.index filter=lfs diff=lfs merge=lfs -text
-weights/ariana/cover.png filter=lfs diff=lfs merge=lfs -text
-weights/once/added_IVF229_Flat_nprobe_5.index filter=lfs diff=lfs merge=lfs -text
-weights/once/cover.png filter=lfs diff=lfs merge=lfs -text
 *.index filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 *.index filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text

app-full.py CHANGED Viewed

@@ -29,6 +29,8 @@ limitation = os.getenv("SYSTEM") == "spaces"  # limit audio length in huggingfac
 def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
     def vc_fn(
         input_audio,
         f0_up_key,
         f0_method,
         index_rate,
@@ -45,20 +47,18 @@ def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
                 asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save("tts.mp3"))
                 audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
             else:
-                if args.files:
-                    audio, sr = librosa.load(input_audio, sr=16000, mono=True)
-                else:
                     if input_audio is None:
                         return "You need to upload an audio", None
-                    sampling_rate, audio = input_audio
                     duration = audio.shape[0] / sampling_rate
-                    if duration > 20 and limitation:
-                        return "Please upload an audio file that is less than 20 seconds. If you need to generate a longer audio file, please use Colab.", None
                     audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
                     if len(audio.shape) > 1:
                         audio = librosa.to_mono(audio.transpose(1, 0))
                     if sampling_rate != 16000:
                         audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
             times = [0, 0, 0]
             f0_up_key = int(f0_up_key)
             audio_opt = vc.pipeline(
@@ -86,31 +86,31 @@ def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
 def cut_vocal_and_inst(yt_url):
     if yt_url != "":
-        if not os.path.exists("/content/youtube_audio"):
-            os.mkdir("/content/youtube_audio")
         ydl_opts = {
             'format': 'bestaudio/best',
             'postprocessors': [{
                 'key': 'FFmpegExtractAudio',
                 'preferredcodec': 'wav',
             }],
-            "outtmpl": '/content/youtube_audio/audio',
         }
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.download([yt_url])
-        yt_audio_path = "/content/youtube_audio/audio.wav"
         command = f"demucs --two-stems=vocals {yt_audio_path}"
         result = subprocess.run(command.split(), stdout=subprocess.PIPE)
         print(result.stdout.decode())
-        return ("/content/rvc-models/separated/htdemucs/audio/vocals.wav", "/content/rvc-models/separated/htdemucs/audio/no_vocals.wav", yt_audio_path, "/content/rvc-models/separated/htdemucs/audio/vocals.wav")
 def combine_vocal_and_inst(audio_data, audio_volume):
     print(audio_data)
-    if not os.path.exists("/content/result"):
-        os.mkdir("/content/result")
-    vocal_path = "/content/result/output.wav"
-    inst_path = "/content/rvc-models/separated/htdemucs/audio/no_vocals.wav"
-    output_path = "/content/result/combine.mp3"
     with wave.open(vocal_path, "w") as wave_file:
         wave_file.setnchannels(1)
         wave_file.setsampwidth(2)
@@ -140,11 +140,16 @@ def change_to_tts_mode(tts_mode):
     else:
         return gr.Audio.update(visible=True), gr.Textbox.update(visible=False), gr.Dropdown.update(visible=False)
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--api', action="store_true", default=False)
-    parser.add_argument("--share", action="store_true", default=False, help="share gradio app")
-    parser.add_argument("--files", action="store_true", default=False, help="load audio from path")
     args, unknown = parser.parse_known_args()
     load_hubert()
     models = []
@@ -182,7 +187,6 @@ if __name__ == '__main__':
             "# <center> RVC Models\n"
             "## <center> The input audio should be clean and pure voice without background music.\n"
             "### <center> More feature will be added soon... \n"
-            "![visitor badge](https://visitor-badge.glitch.me/badge?page_id=ArkanDash.Rvc-Models)\n\n"
             "[![image](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1hx6kKvIuv5XNY1Gai2PEuZhpO5z6xpVh?usp=sharing)\n\n"
             "[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
         )
@@ -198,18 +202,16 @@ if __name__ == '__main__':
                             '</div>'
                         )
                     with gr.Row():
-                        if args.files:
-                            with gr.Column():
-                                vc_youtube = gr.Textbox(label="Youtube URL")
-                                vc_convert = gr.Button("Convert", variant="primary")
-                                vc_vocal_preview = gr.Audio(label="Vocal Preview")
-                                vc_inst_preview = gr.Audio(label="Instrumental Preview")
-                                vc_audio_preview = gr.Audio(label="Audio Preview")
                         with gr.Column():
-                            if args.files:
-                                vc_input = gr.Textbox(label="Input audio path")
-                            else:
-                                vc_input = gr.Audio(label="Input audio"+' (less than 20 seconds)' if limitation else '')
                             vc_transpose = gr.Number(label="Transpose", value=0)
                             vc_f0method = gr.Radio(
                                 label="Pitch extraction algorithm, PM is fast but Harvest is better for low frequencies",
@@ -227,24 +229,23 @@ if __name__ == '__main__':
                             tts_mode = gr.Checkbox(label="tts (use edge-tts as input)", value=False)
                             tts_text = gr.Textbox(visible=False,label="TTS text (100 words limitation)" if limitation else "TTS text")
                             tts_voice = gr.Dropdown(label="Edge-tts speaker", choices=voices, visible=False, allow_custom_value=False, value="en-US-AnaNeural-Female")
-                            vc_submit = gr.Button("Generate", variant="primary")
                             vc_output1 = gr.Textbox(label="Output Message")
                             vc_output2 = gr.Audio(label="Output Audio")
-                        if args.files:
-                            with gr.Column():
-                                vc_volume = gr.Slider(
-                                    minimum=0,
-                                    maximum=10,
-                                    label="Vocal volume",
-                                    value=5,
-                                    interactive=True,
-                                    step=1
-                                )
-                                vc_outputCombine = gr.Audio(label="Output Combined Audio")
-                                vc_combine =  gr.Button("Combine",variant="primary")
-                vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio, tts_mode, tts_text, tts_voice], [vc_output1, vc_output2])
-                tts_mode.change(change_to_tts_mode, [tts_mode], [vc_input, tts_text, tts_voice])
-                if args.files:
-                    vc_convert.click(cut_vocal_and_inst, vc_youtube, [vc_vocal_preview, vc_inst_preview, vc_audio_preview, vc_input])
-                    vc_combine.click(combine_vocal_and_inst, [vc_output2, vc_volume], vc_outputCombine)
-        app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)

 def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
     def vc_fn(
         input_audio,
+        upload_audio,
+        upload_mode,
         f0_up_key,
         f0_method,
         index_rate,
                 asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save("tts.mp3"))
                 audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
             else:
+                if upload_mode:
                     if input_audio is None:
                         return "You need to upload an audio", None
+                    sampling_rate, audio = upload_audio
                     duration = audio.shape[0] / sampling_rate
                     audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
                     if len(audio.shape) > 1:
                         audio = librosa.to_mono(audio.transpose(1, 0))
                     if sampling_rate != 16000:
                         audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
+                else:
+                    audio, sr = librosa.load(input_audio, sr=16000, mono=True)
             times = [0, 0, 0]
             f0_up_key = int(f0_up_key)
             audio_opt = vc.pipeline(
 def cut_vocal_and_inst(yt_url):
     if yt_url != "":
+        if not os.path.exists("youtube_audio"):
+            os.mkdir("youtube_audio")
         ydl_opts = {
             'format': 'bestaudio/best',
             'postprocessors': [{
                 'key': 'FFmpegExtractAudio',
                 'preferredcodec': 'wav',
             }],
+            "outtmpl": 'youtube_audio/audio',
         }
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.download([yt_url])
+        yt_audio_path = "youtube_audio/audio.wav"
         command = f"demucs --two-stems=vocals {yt_audio_path}"
         result = subprocess.run(command.split(), stdout=subprocess.PIPE)
         print(result.stdout.decode())
+        return ("separated/htdemucs/audio/vocals.wav", "separated/htdemucs/audio/no_vocals.wav", yt_audio_path, "separated/htdemucs/audio/vocals.wav")
 def combine_vocal_and_inst(audio_data, audio_volume):
     print(audio_data)
+    if not os.path.exists("result"):
+        os.mkdir("result")
+    vocal_path = "result/output.wav"
+    inst_path = "separated/htdemucs/audio/no_vocals.wav"
+    output_path = "result/combine.mp3"
     with wave.open(vocal_path, "w") as wave_file:
         wave_file.setnchannels(1)
         wave_file.setsampwidth(2)
     else:
         return gr.Audio.update(visible=True), gr.Textbox.update(visible=False), gr.Dropdown.update(visible=False)
+def change_to_upload_mode(upload_mode):
+    if upload_mode:
+        return gr.Textbox().update(visible=False), gr.Audio().update(visible=True)
+    else:
+        return gr.Textbox().update(visible=True), gr.Audio().update(visible=False)
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--api', action="store_true", default=False)
+    parser.add_argument("--colab", action="store_true", default=False, help="share gradio app")
     args, unknown = parser.parse_known_args()
     load_hubert()
     models = []
             "# <center> RVC Models\n"
             "## <center> The input audio should be clean and pure voice without background music.\n"
             "### <center> More feature will be added soon... \n"
             "[![image](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1hx6kKvIuv5XNY1Gai2PEuZhpO5z6xpVh?usp=sharing)\n\n"
             "[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
         )
                             '</div>'
                         )
                     with gr.Row():
                         with gr.Column():
+                            vc_youtube = gr.Textbox(label="Youtube URL")
+                            vc_convert = gr.Button("Convert", variant="primary")
+                            vc_vocal_preview = gr.Audio(label="Vocal Preview")
+                            vc_inst_preview = gr.Audio(label="Instrumental Preview")
+                            vc_audio_preview = gr.Audio(label="Audio Preview")
+                        with gr.Column():
+                            vc_input = gr.Textbox(label="Input audio path")
+                            vc_upload = gr.Audio(label="Upload audio file", visible=False, interactive=True)
+                            upload_mode = gr.Checkbox(label="Upload mode", value=False)
                             vc_transpose = gr.Number(label="Transpose", value=0)
                             vc_f0method = gr.Radio(
                                 label="Pitch extraction algorithm, PM is fast but Harvest is better for low frequencies",
                             tts_mode = gr.Checkbox(label="tts (use edge-tts as input)", value=False)
                             tts_text = gr.Textbox(visible=False,label="TTS text (100 words limitation)" if limitation else "TTS text")
                             tts_voice = gr.Dropdown(label="Edge-tts speaker", choices=voices, visible=False, allow_custom_value=False, value="en-US-AnaNeural-Female")
                             vc_output1 = gr.Textbox(label="Output Message")
                             vc_output2 = gr.Audio(label="Output Audio")
+                            vc_submit = gr.Button("Generate", variant="primary")
+                        with gr.Column():
+                            vc_volume = gr.Slider(
+                                minimum=0,
+                                maximum=10,
+                                label="Vocal volume",
+                                value=4,
+                                interactive=True,
+                                step=1
+                            )
+                            vc_outputCombine = gr.Audio(label="Output Combined Audio")
+                            vc_combine =  gr.Button("Combine",variant="primary")
+                vc_submit.click(vc_fn, [vc_input, vc_upload, upload_mode, vc_transpose, vc_f0method, vc_index_ratio, tts_mode, tts_text, tts_voice], [vc_output1, vc_output2])
+                vc_convert.click(cut_vocal_and_inst, vc_youtube, [vc_vocal_preview, vc_inst_preview, vc_audio_preview, vc_input])
+                vc_combine.click(combine_vocal_and_inst, [vc_output2, vc_volume], vc_outputCombine)
+                tts_mode.change(change_to_tts_mode, [tts_mode, upload_mode], [vc_input, vc_upload, upload_mode, tts_text, tts_voice])
+                upload_mode.change(change_to_upload_mode, [upload_mode], [vc_input, vc_upload])
+        app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.colab)

app.py CHANGED Viewed

@@ -39,20 +39,17 @@ def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
                 asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save("tts.mp3"))
                 audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
             else:
-                if args.files:
-                    audio, sr = librosa.load(input_audio, sr=16000, mono=True)
-                else:
-                    if input_audio is None:
-                        return "You need to upload an audio", None
-                    sampling_rate, audio = input_audio
-                    duration = audio.shape[0] / sampling_rate
-                    if duration > 20 and limitation:
-                        return "Please upload an audio file that is less than 20 seconds. If you need to generate a longer audio file, please use Colab.", None
-                    audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
-                    if len(audio.shape) > 1:
-                        audio = librosa.to_mono(audio.transpose(1, 0))
-                    if sampling_rate != 16000:
-                        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
             times = [0, 0, 0]
             f0_up_key = int(f0_up_key)
             audio_opt = vc.pipeline(
@@ -101,8 +98,7 @@ def change_to_tts_mode(tts_mode):
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--api', action="store_true", default=False)
-    parser.add_argument("--share", action="store_true", default=False, help="share gradio app")
-    parser.add_argument("--files", action="store_true", default=False, help="load audio from path")
     args, unknown = parser.parse_known_args()
     load_hubert()
     models = []
@@ -140,8 +136,7 @@ if __name__ == '__main__':
             "# <center> RVC Models (Outdated)\n"
             "## <center> The input audio should be clean and pure voice without background music.\n"
             "### <center> Updated Repository: [NEW RVC Models](https://huggingface.co/spaces/ArkanDash/rvc-models-new).\n"
-            "#### <center> Recommended to use the Google Colab version for more feature.\n"
-            "![visitor badge](https://visitor-badge.glitch.me/badge?page_id=ArkanDash.Rvc-Models)\n\n"
             "[![image](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1hx6kKvIuv5XNY1Gai2PEuZhpO5z6xpVh?usp=sharing)\n\n"
             "[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
         )
@@ -158,10 +153,7 @@ if __name__ == '__main__':
                         )
                     with gr.Row():
                         with gr.Column():
-                            if args.files:
-                                vc_input = gr.Textbox(label="Input audio path")
-                            else:
-                                vc_input = gr.Audio(label="Input audio"+' (less than 20 seconds)' if limitation else '')
                             vc_transpose = gr.Number(label="Transpose", value=0)
                             vc_f0method = gr.Radio(
                                 label="Pitch extraction algorithm, PM is fast but Harvest is better for low frequencies",
@@ -185,4 +177,4 @@ if __name__ == '__main__':
                             vc_output2 = gr.Audio(label="Output Audio")
                 vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio, tts_mode, tts_text, tts_voice], [vc_output1, vc_output2])
                 tts_mode.change(change_to_tts_mode, [tts_mode], [vc_input, tts_text, tts_voice])
-        app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)

                 asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save("tts.mp3"))
                 audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
             else:
+                if input_audio is None:
+                    return "You need to upload an audio", None
+                sampling_rate, audio = input_audio
+                duration = audio.shape[0] / sampling_rate
+                if duration > 20 and limitation:
+                    return "Please upload an audio file that is less than 20 seconds. If you need to generate a longer audio file, please use Colab.", None
+                audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
+                if len(audio.shape) > 1:
+                    audio = librosa.to_mono(audio.transpose(1, 0))
+                if sampling_rate != 16000:
+                    audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
             times = [0, 0, 0]
             f0_up_key = int(f0_up_key)
             audio_opt = vc.pipeline(
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--api', action="store_true", default=False)
+    parser.add_argument("--colab", action="store_true", default=False, help="share gradio app")
     args, unknown = parser.parse_known_args()
     load_hubert()
     models = []
             "# <center> RVC Models (Outdated)\n"
             "## <center> The input audio should be clean and pure voice without background music.\n"
             "### <center> Updated Repository: [NEW RVC Models](https://huggingface.co/spaces/ArkanDash/rvc-models-new).\n"
+            "#### <center> [Recommended to use google colab for more features](https://colab.research.google.com/drive/1hx6kKvIuv5XNY1Gai2PEuZhpO5z6xpVh?usp=sharing)\n"
             "[![image](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1hx6kKvIuv5XNY1Gai2PEuZhpO5z6xpVh?usp=sharing)\n\n"
             "[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
         )
                         )
                     with gr.Row():
                         with gr.Column():
+                            vc_input = gr.Audio(label="Input audio"+' (less than 20 seconds)' if limitation else '')
                             vc_transpose = gr.Number(label="Transpose", value=0)
                             vc_f0method = gr.Radio(
                                 label="Pitch extraction algorithm, PM is fast but Harvest is better for low frequencies",
                             vc_output2 = gr.Audio(label="Output Audio")
                 vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio, tts_mode, tts_text, tts_voice], [vc_output1, vc_output2])
                 tts_mode.change(change_to_tts_mode, [tts_mode], [vc_input, tts_text, tts_voice])
+        app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.colab)

requirements-full.txt ADDED Viewed

	@@ -0,0 +1,49 @@

+numba==0.56.4
+numpy==1.23.5
+scipy==1.9.3
+librosa==0.9.2
+llvmlite==0.39.0
+fairseq==0.12.2
+faiss-cpu==1.7.0; sys_platform == "darwin"
+faiss-cpu==1.7.2; sys_platform != "darwin"
+gradio
+Cython
+future>=0.18.3
+pydub>=0.25.1
+soundfile>=0.12.1
+ffmpeg-python>=0.2.0
+tensorboardX
+functorch>=2.0.0
+Jinja2>=3.1.2
+json5>=0.9.11
+Markdown
+matplotlib>=3.7.1
+matplotlib-inline>=0.1.6
+praat-parselmouth>=0.4.3
+Pillow>=9.1.1
+pyworld>=0.3.2
+resampy>=0.4.2
+scikit-learn>=1.2.2
+starlette>=0.26.1
+tensorboard
+tensorboard-data-server
+tensorboard-plugin-wit
+torchgen>=0.0.1
+tqdm>=4.65.0
+tornado>=6.2
+Werkzeug>=2.2.3
+uc-micro-py>=1.0.1
+sympy>=1.11.1
+tabulate>=0.9.0
+PyYAML>=6.0
+pyasn1>=0.4.8
+pyasn1-modules>=0.2.8
+fsspec>=2023.3.0
+absl-py>=1.4.0
+audioread
+uvicorn>=0.21.1
+colorama>=0.4.6
+edge-tts
+demucs
+yt_dlp
+ffmpeg

weights/ayaka-jp/cover.png CHANGED Viewed

Git LFS Details

SHA256: 1fe85d2c9895d4d0010660eb42ffa154edb7e0decc97f4444ba6009c69d029c0
Pointer size: 131 Bytes
Size of remote file: 838 kB

weights/nilou-jp/cover.png CHANGED Viewed

Git LFS Details

SHA256: f1b477f9aaa5837e8e979fd41e2e7e6cc52d61937539af69f6fc5c0537f5b524
Pointer size: 132 Bytes
Size of remote file: 1.6 MB