Spark808 committed
Commit 6df2588
1 Parent(s): a388cc2

Update app.py

Files changed (1)
  1. app.py +39 -36
app.py CHANGED
@@ -3,37 +3,46 @@ import json
 import argparse
 import traceback
 import logging
-from datetime import datetime
-
 import gradio as gr
 import numpy as np
 import librosa
 import torch
-
+import asyncio
+import edge_tts
+from datetime import datetime
 from fairseq import checkpoint_utils
 from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
 from vc_infer_pipeline import VC
-from config import is_half, device
-
+from config import (
+    is_half,
+    device
+)
 logging.getLogger("numba").setLevel(logging.WARNING)
 
-
 def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
     def vc_fn(
-        vc_microphone,
-        vc_transpose,
-        vc_f0method,
-        vc_index_ratio,
-        vc_output1,
-        vc_output2
+        input_audio,
+        f0_up_key,
+        f0_method,
+        index_rate
     ):
         try:
-            # Get the recorded audio from the microphone
-            audio, sr = vc_microphone()
-
-            # Your existing processing logic for audio
+            if args.files:
+                audio, sr = librosa.load(input_audio, sr=16000, mono=True)
+            else:
+                if input_audio is None:
+                    return "You need to upload an audio", None
+                sampling_rate, audio = input_audio
+                duration = audio.shape[0] / sampling_rate
+                if duration > 10000000:
+                    return "no", None
+                audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
+                if len(audio.shape) > 1:
+                    audio = librosa.to_mono(audio.transpose(1, 0))
+                if sampling_rate != 16000:
+                    audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
             times = [0, 0, 0]
-            f0_up_key = int(vc_transpose)
+            f0_up_key = int(f0_up_key)
             audio_opt = vc.pipeline(
                 hubert_model,
                 net_g,
@@ -41,27 +50,22 @@ def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
                 audio,
                 times,
                 f0_up_key,
-                vc_f0method,
+                f0_method,
                 file_index,
                 file_big_npy,
-                vc_index_ratio,
+                index_rate,
                 if_f0,
             )
-
             print(
                 f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
             )
-            vc_output1.update("Success")
-            vc_output2.update((tgt_sr, audio_opt))
-        except Exception as e:
+            return "Success", (tgt_sr, audio_opt)
+        except:
             info = traceback.format_exc()
             print(info)
-            vc_output1.update(str(e))
-            vc_output2.update((None, None))
-
+            return info, (None, None)
     return vc_fn
 
-
 def load_hubert():
     global hubert_model
     models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
@@ -76,7 +80,6 @@ def load_hubert():
     hubert_model = hubert_model.float()
     hubert_model.eval()
 
-
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--api', action="store_true", default=False)
@@ -110,7 +113,6 @@ if __name__ == '__main__':
         net_g = net_g.float()
         vc = VC(tgt_sr, device, is_half)
         models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index, npy)))
-
     with gr.Blocks() as app:
         gr.Markdown(
             "# <center> RVC generator\n"
@@ -123,14 +125,16 @@ if __name__ == '__main__':
             with gr.Row():
                 gr.Markdown(
                     '<div align="center">'
-                    f'<div>{title}</div>\n' +
-                    (f'<img style="width:auto;height:300px;" src="file/{cover}">' if os.path.exists(cover) else "") +
+                    f'<div>{title}</div>\n'+
+                    (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
                     '</div>'
                 )
             with gr.Row():
                 with gr.Column():
-                    # Use microphone instead of file upload
-                    vc_microphone = gr.Microphone(label="Record your voice")
+                    if args.files:
+                        vc_input = gr.Textbox(label="Input audio path")
+                    else:
+                        vc_input = gr.Audio(label="Input audio")
                     vc_transpose = gr.Number(label="Transpose", value=0)
                     vc_f0method = gr.Radio(
                         label="Pitch extraction algorithm, PM is fast but Harvest is better for low frequencies",
@@ -149,6 +153,5 @@ if __name__ == '__main__':
                 with gr.Column():
                     vc_output1 = gr.Textbox(label="Output Message")
                     vc_output2 = gr.Audio(label="Output Audio")
-
-            vc_submit.click(vc_fn, vc_microphone, vc_transpose, vc_f0method, vc_index_ratio, vc_output1, vc_output2)
-    app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)
+            vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio], [vc_output1, vc_output2])
+    app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)
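Two notes on the changes above, with short sketches.

Input handling: with --files set, the new vc_fn loads audio from a path via librosa.load(input_audio, sr=16000, mono=True), which resamples and downmixes in one call; otherwise it takes the (sampling_rate, audio) tuple that gr.Audio produces and normalizes it manually. That manual path can be exercised on its own. A minimal sketch, assuming only numpy and librosa; the helper name prepare_input is illustrative and not part of the commit:

import numpy as np
import librosa

def prepare_input(sampling_rate, audio):
    # Illustrative helper mirroring vc_fn's preprocessing of a Gradio
    # (sampling_rate, audio) tuple. Assumes integer PCM, as the commit does.
    audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
    # gr.Audio returns (samples, channels) for multi-channel input;
    # librosa.to_mono expects (channels, samples), hence the transpose.
    if len(audio.shape) > 1:
        audio = librosa.to_mono(audio.transpose(1, 0))
    # Resample to the 16 kHz rate the HuBERT feature extractor expects.
    if sampling_rate != 16000:
        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
    return audio

# Example: a one-second 440 Hz stereo tone at 44.1 kHz, int16 PCM.
sr = 44100
t = np.linspace(0, 1, sr, endpoint=False)
tone = (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)
print(prepare_input(sr, np.stack([tone, tone], axis=1)).shape)  # (16000,)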
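Output handling: the previous revision called vc_output1.update(...) and vc_output2.update(...) inside the handler and passed every component to vc_submit.click positionally, which is not how Blocks events are wired; a handler returns one value per output component, and click takes inputs and outputs as lists. A minimal sketch of the corrected pattern, with illustrative component names:

import traceback
import gradio as gr

def handler(text):
    # Return one value per output component, in order (message, result),
    # mirroring vc_fn's "Success"/traceback returns in the commit.
    try:
        return "Success", text.upper()
    except Exception:
        return traceback.format_exc(), None

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    btn = gr.Button("Convert")
    msg = gr.Textbox(label="Output Message")
    out = gr.Textbox(label="Output")
    # Inputs and outputs go in lists, matching the corrected
    # vc_submit.click(vc_fn, [...], [vc_output1, vc_output2]).
    btn.click(handler, [inp], [msg, out])

demo.launch()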