Update app.py
app.py
CHANGED
@@ -3,46 +3,30 @@ import json
 import argparse
 import traceback
 import logging
+from datetime import datetime
+
 import gradio as gr
 import numpy as np
 import librosa
 import torch
-
-import edge_tts
-from datetime import datetime
+
 from fairseq import checkpoint_utils
 from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
 from vc_infer_pipeline import VC
-from config import (
-    is_half,
-    device
-)
+from config import is_half, device
+
 logging.getLogger("numba").setLevel(logging.WARNING)
 
+
 def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
-    def vc_fn(
-        input_audio,
-        f0_up_key,
-        f0_method,
-        index_rate
-    ):
+    def vc_fn(vc_transpose, vc_f0method, vc_index_ratio):
         try:
-
-
-
-            if input_audio is None:
-                return "You need to upload an audio", None
-            sampling_rate, audio = input_audio
-            duration = audio.shape[0] / sampling_rate
-            if duration > 10000000:
-                return "no", None
-            audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
-            if len(audio.shape) > 1:
-                audio = librosa.to_mono(audio.transpose(1, 0))
-            if sampling_rate != 16000:
-                audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
+            # Get the recorded audio from the microphone
+            audio, sr = vc_microphone.record(num_frames=16000)  # Adjust the sample rate if needed
+
+            # Your existing processing logic for audio
             times = [0, 0, 0]
-            f0_up_key = int(f0_up_key)
+            f0_up_key = int(vc_transpose)
             audio_opt = vc.pipeline(
                 hubert_model,
                 net_g,
@@ -50,12 +34,13 @@ def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
                 audio,
                 times,
                 f0_up_key,
-                f0_method,
+                vc_f0method,
                 file_index,
                 file_big_npy,
-                index_rate,
+                vc_index_ratio,
                 if_f0,
             )
+
             print(
                 f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
             )
@@ -64,8 +49,10 @@ def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
             info = traceback.format_exc()
             print(info)
             return info, (None, None)
+
     return vc_fn
 
+
 def load_hubert():
     global hubert_model
     models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
@@ -80,6 +67,7 @@ def load_hubert():
     hubert_model = hubert_model.float()
     hubert_model.eval()
 
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--api', action="store_true", default=False)
@@ -113,6 +101,7 @@ if __name__ == '__main__':
         net_g = net_g.float()
         vc = VC(tgt_sr, device, is_half)
         models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index, npy)))
+
     with gr.Blocks() as app:
         gr.Markdown(
             "# <center> RVC generator\n"
@@ -125,16 +114,14 @@ if __name__ == '__main__':
                 with gr.Row():
                     gr.Markdown(
                         '<div align="center">'
-                        f'<div>{title}</div>\n'+
-                        (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
+                        f'<div>{title}</div>\n' +
+                        (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "") +
                         '</div>'
                     )
                 with gr.Row():
                     with gr.Column():
-
-
-                        else:
-                            vc_input = gr.Audio(label="Input audio")
+                        # Use microphone instead of file upload
+                        vc_microphone = gr.Microphone(label="Record your voice")
                         vc_transpose = gr.Number(label="Transpose", value=0)
                         vc_f0method = gr.Radio(
                             label="Pitch extraction algorithm, PM is fast but Harvest is better for low frequencies",
@@ -153,5 +140,6 @@ if __name__ == '__main__':
                     with gr.Column():
                         vc_output1 = gr.Textbox(label="Output Message")
                         vc_output2 = gr.Audio(label="Output Audio")
-
-
+
+        vc_submit.click(vc_fn, [vc_transpose, vc_f0method, vc_index_ratio], [vc_output1, vc_output2])
+    app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)
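
Note on the new handler: Gradio component objects such as gr.Microphone have no .record() method, so vc_microphone.record(num_frames=16000) inside vc_fn will raise an AttributeError at request time; a recording reaches the handler only as an input of the event listener, delivered as a (sample_rate, samples) tuple. Below is a minimal runnable sketch of that wiring. The convert handler and the "Convert" button label are hypothetical, and Gradio 3.x with the default numpy audio type is assumed.

    import gradio as gr
    import numpy as np

    def convert(input_audio, transpose):
        # Gradio passes the recording as (sample_rate, samples); None if nothing was recorded.
        if input_audio is None:
            return "You need to record an audio", None
        sampling_rate, audio = input_audio
        # Microphone audio arrives as integer PCM by default; scale to float32 as the old vc_fn did.
        if np.issubdtype(audio.dtype, np.integer):
            audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
        return f"Received {audio.shape[0] / sampling_rate:.2f}s at {sampling_rate} Hz", (sampling_rate, audio)

    with gr.Blocks() as demo:
        vc_microphone = gr.Microphone(label="Record your voice")
        vc_transpose = gr.Number(label="Transpose", value=0)
        vc_submit = gr.Button("Convert")
        vc_output1 = gr.Textbox(label="Output Message")
        vc_output2 = gr.Audio(label="Output Audio")
        # The microphone component belongs in the inputs list, not inside the handler.
        vc_submit.click(convert, [vc_microphone, vc_transpose], [vc_output1, vc_output2])

    demo.launch()

Applied to this app, that would mean adding vc_microphone to the inputs of vc_submit.click and giving vc_fn a matching first parameter.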