Shashashasha committed on
Commit
52b4f8e
0 Parent(s):

Duplicate from Shashashasha/so-vits-fork-vika

Browse files
Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +14 -0
  3. app.py +120 -0
  4. requirements.txt +3 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Voice Cloning
3
+ emoji: 😻
4
+ colorFrom: blue
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 3.27.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ duplicated_from: Shashashasha/so-vits-fork-vika
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pathlib import Path
3
+
4
+ import gradio as gr
5
+ import librosa
6
+ import numpy as np
7
+ import torch
8
+ from huggingface_hub import hf_hub_download, list_repo_files
9
+ from so_vits_svc_fork.hparams import HParams
10
+ from so_vits_svc_fork.inference.core import Svc
11
+
12
##########################################################
# REPLACE THESE VALUES TO CHANGE THE MODEL REPO/CKPT NAME
##########################################################
repo_id = "Shashashasha/vika"
ckpt_name = None  # None will pick latest
##########################################################

# Select the newest generator checkpoint by its training-step suffix.
# Ex. if the repo holds G_0.pth, G_100.pth, G_200.pth, we pick G_200.pth.
if ckpt_name is None:
    latest_id = max(
        int(Path(name).stem.split("_")[1])
        for name in list_repo_files(repo_id)
        if name.startswith("G_") and name.endswith(".pth")
    )
    ckpt_name = f"G_{latest_id}.pth"

# Fetch the generator weights and its config, then build the inference engine.
generator_path = hf_hub_download(repo_id, ckpt_name)
config_path = hf_hub_download(repo_id, "config.json")
hparams = HParams(**json.loads(Path(config_path).read_text()))
# Speaker ids come from the model config; shown in the UI dropdowns below.
speakers = list(hparams.spk.keys())
device = "cuda" if torch.cuda.is_available() else "cpu"
model = Svc(net_g_path=generator_path, config_path=config_path, device=device, cluster_model_path=None)
37
+
38
+
39
def predict(
    speaker,
    audio,
    transpose: int = 0,
    auto_predict_f0: bool = False,
    cluster_infer_ratio: float = 0,
    noise_scale: float = 0.4,
    f0_method: str = "crepe",
    db_thresh: int = -40,
    pad_seconds: float = 0.5,
    chunk_seconds: float = 0.5,
    absolute_thresh: bool = False,
):
    """Convert the audio file at *audio* into the voice of *speaker*.

    Returns a ``(sample_rate, waveform)`` tuple, the shape gradio's
    audio output component expects.
    """
    # Resample the input to the model's native rate before inference.
    source_wav, _ = librosa.load(audio, sr=model.target_sample)
    converted = model.infer_silence(
        source_wav.astype(np.float32),
        speaker=speaker,
        transpose=transpose,
        auto_predict_f0=auto_predict_f0,
        cluster_infer_ratio=cluster_infer_ratio,
        noise_scale=noise_scale,
        f0_method=f0_method,
        db_thresh=db_thresh,
        pad_seconds=pad_seconds,
        chunk_seconds=chunk_seconds,
        absolute_thresh=absolute_thresh,
    )
    return model.target_sample, converted
67
+
68
+
69
# UI copy shown under the title of each tab.
description = f"""
This app uses models trained with so-vits-svc-fork to clone your voice. Model currently being used is https://hf.co/{repo_id}.

To change the model being served, duplicate the space and update the `repo_id` in `app.py`.
""".strip()

# Footer HTML linking back to the upstream project.
article = """
<p style='text-align: center'>
<a href='https://github.com/voicepaw/so-vits-svc-fork' target='_blank'>Github Repo</a>
</p>
""".strip()
80
+
81
def _build_interface(audio_source):
    # The two tabs are identical except for where the source audio comes from
    # ("microphone" vs. "upload"), so one builder serves both.
    return gr.Interface(
        predict,
        inputs=[
            gr.Dropdown(speakers, value=speakers[0], label="Target Speaker"),
            gr.Audio(type="filepath", source=audio_source, label="Source Audio"),
            gr.Slider(-12, 12, value=0, step=1, label="Transpose (Semitones)"),
            gr.Checkbox(False, label="Auto Predict F0"),
            gr.Slider(0.0, 1.0, value=0.0, step=0.1, label='cluster infer ratio'),
            gr.Slider(0.0, 1.0, value=0.4, step=0.1, label="noise scale"),
            gr.Dropdown(choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"], value='crepe', label="f0 method"),
        ],
        outputs="audio",
        title="Voice Cloning",
        description=description,
        article=article,
    )


interface_mic = _build_interface("microphone")
interface_file = _build_interface("upload")
interface = gr.TabbedInterface(
    [interface_mic, interface_file],
    ["Clone From Mic", "Clone From File"],
)


if __name__ == '__main__':
    interface.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ so_vits_svc_fork
2
+ huggingface_hub
3
+ gradio