Spaces:
Runtime error
Runtime error
Create rvc.py
Browse files
rvc.py
ADDED
@@ -0,0 +1,268 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
app.py — unified FastAPI backend
|
3 |
+
|
4 |
+
* /voice-convert — RVC voice conversion
|
5 |
+
* /uvr-remove — UVR vocal / instrumental separation
|
6 |
+
|
7 |
+
Run:
|
8 |
+
uvicorn app:app --host 0.0.0.0 --port 8000
|
9 |
+
"""
|
10 |
+
|
11 |
+
import asyncio
|
12 |
+
import mimetypes
|
13 |
+
import shutil
|
14 |
+
import subprocess
|
15 |
+
import tempfile
|
16 |
+
import uuid
|
17 |
+
import zipfile
|
18 |
+
from pathlib import Path
|
19 |
+
from typing import List
|
20 |
+
|
21 |
+
import httpx
|
22 |
+
from fastapi import BackgroundTasks, FastAPI, HTTPException
|
23 |
+
from fastapi.responses import FileResponse
|
24 |
+
from pydantic import BaseModel, HttpUrl, conint
|
25 |
+
from fastapi.responses import JSONResponse
|
26 |
+
import base64
|
27 |
+
|
28 |
+
# ─────────────── RVC IMPORT (module-level singleton, built at import time) ───────────────
|
29 |
+
from rvc_cli import import_voice_converter # change if your module is named differently
|
30 |
+
|
31 |
+
converter = import_voice_converter()
|
32 |
+
|
33 |
+
# ─────────────── FASTAPI APP ───────────────────────────────────────────────
|
34 |
+
# Application object served by uvicorn; title/description appear in the
# generated OpenAPI docs, so they use plain ASCII spaces (the previous text
# carried mis-decoded non-breaking spaces).
app = FastAPI(
    title="Audio AI Backend",
    version="2.0.0",
    description="Voice conversion (RVC) + vocal removal (UVR)",
)
|
39 |
+
|
40 |
+
# ─────────────────────── COMMON HELPERS ─────────────────────────────────────
|
41 |
+
|
42 |
+
|
43 |
+
async def _download(
    url: str,
    dest: Path,
    *,
    retries: int = 3,
    chunk_size: int = 1 << 18,  # 256 KiB
    connect_timeout: float = 10.0,
    read_timeout: float = 10.0,
) -> None:
    """
    Stream *url* to the file *dest*.

    • streams in chunks of *chunk_size* bytes (constant memory)
    • follows redirects
    • retries on network / timeout errors with exponential back-off
      (1 s, 2 s, 4 s, ...)

    Args:
        url: Address to fetch with a GET request.
        dest: Target file; its parent directory is created if missing.
        retries: Maximum number of attempts. Values below 1 are treated as 1
            so the function never returns without having tried the download.
        chunk_size: Size of each streamed chunk in bytes.
        connect_timeout: Default httpx timeout in seconds (applies to every
            phase except read).
        read_timeout: Read timeout in seconds.

    Raises:
        HTTPException: 400 when the upstream answers with a status >= 400
            (not retried), or when every attempt failed with a transport or
            timeout error.
    """
    # NOTE(review): *url* is caller-supplied and fetched as-is (no host
    # allow-list) — confirm this is acceptable for the deployment.
    dest.parent.mkdir(parents=True, exist_ok=True)

    attempts = max(1, retries)
    # Loop-invariant, so build it once instead of once per attempt.
    timeout_cfg = httpx.Timeout(connect_timeout, read=read_timeout)

    backoff = 1.0
    for attempt in range(1, attempts + 1):
        try:
            async with httpx.AsyncClient(timeout=timeout_cfg, follow_redirects=True) as client:
                async with client.stream("GET", url) as resp:
                    if resp.status_code >= 400:
                        raise HTTPException(
                            status_code=400,
                            detail=f"Upstream returned {resp.status_code} for {url}",
                        )

                    # Opening with "wb" truncates, so a partial file left by a
                    # failed attempt is overwritten on retry.
                    with dest.open("wb") as fp:
                        async for chunk in resp.aiter_bytes(chunk_size):
                            fp.write(chunk)
            return  # success
        except (httpx.TimeoutException, httpx.TransportError) as exc:
            if attempt == attempts:
                raise HTTPException(
                    status_code=400,
                    detail=f"Failed to fetch {url} after {attempts} attempts: {exc}",
                ) from exc
            await asyncio.sleep(backoff)
            backoff *= 2  # exponential back-off
|
85 |
+
|
86 |
+
|
87 |
+
def _ensure_wav(src: Path, work_dir: Path) -> Path:
    """Return a WAV version of *src*, transcoding with FFmpeg when needed.

    A file whose suffix is already ``.wav`` (case-insensitive) is returned
    untouched. Anything else must have a file name that ``mimetypes`` maps to
    an ``audio/*`` type; it is then converted to 48 kHz stereo WAV.

    Args:
        src: Input audio file.
        work_dir: Directory that receives ``<stem>_converted.wav``.

    Returns:
        *src* itself, or the path of the newly written WAV file.

    Raises:
        HTTPException: 400 when the file name does not look like audio;
            500 when FFmpeg exits non-zero or is not installed.
    """
    if src.suffix.lower() == ".wav":
        return src

    # Detection is by file name only; the content is not inspected here.
    ctype, _ = mimetypes.guess_type(src.name)
    if not (ctype or "").startswith("audio"):
        raise HTTPException(status_code=400, detail="Input is not an audio file")

    dst = work_dir / f"{src.stem}_converted.wav"
    cmd = ["ffmpeg", "-y", "-i", str(src), "-ar", "48000", "-ac", "2", str(dst)]
    try:
        subprocess.run(
            cmd,
            check=True,
            # Keep ffmpeg away from the server's stdin so it can neither block
            # on it nor consume it.
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.CalledProcessError, FileNotFoundError) as exc:
        raise HTTPException(
            status_code=500,
            detail="FFmpeg failed or is missing on the server.",
        ) from exc
    return dst
|
106 |
+
|
107 |
+
|
108 |
+
# ──────────────────────── RVC VOICE-CONVERT ─────────────────────────────────
|
109 |
+
class VoiceConversionRequest(BaseModel):
    """JSON body accepted by the ``/voice-convert`` endpoint."""

    # Pitch shift forwarded to the converter; validated to the inclusive
    # range -24..24 (presumably semitones — confirm against the RVC docs).
    pitch: conint(ge=-24, le=24)
    input_url: HttpUrl  # audio (wav/mp3/flac, ...)
    # NOTE(review): pydantic v2 may warn about the "model_" field prefix
    # (protected namespace) — confirm with the installed pydantic version.
    model_url: HttpUrl  # ZIP that holds *.pth & *.index
|
113 |
+
|
114 |
+
def _voice_convert(
    wav_in: Path, wav_out: Path, pth_file: Path, index_file: Path, pitch: int
) -> None:
    """Run one synchronous RVC conversion through the module-level converter.

    The call blocks until ``converter.convert_audio`` returns, so async
    callers should dispatch it to a worker thread.

    Args:
        wav_in: Audio file to convert.
        wav_out: Path handed to the converter as the output location.
        pth_file: Model weights (``*.pth``).
        index_file: Matching ``*.index`` file.
        pitch: Pitch value forwarded unchanged to the converter.
    """
    # Values that vary per request.
    per_request = {
        "audio_input_path": str(wav_in),
        "audio_output_path": str(wav_out),
        "model_path": str(pth_file),
        "index_path": str(index_file),
        "pitch": pitch,
    }
    # Fixed inference settings used for every conversion.
    fixed_settings = {
        "filter_radius": 3,
        "index_rate": 0.3,
        "volume_envelope": 1.0,
        "protect": 0.33,
        "hop_length": 128,
        "f0_method": "rmvpe",
        "split_audio": False,
        "export_format": "WAV",
        "embedder_model": "contentvec",
        "sid": 0,
    }
    converter.convert_audio(**per_request, **fixed_settings)
|
135 |
+
|
136 |
+
|
137 |
+
# ─── 2. VOICE-CONVERT ENDPOINT ──────────────────────────────────────────────
|
138 |
+
import zipfile, itertools
|
139 |
+
@app.post("/voice-convert", response_class=FileResponse)
async def voice_convert(req: VoiceConversionRequest, background: BackgroundTasks):
    """Convert the audio at ``req.input_url`` with the model ZIP at ``req.model_url``.

    Steps: download both files into a private temp directory, unpack the ZIP,
    locate the first ``*.pth`` and ``*.index`` inside it, make sure the input
    is WAV, run the conversion in a worker thread, and stream the result back.

    Returns:
        A ``FileResponse`` (``audio/wav``) with a random file name. The temp
        directory is removed by a background task after the response is sent.

    Raises:
        HTTPException: 400 for download failures, an invalid ZIP, a ZIP
            without ``.pth``/``.index``, or a non-audio input; 500 when FFmpeg
            or the conversion fails.
    """
    tmp = Path(tempfile.mkdtemp(prefix="rvc_"))
    try:
        # a. download audio & model ZIP
        # The "input_" prefix keeps the audio from colliding with "model.zip"
        # or the "model" directory, and the fallback covers URLs whose path
        # has no basename (e.g. "https://host/").
        src_name = Path(req.input_url.path or "").name or "audio"
        wav_src = tmp / f"input_{src_name}"
        model_zip = tmp / "model.zip"
        await asyncio.gather(
            _download(str(req.input_url), wav_src),
            _download(str(req.model_url), model_zip),
        )

        # b. extract ZIP (nested folders ok); done off the event loop because
        # model archives can be large.
        extract_dir = tmp / "model"
        extract_dir.mkdir(exist_ok=True)
        try:
            with zipfile.ZipFile(model_zip) as zf:
                await asyncio.to_thread(zf.extractall, extract_dir)
        except zipfile.BadZipFile as exc:
            raise HTTPException(
                status_code=400, detail="Uploaded model is not a valid ZIP"
            ) from exc

        # locate first *.pth and *.index anywhere in the tree
        pth_path = next(extract_dir.rglob("*.pth"), None)
        index_path = next(extract_dir.rglob("*.index"), None)
        if pth_path is None or index_path is None:
            raise HTTPException(status_code=400, detail="ZIP does not contain .pth and .index")

        # c. make sure input is WAV (FFmpeg is a blocking subprocess, so it
        # runs in a worker thread as well)
        wav_for_rvc = await asyncio.to_thread(_ensure_wav, wav_src, tmp)
        out_wav = tmp / f"{wav_for_rvc.stem}_output.wav"

        # d. run conversion in a worker thread
        try:
            await asyncio.to_thread(
                _voice_convert, wav_for_rvc, out_wav, pth_path, index_path, req.pitch
            )
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Conversion failed: {e}") from e

        if not out_wav.is_file():
            raise HTTPException(status_code=500, detail="Conversion produced no output file")
    except BaseException:
        # Background tasks only run after a successful response, so failures
        # (including client cancellation) must clean up here or the temp
        # directory would be leaked.
        shutil.rmtree(tmp, ignore_errors=True)
        raise

    # e. every artefact lives under tmp, so one task removes them all once the
    # file has been streamed to the client.
    background.add_task(shutil.rmtree, tmp, ignore_errors=True)

    return FileResponse(
        path=out_wav,
        media_type="audio/wav",
        filename=f"{uuid.uuid4().hex}.wav",
        background=background,
    )
|
188 |
+
|
189 |
+
|
190 |
+
# ────────────────────────── UVR VOCAL REMOVAL ───────────────────────────────
|
191 |
+
class UVRRequest(BaseModel):
    """JSON body accepted by the ``/uvr-remove`` endpoint."""

    audio_url: HttpUrl  # audio file to download and separate
    # NOTE(review): pydantic v2 may warn about the "model_" field prefix
    # (protected namespace) — confirm with the installed pydantic version.
    model_filename: str  # e.g. "2_HP-UVR.pth"
|
194 |
+
|
195 |
+
|
196 |
+
# ──────────────── UVR separation helper (normalises returned paths) ─────────
|
197 |
+
def _uvr_separate(audio_path: Path, model_filename: str, out_dir: Path) -> list[Path]:
    """Split *audio_path* into stems with UVR and return their absolute paths.

    Args:
        audio_path: Audio file handed to the separator.
        model_filename: Model name passed to ``Separator.load_model``.
        out_dir: Directory configured as the separator's output directory;
            also the base for any relative path the separator reports.

    Returns:
        One resolved, existing ``Path`` per entry reported by the separator,
        in the order reported.

    Raises:
        RuntimeError: When a reported output file does not exist on disk.
    """
    # Imported here rather than at module level, so the module loads without it.
    from uvr.separator import Separator

    # NOTE(review): stems are written as MP3, yet the /uvr-remove response
    # labels them "base64_wav" — confirm which one clients expect.
    separator = Separator(
        model_file_dir="uvr/tmp/audio-separator-models/",
        output_dir=str(out_dir),
        output_format="MP3",
        normalization_threshold=0.9,
    )
    separator.load_model(model_filename=model_filename)

    def _locate(reported: str) -> Path:
        """Turn one reported path into an absolute path that must exist."""
        candidate = Path(reported)
        if not candidate.is_absolute():
            # Relative names are taken to be relative to the output directory.
            candidate = out_dir / candidate
        candidate = candidate.resolve()
        if not candidate.exists():
            raise RuntimeError(f"UVR reported missing file: {candidate}")
        return candidate

    return [_locate(item) for item in separator.separate(str(audio_path))]
|
223 |
+
|
224 |
+
@app.post("/uvr-remove", response_class=JSONResponse)
async def uvr_remove(req: UVRRequest, background: BackgroundTasks):
    """Separate the audio at ``req.audio_url`` into vocal and instrumental stems.

    Returns:
        JSON of the form ``{"vocals": {"filename", "base64_wav"},
        "instrumental": {"filename", "base64_wav"}}`` where each
        ``base64_wav`` value is the base-64 encoded stem file.

    Raises:
        HTTPException: 400 for an invalid ``model_filename``, a failed
            download, or a missing model file; 500 when UVR fails or yields
            fewer than two stems.

    The whole payload is built in memory before returning, so temp files are
    removed synchronously; ``background`` is kept only for signature
    compatibility.
    """
    # NOTE(review): "base64_wav" is kept for client compatibility even though
    # _uvr_separate configures MP3 output — confirm the intended format.

    # The name comes straight from the client and is handed to the model
    # loader, so refuse anything that could address another directory.
    model_name = req.model_filename
    if (
        not model_name
        or model_name in {".", ".."}
        or "/" in model_name
        or "\\" in model_name
    ):
        raise HTTPException(status_code=400, detail="model_filename must be a bare file name.")

    tmp = Path(tempfile.mkdtemp(prefix="uvr_"))
    stems: list[Path] = []
    try:
        # 1. download the audio to be separated (fallback name covers URLs
        #    whose path has no basename)
        src = tmp / (Path(req.audio_url.path or "").name or "audio")
        await _download(str(req.audio_url), src)

        # 2. call UVR
        try:
            stems = await asyncio.to_thread(_uvr_separate, src, model_name, tmp)
        except FileNotFoundError as exc:
            raise HTTPException(status_code=400, detail="Model file not found.") from exc
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"UVR failed: {e}") from e

        if len(stems) < 2:
            raise HTTPException(status_code=500, detail="UVR did not produce two stems.")

        # 3. find which stem is vocals / instrumental (UVR naming convention).
        #    The source stem is stripped first so an input called e.g.
        #    "instrumental_take.mp3" cannot make every output match, and the
        #    two picks are forced to be different files.
        src_stem = src.stem.lower()

        def _label(path: Path) -> str:
            return path.stem.lower().replace(src_stem, "", 1)

        inst_path = next((p for p in stems if "instrumental" in _label(p)), None)
        vocals_path = next(
            (p for p in stems if p != inst_path and "vocal" in _label(p)), None
        )
        if vocals_path is None:
            vocals_path = next((p for p in stems if p != inst_path), stems[0])
        if inst_path is None:
            inst_path = next((p for p in stems if p != vocals_path), stems[1])

        def _b64(path: Path) -> str:
            """read file and base-64 encode → str (ascii)"""
            return base64.b64encode(path.read_bytes()).decode("ascii")

        # 4. encode (off the event loop — stems can be large) and build response
        vocals_b64, inst_b64 = await asyncio.gather(
            asyncio.to_thread(_b64, vocals_path),
            asyncio.to_thread(_b64, inst_path),
        )
        payload = {
            "vocals": {
                "filename": vocals_path.name,
                "base64_wav": vocals_b64,
            },
            "instrumental": {
                "filename": inst_path.name,
                "base64_wav": inst_b64,
            },
        }
    finally:
        # 5. clean up temp artefacts on success *and* failure; background
        #    tasks would be skipped whenever an HTTPException is raised.
        shutil.rmtree(tmp, ignore_errors=True)
        for stem in stems:
            # Covers stems reported outside tmp; best-effort by design.
            try:
                stem.unlink(missing_ok=True)
            except OSError:
                pass

    return JSONResponse(content=payload)
|