kevinwang676 committed on
Commit
4d7c371
·
verified ·
1 Parent(s): bd7faa3

Create rvc.py

Browse files
Files changed (1) hide show
  1. rvc.py +268 -0
rvc.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ app.py – unified FastAPI backend
3
+
4
+ * /voice-convert – RVC voice conversion
5
+ * /uvr-remove – UVR vocal / instrumental separation
6
+
7
+ Run:
8
+ uvicorn app:app --host 0.0.0.0 --port 8000
9
+ """
10
+
11
+ import asyncio
12
+ import mimetypes
13
+ import shutil
14
+ import subprocess
15
+ import tempfile
16
+ import uuid
17
+ import zipfile
18
+ from pathlib import Path
19
+ from typing import List
20
+
21
+ import httpx
22
+ from fastapi import BackgroundTasks, FastAPI, HTTPException
23
+ from fastapi.responses import FileResponse
24
+ from pydantic import BaseModel, HttpUrl, conint
25
+ from fastapi.responses import JSONResponse
26
+ import base64
27
+
28
+ # ─────────────── RVC IMPORT (lazy singleton) ────────────────────────────────
29
+ from rvc_cli import import_voice_converter # change if your module is named differently
30
+
31
# Created once, when this module is imported; _voice_convert() reuses this
# same object for every request.
converter = import_voice_converter()
32
+
33
+ # ─────────────── FASTAPI APP ────────────────────────────────────────────────
34
# Application object that the route decorators in this module register on.
# The title/description are shown in the generated API docs, so they use
# plain ASCII spaces (the previous text carried mis-decoded non-breaking
# spaces that rendered as stray "Β" characters).
app = FastAPI(
    title="Audio AI Backend",
    version="2.0.0",
    description="Voice conversion (RVC) + vocal removal (UVR)",
)
39
+
40
+ # ─────────────────────── COMMON HELPERS ─────────────────────────────────────
41
+
42
+
43
async def _download(
    url: str,
    dest: Path,
    *,
    retries: int = 3,
    chunk_size: int = 1 << 18,  # 256 KiB
    connect_timeout: float = 10.0,
    read_timeout: float = 10.0,
) -> None:
    """Stream *url* into the file *dest*, retrying transient network errors.

    * the body is written chunk by chunk, so memory use stays constant
    * redirects are followed
    * timeouts / transport errors are retried with exponential back-off
      (1 s, 2 s, 4 s, ...)

    Args:
        url: Address to fetch with a GET request.
        dest: Target file; missing parent directories are created.
        retries: Maximum number of attempts. Values below 1 are treated as 1
            so that at least one attempt is always made.
        chunk_size: Number of bytes requested per streamed chunk.
        connect_timeout: Default timeout (seconds) applied to the request.
        read_timeout: Timeout (seconds) for reading from the connection.

    Raises:
        HTTPException: 400 when the upstream answers with a status >= 400
            (not retried), or when every attempt failed with a timeout /
            transport error.
    """
    # A non-positive ``retries`` used to skip the loop entirely and report
    # success without writing anything; always try at least once.
    attempts = max(1, retries)

    dest.parent.mkdir(parents=True, exist_ok=True)
    timeout_cfg = httpx.Timeout(connect_timeout, read=read_timeout)
    backoff = 1.0

    # One client for all attempts: its configuration never changes between
    # retries, so there is no need to rebuild it each time.
    async with httpx.AsyncClient(timeout=timeout_cfg, follow_redirects=True) as client:
        for attempt in range(1, attempts + 1):
            try:
                async with client.stream("GET", url) as resp:
                    if resp.status_code >= 400:
                        raise HTTPException(
                            status_code=400,
                            detail=f"Upstream returned {resp.status_code} for {url}",
                        )

                    with dest.open("wb") as fp:
                        async for chunk in resp.aiter_bytes(chunk_size):
                            fp.write(chunk)
                return  # success
            except (httpx.TimeoutException, httpx.TransportError) as exc:
                # Do not leave a truncated file behind for the caller to find.
                dest.unlink(missing_ok=True)
                if attempt == attempts:
                    raise HTTPException(
                        status_code=400,
                        detail=f"Failed to fetch {url} after {attempts} attempts: {exc}",
                    ) from exc
                await asyncio.sleep(backoff)
                backoff *= 2  # exponential back-off
85
+
86
+
87
+ def _ensure_wav(src: Path, work_dir: Path) -> Path:
88
+ """If *src* isn’t WAV, transcode with FFmpeg β†’ 48Β kHzΒ stereo WAV."""
89
+ if src.suffix.lower() == ".wav":
90
+ return src
91
+
92
+ ctype, _ = mimetypes.guess_type(src.name)
93
+ if not (ctype or "").startswith("audio"):
94
+ raise HTTPException(status_code=400, detail="Input is not an audio file")
95
+
96
+ dst = work_dir / f"{src.stem}_converted.wav"
97
+ cmd = ["ffmpeg", "-y", "-i", str(src), "-ar", "48000", "-ac", "2", str(dst)]
98
+ try:
99
+ subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
100
+ except (subprocess.CalledProcessError, FileNotFoundError):
101
+ raise HTTPException(
102
+ status_code=500,
103
+ detail="FFmpeg failed or is missing on the server.",
104
+ )
105
+ return dst
106
+
107
+
108
+ # ──────────────────────── RVC VOICE‑CONVERT ─────────────────────────────────
109
class VoiceConversionRequest(BaseModel):
    """Request body accepted by the ``/voice-convert`` endpoint."""

    # Pitch value handed to the converter; validation restricts it to the
    # inclusive range -24..24 (presumably semitones - confirm with rvc_cli).
    pitch: conint(ge=-24, le=24)
    # Where to fetch the audio to convert (wav/mp3/flac…).
    input_url: HttpUrl
    # Where to fetch the ZIP archive that holds the *.pth and *.index files.
    model_url: HttpUrl
113
+
114
def _voice_convert(
    wav_in: Path, wav_out: Path, pth_file: Path, index_file: Path, pitch: int
) -> None:
    """Run one conversion through the shared RVC converter (blocking call).

    Args:
        wav_in: Audio file to convert.
        wav_out: Path passed to the converter as the output location.
        pth_file: Model weights file.
        index_file: Index file that belongs to the model.
        pitch: Pitch value forwarded unchanged to the converter.
    """
    # Arguments that vary per request.
    per_request = {
        "audio_input_path": str(wav_in),
        "audio_output_path": str(wav_out),
        "model_path": str(pth_file),
        "index_path": str(index_file),
        "pitch": pitch,
    }
    # Settings that are the same for every request.
    fixed_settings = {
        "filter_radius": 3,
        "index_rate": 0.3,
        "volume_envelope": 1.0,
        "protect": 0.33,
        "hop_length": 128,
        "f0_method": "rmvpe",
        "split_audio": False,
        "export_format": "WAV",
        "embedder_model": "contentvec",
        "sid": 0,
    }
    converter.convert_audio(**per_request, **fixed_settings)
135
+
136
+
137
+ # ─── 2. VOICE‑CONVERT ENDPOINT ─────────────────────────────────────────────
138
+ import zipfile, itertools
139
@app.post("/voice-convert", response_class=FileResponse)
async def voice_convert(req: VoiceConversionRequest, background: BackgroundTasks):
    """Download audio + model ZIP, run RVC conversion and return the WAV.

    Steps: download both inputs concurrently, unpack the model archive,
    make sure the input is WAV, run the converter in a worker thread and
    stream the result back as ``audio/wav`` under a random file name.

    All intermediate files live in one temporary directory. It is removed
    by a background task after the response has been sent, or immediately
    if anything fails before a response exists.

    Raises:
        HTTPException: 400 for download problems, an invalid ZIP, a ZIP
            without ``.pth``/``.index`` or a non-audio input; 500 if FFmpeg
            or the conversion itself fails.
    """
    # NOTE(review): both URLs are caller-supplied and fetched by the server;
    # consider restricting the allowed hosts if this API is exposed publicly.
    tmp = Path(tempfile.mkdtemp(prefix="rvc_"))
    try:
        # 2-a. download audio & model ZIP.
        # Only the suffix of the remote name is kept: the URL's basename may be
        # empty or "..", or collide with "model.zip" / the "model" directory.
        suffix = Path(req.input_url.path or "").suffix
        wav_src = tmp / f"input{suffix}"
        model_zip = tmp / "model.zip"
        await asyncio.gather(
            _download(str(req.input_url), wav_src),
            _download(str(req.model_url), model_zip),
        )

        # 2-b. extract ZIP (nested folders ok) - off the event loop
        pth_path, index_path = await asyncio.to_thread(
            _unpack_rvc_model, model_zip, tmp / "model"
        )

        # 2-c. make sure input is WAV (FFmpeg run is blocking -> worker thread)
        wav_for_rvc = await asyncio.to_thread(_ensure_wav, wav_src, tmp)
        out_wav = tmp / f"{wav_for_rvc.stem}_output.wav"

        # 2-d. run conversion in a worker thread
        try:
            await asyncio.to_thread(
                _voice_convert, wav_for_rvc, out_wav, pth_path, index_path, req.pitch
            )
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Conversion failed: {e}") from e

        if not out_wav.exists():
            raise HTTPException(
                status_code=500, detail="Conversion failed: no output file was written"
            )
    except BaseException:
        # Background tasks only run after a successful response, so clean up
        # here on every failure (including cancellation) before re-raising.
        shutil.rmtree(tmp, ignore_errors=True)
        raise

    # 2-e. remove the whole working directory once the file has been sent
    background.add_task(shutil.rmtree, tmp, ignore_errors=True)

    return FileResponse(
        path=out_wav,
        media_type="audio/wav",
        filename=f"{uuid.uuid4().hex}.wav",
        background=background,
    )


def _unpack_rvc_model(model_zip: Path, extract_dir: Path) -> tuple[Path, Path]:
    """Extract *model_zip* into *extract_dir* and return (``.pth``, ``.index``).

    The first match of each kind anywhere in the extracted tree is used.

    Raises:
        HTTPException: 400 if the archive is not a valid ZIP or lacks either
            of the two required files.
    """
    extract_dir.mkdir(exist_ok=True)
    try:
        with zipfile.ZipFile(model_zip) as zf:
            zf.extractall(extract_dir)
    except zipfile.BadZipFile as exc:
        raise HTTPException(
            status_code=400, detail="Uploaded model is not a valid ZIP"
        ) from exc

    pth_path = next(extract_dir.rglob("*.pth"), None)
    index_path = next(extract_dir.rglob("*.index"), None)
    if pth_path is None or index_path is None:
        raise HTTPException(status_code=400, detail="ZIP does not contain .pth and .index")
    return pth_path, index_path
188
+
189
+
190
+ # ────────────────────────── UVR VOCAL REMOVAL ───────────────────────────────
191
class UVRRequest(BaseModel):
    """Request body accepted by the ``/uvr-remove`` endpoint."""

    # Where to fetch the audio that should be separated.
    audio_url: HttpUrl
    # Name of the separation model to load, e.g. "2_HP-UVR.pth".
    model_filename: str
194
+
195
+
196
+ # ──────────────── patched helper ───────────────────────────────────────────
197
def _uvr_separate(audio_path: Path, model_filename: str, out_dir: Path) -> list[Path]:
    """Separate *audio_path* with UVR and return the produced files.

    Args:
        audio_path: Audio file to split.
        model_filename: Model name handed to ``Separator.load_model``.
        out_dir: Directory configured as the separator's output directory.

    Returns:
        Absolute, resolved paths of the files reported by the separator, in
        the order they were reported.

    Raises:
        RuntimeError: If a reported file does not exist on disk.
    """
    # Imported here rather than at module level, so the package is only
    # loaded when a separation is actually requested.
    from uvr.separator import Separator

    separator = Separator(
        model_file_dir="uvr/tmp/audio-separator-models/",
        output_dir=str(out_dir),
        output_format="MP3",
        normalization_threshold=0.9,
    )
    separator.load_model(model_filename=model_filename)

    def _locate(reported: str) -> Path:
        """Turn one reported path into an absolute path that must exist."""
        candidate = Path(reported)
        if not candidate.is_absolute():
            # Relative entries are interpreted relative to the output directory.
            candidate = out_dir / candidate
        candidate = candidate.resolve()
        if not candidate.exists():
            raise RuntimeError(f"UVR reported missing file: {candidate}")
        return candidate

    return [_locate(entry) for entry in separator.separate(str(audio_path))]
223
+
224
@app.post("/uvr-remove", response_class=JSONResponse)
async def uvr_remove(req: UVRRequest, background: BackgroundTasks):
    """Download audio, split it with UVR and return both stems as base64.

    The JSON response has two entries, ``vocals`` and ``instrumental``, each
    with the stem's ``filename`` and its content under ``base64_wav``. The
    key name is kept for backward compatibility; ``_uvr_separate`` currently
    asks the separator for MP3 output, so the payload is not necessarily WAV.

    All intermediate files live in one temporary directory. It is removed
    by a background task after the response has been sent, or immediately
    if anything fails before a response exists.

    Raises:
        HTTPException: 400 for download problems or a missing model file;
            500 if the separation fails or yields fewer than two stems.
    """
    tmp = Path(tempfile.mkdtemp(prefix="uvr_"))
    try:
        # 1. download the audio to be separated.
        # Keep the remote file name, but fall back to a fixed one when the URL
        # path has no usable basename (empty or "..").
        remote_name = Path(req.audio_url.path or "").name
        if not remote_name or remote_name == "..":
            remote_name = "input"
        src = tmp / remote_name
        await _download(str(req.audio_url), src)

        # 2. call UVR
        try:
            stems = await asyncio.to_thread(_uvr_separate, src, req.model_filename, tmp)
        except FileNotFoundError as exc:
            raise HTTPException(status_code=400, detail="Model file not found.") from exc
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"UVR failed: {e}") from e

        if len(stems) < 2:
            raise HTTPException(status_code=500, detail="UVR did not produce two stems.")

        # 3. find which stem is vocals / instrumental (UVR naming convention)
        vocals_path = next((p for p in stems if "vocal" in p.stem.lower()), stems[0])
        # Fallback for the instrumental is "a stem other than the vocals", so
        # the two entries never point at the same file by accident.
        other_stem = next((p for p in stems if p != vocals_path), stems[1])
        inst_path = next(
            (p for p in stems if "instrumental" in p.stem.lower()), other_stem
        )

        def _b64(path: Path) -> str:
            """Read *path* and return its content base64-encoded as ASCII text."""
            return base64.b64encode(path.read_bytes()).decode("ascii")

        # 4. encode (file reads happen in worker threads) and build response
        vocals_b64, inst_b64 = await asyncio.gather(
            asyncio.to_thread(_b64, vocals_path),
            asyncio.to_thread(_b64, inst_path),
        )
        payload = {
            "vocals": {
                "filename": vocals_path.name,
                "base64_wav": vocals_b64,
            },
            "instrumental": {
                "filename": inst_path.name,
                "base64_wav": inst_b64,
            },
        }
    except BaseException:
        # Background tasks only run after a successful response, so clean up
        # here on every failure (including cancellation) before re-raising.
        shutil.rmtree(tmp, ignore_errors=True)
        raise

    # 5. clean up temp artefacts once the response has been sent
    background.add_task(shutil.rmtree, tmp, ignore_errors=True)

    return JSONResponse(content=payload)