smjain committed on
Commit 6cfe9c0
1 Parent(s): 817f74f

Upload 2 files

Files changed (2)
  1. myinfer_latest.py +715 -676
  2. requirements.txt +28 -26
myinfer_latest.py CHANGED
@@ -31,7 +31,9 @@
 from flask_dance.contrib.google import make_google_blueprint, google
 import io
 from space import ensure_model_in_weights_dir,upload_to_do
 import boto3
+from moviepy.editor import *
+import os
 
 
 
@@ -242,7 +244,36 @@
     print(audio_id)
     status_info = task_status_tracker.get(audio_id, {"status": "Unknown ID", "percentage": 0})
     return jsonify({"audio_id": audio_id, "status": status_info["status"], "percentage": status_info["percentage"]})
 
+
+def merge_audio_image(mp3_path, image_path, output_dir, unique_id):
+    # Load the image
+    image_clip = ImageClip(image_path)
+
+    # Load the audio
+    audio_clip = AudioFileClip(mp3_path)
+
+    # Set the duration of the image clip to match the audio duration
+    image_clip = image_clip.set_duration(audio_clip.duration)
+
+    # Resize the image clip to Instagram's square dimensions (1080x1080)
+    image_clip = image_clip.resize((1080, 1080))
+
+    # Set the audio to the image clip
+    final_clip = image_clip.set_audio(audio_clip)
+
+    # Generate output file path
+    output_path = os.path.join(output_dir, f"{unique_id}.mp4")
+
+    # Write the output video file
+    final_clip.write_videofile(output_path, codec='libx264', audio_codec='aac')
+
+    return output_path
+
+
+
+
+
 processed_audio_storage = {}
 @app.route('/convert_voice', methods=['POST'])
 def api_convert_voice():
@@ -351,8 +382,16 @@
         print(output_path1)
 
         #created_files.extend([vocal_path, inst, output_path])
-        task_status_tracker[unique_id]["status"] = "Completed"
-        upload_to_do(output_path1)
+
+        #upload_to_do(output_path1)
+
+        image_path = 'singer.jpg'
+        os.makedirs("output/result", exist_ok=True)
+        output_dir="output/result"
+        mp4_path = merge_audio_image(output_path1, image_path, output_dir, unique_id)
+        upload_to_do(mp4_path)
+
+        task_status_tracker[unique_id]["status"] = "Completed"
         print("file uploaded to Digital ocean space")
 
         return jsonify({"message": "File processed successfully", "audio_id": unique_id}), 200
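A couple of notes on the new merge_audio_image helper: the "import os" added next to the moviepy import is redundant, since os is already imported at the top of the file, and neither clip is ever closed, so each request can leave ffmpeg reader handles open. moviepy also cannot infer a frame rate from a still image, so write_videofile on an ImageClip generally needs an explicit fps argument. Below is a minimal sketch of the same helper with those details handled; it assumes moviepy 1.x, and the fps=24 value is an illustrative choice, not something the commit specifies.

import os
from moviepy.editor import AudioFileClip, ImageClip

def merge_audio_image(mp3_path, image_path, output_dir, unique_id):
    # Hold the still frame for the full length of the audio track.
    audio_clip = AudioFileClip(mp3_path)
    image_clip = (
        ImageClip(image_path)
        .set_duration(audio_clip.duration)
        .resize((1080, 1080))   # Instagram-style square canvas
        .set_audio(audio_clip)
    )
    output_path = os.path.join(output_dir, f"{unique_id}.mp4")
    try:
        # An ImageClip has no inherent frame rate, so pass fps explicitly.
        image_clip.write_videofile(output_path, fps=24,
                                   codec='libx264', audio_codec='aac')
    finally:
        # Release the underlying ffmpeg readers.
        audio_clip.close()
        image_clip.close()
    return output_path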
requirements.txt CHANGED
@@ -24,3 +24,5 @@
 rarfile
 mega.py
 gdown
+moviepy
+ffmpeg-python
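With moviepy and ffmpeg-python added, the end-to-end flow in myinfer_latest.py is: POST audio to /convert_voice, poll /status/<audio_id> until the tracker reports "Completed", then fetch the mixed result from /get_processed_audio/<audio_id>. A client sketch using requests (an assumed extra dependency, not listed in requirements.txt); the host, speaker name, and file names are placeholders.

import time
import requests  # assumed; not in requirements.txt

BASE = "http://localhost:5000"  # matches app.run(port=5000, host='0.0.0.0')

# spk_id is the model name without the .pth suffix; voice_transform is
# passed through to f0_up_key (pitch shift) and must parse as an int.
with open("song.mp3", "rb") as f:
    resp = requests.post(
        f"{BASE}/convert_voice",
        data={"spk_id": "my_speaker", "voice_transform": "0"},
        files={"file": ("song.mp3", f, "audio/mpeg")},
    )
resp.raise_for_status()
audio_id = resp.json()["audio_id"]

# Poll the status tracker until the request is marked complete.
while True:
    info = requests.get(f"{BASE}/status/{audio_id}").json()
    print(info["status"], info["percentage"])
    if info["status"] == "Completed":
        break
    time.sleep(2)

# The finished vocal+instrumental mix is served as an attachment.
audio = requests.get(f"{BASE}/get_processed_audio/{audio_id}")
with open(f"{audio_id}_converted.mp3", "wb") as out:
    out.write(audio.content)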