"""End-to-end AI-cover pipeline: source separation (vocals / karaoke /
dereverb / deecho / denoise), RVC voice conversion, optional effects, and a
final merge of converted vocals, backing vocals and instrumental."""

import sys
import os
import subprocess
import shutil
import logging
from functools import lru_cache

import torch
import yaml
from pedalboard import Pedalboard, Reverb
from pedalboard.io import AudioFile
from pydub import AudioSegment
from audio_separator.separator import Separator

now_dir = os.getcwd()
sys.path.append(now_dir)

from programs.applio_code.rvc.infer.infer import VoiceConverter
from programs.applio_code.rvc.lib.tools.model_download import model_download_pipeline
from programs.music_separation_code.inference import proc_file

# ---------------------------------------------------------------------------
# Model registries. Entries with "path"/"model"/"config" are run through the
# local music_separation_code CLI; entries with "full_name"/"arch" are loaded
# through the audio_separator package instead.
# ---------------------------------------------------------------------------
models_vocals = [
    {
        "name": "Mel-Roformer by KimberleyJSN",
        "path": os.path.join(now_dir, "models", "mel-vocals"),
        "model": os.path.join(now_dir, "models", "mel-vocals", "model.ckpt"),
        "config": os.path.join(now_dir, "models", "mel-vocals", "config.yaml"),
        "type": "mel_band_roformer",
        "config_url": "https://raw.githubusercontent.com/ZFTurbo/Music-Source-Separation-Training/main/configs/KimberleyJensen/config_vocals_mel_band_roformer_kj.yaml",
        "model_url": "https://huggingface.co/KimberleyJSN/melbandroformer/resolve/main/MelBandRoformer.ckpt",
    },
    {
        "name": "BS-Roformer by ViperX",
        "path": os.path.join(now_dir, "models", "bs-vocals"),
        "model": os.path.join(now_dir, "models", "bs-vocals", "model.ckpt"),
        "config": os.path.join(now_dir, "models", "bs-vocals", "config.yaml"),
        "type": "bs_roformer",
        "config_url": "https://raw.githubusercontent.com/ZFTurbo/Music-Source-Separation-Training/main/configs/viperx/model_bs_roformer_ep_317_sdr_12.9755.yaml",
        "model_url": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/model_bs_roformer_ep_317_sdr_12.9755.ckpt",
    },
    {
        "name": "MDX23C",
        "path": os.path.join(now_dir, "models", "mdx23c-vocals"),
        "model": os.path.join(now_dir, "models", "mdx23c-vocals", "model.ckpt"),
        "config": os.path.join(now_dir, "models", "mdx23c-vocals", "config.yaml"),
        "type": "mdx23c",
        "config_url": "https://raw.githubusercontent.com/ZFTurbo/Music-Source-Separation-Training/main/configs/config_vocals_mdx23c.yaml",
        "model_url": "https://github.com/ZFTurbo/Music-Source-Separation-Training/releases/download/v1.0.0/model_vocals_mdx23c_sdr_10.17.ckpt",
    },
]

karaoke_models = [
    {
        "name": "Mel-Roformer Karaoke by aufr33 and viperx",
        "path": os.path.join(now_dir, "models", "mel-kara"),
        "model": os.path.join(now_dir, "models", "mel-kara", "model.ckpt"),
        "config": os.path.join(now_dir, "models", "mel-kara", "config.yaml"),
        "type": "mel_band_roformer",
        "config_url": "https://huggingface.co/shiromiya/audio-separation-models/resolve/main/mel_band_roformer_karaoke_aufr33_viperx/config_mel_band_roformer_karaoke.yaml",
        "model_url": "https://huggingface.co/shiromiya/audio-separation-models/resolve/main/mel_band_roformer_karaoke_aufr33_viperx/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt",
    },
    {
        "name": "UVR-BVE",
        "full_name": "UVR-BVE-4B_SN-44100-1.pth",
        "arch": "vr",
    },
]

denoise_models = [
    {
        "name": "Mel-Roformer Denoise Normal by aufr33",
        "path": os.path.join(now_dir, "models", "mel-denoise"),
        "model": os.path.join(now_dir, "models", "mel-denoise", "model.ckpt"),
        "config": os.path.join(now_dir, "models", "mel-denoise", "config.yaml"),
        "type": "mel_band_roformer",
        "config_url": "https://huggingface.co/shiromiya/audio-separation-models/resolve/main/mel-denoise/model_mel_band_roformer_denoise.yaml",
        "model_url": "https://huggingface.co/jarredou/aufr33_MelBand_Denoise/resolve/main/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt",
    },
    {
        "name": "Mel-Roformer Denoise Aggressive by aufr33",
        "path": os.path.join(now_dir, "models", "mel-denoise-aggr"),
        "model": os.path.join(now_dir, "models", "mel-denoise-aggr", "model.ckpt"),
        "config": os.path.join(now_dir, "models", "mel-denoise-aggr", "config.yaml"),
        "type": "mel_band_roformer",
        "config_url": "https://huggingface.co/shiromiya/audio-separation-models/resolve/main/mel-denoise/model_mel_band_roformer_denoise.yaml",
        "model_url": "https://huggingface.co/jarredou/aufr33_MelBand_Denoise/resolve/main/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt",
    },
    {
        "name": "UVR Denoise",
        "full_name": "UVR-DeNoise.pth",
        "arch": "vr",
    },
]

dereverb_models = [
    {
        "name": "MDX23C DeReverb by aufr33 and jarredou",
        "path": os.path.join(now_dir, "models", "mdx23c-dereveb"),
        "model": os.path.join(now_dir, "models", "mdx23c-dereveb", "model.ckpt"),
        "config": os.path.join(now_dir, "models", "mdx23c-dereveb", "config.yaml"),
        "type": "mdx23c",
        "config_url": "https://huggingface.co/jarredou/aufr33_jarredou_MDXv3_DeReverb/resolve/main/config_dereverb_mdx23c.yaml",
        "model_url": "https://huggingface.co/jarredou/aufr33_jarredou_MDXv3_DeReverb/resolve/main/dereverb_mdx23c_sdr_6.9096.ckpt",
    },
    {
        # NOTE(review): shares the mdx23c-dereveb directory with the entry
        # above, so the two checkpoints overwrite each other — confirm intent.
        "name": "BS-Roformer Dereverb by anvuew",
        "path": os.path.join(now_dir, "models", "mdx23c-dereveb"),
        "model": os.path.join(now_dir, "models", "mdx23c-dereveb", "model.ckpt"),
        "config": os.path.join(now_dir, "models", "mdx23c-dereveb", "config.yaml"),
        "type": "bs_roformer",
        "config_url": "https://huggingface.co/anvuew/deverb_bs_roformer/resolve/main/deverb_bs_roformer_8_384dim_10depth.yaml",
        "model_url": "https://huggingface.co/anvuew/deverb_bs_roformer/resolve/main/deverb_bs_roformer_8_384dim_10depth.ckpt",
    },
    {
        "name": "UVR-Deecho-Dereverb",
        "full_name": "UVR-DeEcho-DeReverb.pth",
        "arch": "vr",
    },
    {
        "name": "MDX Reverb HQ by FoxJoy",
        "full_name": "Reverb_HQ_By_FoxJoy.onnx",
        "arch": "mdx",
    },
]

deecho_models = [
    {
        "name": "UVR-Deecho-Normal",
        "full_name": "UVR-De-Echo-Normal.pth",
        "arch": "vr",
    },
    {
        "name": "UVR-Deecho-Agggressive",
        "full_name": "UVR-De-Echo-Aggressive.pth",
        "arch": "vr",
    },
]


@lru_cache(maxsize=None)
def import_voice_converter():
    """Lazily build (and cache) a single VoiceConverter instance."""
    from programs.applio_code.rvc.infer.infer import VoiceConverter

    return VoiceConverter()


@lru_cache(maxsize=1)
def get_config():
    """Lazily build (and cache) the RVC Config singleton."""
    from programs.applio_code.rvc.configs.config import Config

    return Config()


def download_file(url, path, filename):
    """Download *url* into ``path/filename``, skipping if it already exists.

    Errors are reported on stdout rather than raised, keeping the pipeline
    best-effort (a later existence check catches missing files).
    """
    os.makedirs(path, exist_ok=True)
    file_path = os.path.join(path, filename)
    if os.path.exists(file_path):
        # Fix: original printed the literal "(unknown)" instead of the name.
        print(f"File '{filename}' already exists at '{path}'.")
        return
    try:
        torch.hub.download_url_to_file(url, file_path)
        print(f"File '{filename}' downloaded successfully")
    except Exception as e:
        print(f"Error downloading file '{filename}' from '{url}': {e}")


def get_model_info_by_name(model_name):
    """Return the registry entry whose "name" matches, or None."""
    all_models = (
        models_vocals + karaoke_models + dereverb_models + deecho_models + denoise_models
    )
    for model in all_models:
        if model["name"] == model_name:
            return model
    return None


def get_last_modified_file(pasta):
    """Return the most recently modified file name in *pasta* (None if empty)."""
    if not os.path.isdir(pasta):
        raise NotADirectoryError(f"{pasta} is not a valid directory.")
    arquivos = [f for f in os.listdir(pasta) if os.path.isfile(os.path.join(pasta, f))]
    if not arquivos:
        return None
    return max(arquivos, key=lambda x: os.path.getmtime(os.path.join(pasta, x)))


def search_with_word(folder, word):
    """Return the newest file name in *folder* containing *word* (None if none)."""
    if not os.path.isdir(folder):
        raise NotADirectoryError(f"{folder} is not a valid directory.")
    file_with_word = [file for file in os.listdir(folder) if word in file]
    if not file_with_word:
        return None
    most_recent_file = max(
        file_with_word, key=lambda file: os.path.getmtime(os.path.join(folder, file))
    )
    return most_recent_file


def search_with_two_words(folder, word1, word2):
    """Return the newest file name in *folder* containing both words (None if none)."""
    if not os.path.isdir(folder):
        raise NotADirectoryError(f"{folder} is not a valid directory.")
    file_with_words = [
        file for file in os.listdir(folder) if word1 in file and word2 in file
    ]
    if not file_with_words:
        return None
    most_recent_file = max(
        file_with_words, key=lambda file: os.path.getmtime(os.path.join(folder, file))
    )
    return most_recent_file


def get_last_modified_folder(path):
    """Return the most recently modified sub-directory path (None if none)."""
    directories = [
        os.path.join(path, d)
        for d in os.listdir(path)
        if os.path.isdir(os.path.join(path, d))
    ]
    if not directories:
        return None
    last_modified_folder = max(directories, key=os.path.getmtime)
    return last_modified_folder


def add_audio_effects(
    audio_path,
    reverb_size,
    reverb_wet,
    reverb_dry,
    reverb_damping,
    reverb_width,
    output_path,
):
    """Apply a Pedalboard reverb to *audio_path*, writing *output_path*.

    Processes in one-second chunks so arbitrarily long files stay in
    constant memory. Returns *output_path*.
    """
    board = Pedalboard([])
    board.append(
        Reverb(
            room_size=reverb_size,
            dry_level=reverb_dry,
            wet_level=reverb_wet,
            damping=reverb_damping,
            width=reverb_width,
        )
    )
    with AudioFile(audio_path) as f:
        with AudioFile(output_path, "w", f.samplerate, f.num_channels) as o:
            while f.tell() < f.frames:
                chunk = f.read(int(f.samplerate))
                # reset=False keeps the reverb tail continuous across chunks.
                effected = board(chunk, f.samplerate, reset=False)
                o.write(effected)
    return output_path


def merge_audios(
    vocals_path,
    inst_path,
    backing_path,
    output_path,
    main_gain,
    inst_gain,
    backing_Vol,
    output_format,
):
    """Overlay vocals, instrumental and backing vocals and export the mix.

    Gains are in dB (pydub AudioSegment + operator). Returns *output_path*.
    NOTE(review): inputs are decoded with format="flac" — confirm every
    caller hands flac-encoded files here.
    """
    main_vocal_audio = AudioSegment.from_file(vocals_path, format="flac") + main_gain
    instrumental_audio = AudioSegment.from_file(inst_path, format="flac") + inst_gain
    backing_vocal_audio = (
        AudioSegment.from_file(backing_path, format="flac") + backing_Vol
    )
    combined_audio = main_vocal_audio.overlay(
        instrumental_audio.overlay(backing_vocal_audio)
    )
    combined_audio.export(output_path, format=output_format)
    return output_path


def check_fp16_support(device):
    """Return True when the CUDA *device* (e.g. "cuda:0") can run FP16 inference."""
    i_device = int(str(device).split(":")[-1])
    gpu_name = torch.cuda.get_device_name(i_device)
    # Pascal/16-series cards have poor or absent FP16 throughput.
    low_end_gpus = ["16", "P40", "P10", "1060", "1070", "1080"]
    if any(gpu in gpu_name for gpu in low_end_gpus) and "V100" not in gpu_name.upper():
        print(f"Your GPU {gpu_name} does not support FP16 inference. Using FP32 instead.")
        return False
    return True


def full_inference_program(
    model_path,
    index_path,
    input_audio_path,
    output_path,
    export_format_rvc,
    split_audio,
    autotune,
    vocal_model,
    karaoke_model,
    dereverb_model,
    deecho,
    deecho_model,
    denoise,
    denoise_model,
    reverb,
    vocals_volume,
    instrumentals_volume,
    backing_vocals_volume,
    export_format_final,
    devices,
    pitch,
    filter_radius,
    index_rate,
    rms_mix_rate,
    protect,
    pitch_extract,
    hop_lenght,
    reverb_room_size,
    reverb_damping,
    reverb_wet_gain,
    reverb_dry_gain,
    reverb_width,
    embedder_model,
    delete_audios,
    use_tta,
    batch_size,
    infer_backing_vocals,
    infer_backing_vocals_model,
    infer_backing_vocals_index,
    change_inst_pitch,
    pitch_back,
    filter_radius_back,
    index_rate_back,
    rms_mix_rate_back,
    protect_back,
    pitch_extract_back,
    hop_length_back,
    export_format_rvc_back,
    split_audio_back,
    autotune_back,
    embedder_model_back,
):
    """Run the full cover-generation pipeline for one input song.

    Stages (each skipped when its output already exists on disk):
    vocal separation -> karaoke (backing-vocal) separation -> dereverb ->
    optional deecho -> optional denoise -> RVC conversion (main and
    optionally backing vocals) -> optional reverb -> optional instrumental
    pitch shift -> final merge. Returns (status message, merged file path).
    """
    # --- Device / precision selection -------------------------------------
    if torch.cuda.is_available():
        n_gpu = torch.cuda.device_count()
        devices = devices.replace("-", " ")
        print(f"Number of GPUs available: {n_gpu}")
        first_device = devices.split()[0]
        fp16 = check_fp16_support(first_device)
    else:
        devices = "cpu"
        print("Using CPU")
        fp16 = False

    music_folder = os.path.splitext(os.path.basename(input_audio_path))[0]
    # Stem used to tag every intermediate file. Kept as split(".")[0] (first
    # dot) to match the naming used by the separation CLI outputs.
    track_stem = os.path.basename(input_audio_path).split(".")[0]

    def _ensure_separation_model(model_info):
        # Download checkpoint/config if missing and force AMP off when the
        # selected GPU (or CPU) cannot run FP16.
        model_ckpt_path = os.path.join(model_info["path"], "model.ckpt")
        if not os.path.exists(model_ckpt_path):
            download_file(model_info["model_url"], model_info["path"], "model.ckpt")
        config_json_path = os.path.join(model_info["path"], "config.yaml")
        if not os.path.exists(config_json_path):
            download_file(model_info["config_url"], model_info["path"], "config.yaml")
        if not fp16:
            with open(model_info["config"], "r") as file:
                config = yaml.safe_load(file)
            config["training"]["use_amp"] = False
            with open(model_info["config"], "w") as file:
                yaml.safe_dump(config, file)

    def _run_separation(model_info, input_file, store_dir, extract_instrumental):
        # Invoke the music_separation_code CLI for one stem-extraction pass.
        command = [
            "python",
            os.path.join(now_dir, "programs", "music_separation_code", "inference.py"),
            "--model_type",
            model_info["type"],
            "--config_path",
            model_info["config"],
            "--start_check_point",
            model_info["model"],
            "--input_file",
            input_file,
            "--store_dir",
            store_dir,
            "--flac_file",
            "--pcm_type",
            "PCM_16",
        ]
        if extract_instrumental:
            command.append("--extract_instrumental")
        if devices == "cpu":
            command.append("--force_cpu")
        else:
            device_ids = [str(int(device)) for device in devices.split()]
            command.extend(["--device_ids"] + device_ids)
        subprocess.run(command)

    # --- Vocals separation -------------------------------------------------
    model_info = get_model_info_by_name(vocal_model)
    _ensure_separation_model(model_info)
    store_dir = os.path.join(now_dir, "audio_files", music_folder, "vocals")
    inst_dir = os.path.join(now_dir, "audio_files", music_folder, "instrumentals")
    os.makedirs(store_dir, exist_ok=True)
    os.makedirs(inst_dir, exist_ok=True)
    input_audio_basename = os.path.splitext(os.path.basename(input_audio_path))[0]
    if search_with_word(store_dir, "vocals"):
        print("Vocals already separated")
    else:
        print("Separating vocals")
        _run_separation(model_info, input_audio_path, store_dir, extract_instrumental=True)
        # Move the extracted instrumental next to the other instrumentals.
        os.rename(
            os.path.join(
                store_dir,
                search_with_two_words(store_dir, track_stem, "instrumental"),
            ),
            os.path.join(inst_dir, f"{track_stem}_instrumentals.flac"),
        )
    inst_file = os.path.join(
        inst_dir, search_with_two_words(inst_dir, track_stem, "instrumentals")
    )

    # --- Karaoke (backing vocals) separation -------------------------------
    model_info = get_model_info_by_name(karaoke_model)
    store_dir = os.path.join(now_dir, "audio_files", music_folder, "karaoke")
    os.makedirs(store_dir, exist_ok=True)
    vocals_path = os.path.join(now_dir, "audio_files", music_folder, "vocals")
    input_file = search_with_word(vocals_path, "vocals")
    if search_with_word(store_dir, "karaoke") is not None:
        print("Backing vocals already separated")
    else:
        if input_file:
            input_file = os.path.join(vocals_path, input_file)
        print("Separating Backing vocals")
        if model_info["name"] == "Mel-Roformer Karaoke by aufr33 and viperx":
            _ensure_separation_model(model_info)
            _run_separation(model_info, input_file, store_dir, extract_instrumental=True)
        else:
            separator = Separator(
                model_file_dir=os.path.join(now_dir, "models", "karaoke"),
                log_level=logging.WARNING,
                normalization_threshold=1.0,
                output_format="flac",
                output_dir=store_dir,
                vr_params={
                    "batch_size": batch_size,
                    "enable_tta": use_tta,
                },
            )
            separator.load_model(model_filename=model_info["full_name"])
            separator.separate(input_file)
            # Normalize UVR-BVE output names to the *_karaoke/*_instrumental
            # convention the later stages search for.
            karaoke_path = os.path.join(now_dir, "audio_files", music_folder, "karaoke")
            vocals_result = search_with_two_words(karaoke_path, track_stem, "Vocals")
            instrumental_result = search_with_two_words(
                karaoke_path, track_stem, "Instrumental"
            )
            if "UVR-BVE-4B_SN-44100-1" in os.path.basename(vocals_result):
                os.rename(
                    os.path.join(karaoke_path, vocals_result),
                    os.path.join(karaoke_path, f"{track_stem}_karaoke.flac"),
                )
            if "UVR-BVE-4B_SN-44100-1" in os.path.basename(instrumental_result):
                os.rename(
                    os.path.join(karaoke_path, instrumental_result),
                    os.path.join(karaoke_path, f"{track_stem}_instrumental.flac"),
                )

    # --- Dereverb ----------------------------------------------------------
    model_info = get_model_info_by_name(dereverb_model)
    store_dir = os.path.join(now_dir, "audio_files", music_folder, "dereverb")
    os.makedirs(store_dir, exist_ok=True)
    karaoke_path = os.path.join(now_dir, "audio_files", music_folder, "karaoke")
    input_file = search_with_word(karaoke_path, "karaoke")
    if search_with_word(store_dir, "noreverb") is not None:
        print("Reverb already removed")
    else:
        if input_file:
            input_file = os.path.join(karaoke_path, input_file)
        print("Removing reverb")
        if model_info["name"] in (
            "BS-Roformer Dereverb by anvuew",
            "MDX23C DeReverb by aufr33 and jarredou",
        ):
            _ensure_separation_model(model_info)
            _run_separation(model_info, input_file, store_dir, extract_instrumental=False)
        else:
            if model_info["arch"] == "vr":
                separator = Separator(
                    model_file_dir=os.path.join(now_dir, "models", "dereverb"),
                    log_level=logging.WARNING,
                    normalization_threshold=1.0,
                    output_format="flac",
                    output_dir=store_dir,
                    output_single_stem="No Reverb",
                    vr_params={
                        "batch_size": batch_size,
                        "enable_tta": use_tta,
                    },
                )
            else:
                separator = Separator(
                    model_file_dir=os.path.join(now_dir, "models", "dereverb"),
                    log_level=logging.WARNING,
                    normalization_threshold=1.0,
                    output_format="flac",
                    output_dir=store_dir,
                    output_single_stem="No Reverb",
                )
            separator.load_model(model_filename=model_info["full_name"])
            separator.separate(input_file)
            dereverb_path = os.path.join(now_dir, "audio_files", music_folder, "dereverb")
            search_result = search_with_two_words(dereverb_path, track_stem, "No Reverb")
            if "UVR-DeEcho-DeReverb" in os.path.basename(
                search_result
            ) or "MDX Reverb HQ by FoxJoy" in os.path.basename(search_result):
                os.rename(
                    os.path.join(dereverb_path, search_result),
                    os.path.join(dereverb_path, f"{track_stem}_noreverb.flac"),
                )

    # --- Deecho (optional) -------------------------------------------------
    store_dir = os.path.join(now_dir, "audio_files", music_folder, "deecho")
    os.makedirs(store_dir, exist_ok=True)
    if deecho:
        if search_with_word(store_dir, "noecho") is not None:
            print("Echo already removed")
        else:
            print("Removing echo")
            model_info = get_model_info_by_name(deecho_model)
            dereverb_path = os.path.join(now_dir, "audio_files", music_folder, "dereverb")
            noreverb_file = search_with_word(dereverb_path, "noreverb")
            input_file = os.path.join(dereverb_path, noreverb_file)
            separator = Separator(
                model_file_dir=os.path.join(now_dir, "models", "deecho"),
                log_level=logging.WARNING,
                normalization_threshold=1.0,
                output_format="flac",
                output_dir=store_dir,
                output_single_stem="No Echo",
                vr_params={
                    "batch_size": batch_size,
                    "enable_tta": use_tta,
                },
            )
            separator.load_model(model_filename=model_info["full_name"])
            separator.separate(input_file)
            deecho_path = os.path.join(now_dir, "audio_files", music_folder, "deecho")
            search_result = search_with_two_words(deecho_path, track_stem, "No Echo")
            if "UVR-De-Echo-Normal" in os.path.basename(
                search_result
            ) or "UVR-Deecho-Agggressive" in os.path.basename(search_result):
                os.rename(
                    os.path.join(deecho_path, search_result),
                    os.path.join(deecho_path, f"{track_stem}_noecho.flac"),
                )

    # --- Denoise (optional) ------------------------------------------------
    store_dir = os.path.join(now_dir, "audio_files", music_folder, "denoise")
    os.makedirs(store_dir, exist_ok=True)
    if denoise:
        if search_with_word(store_dir, "dry") is not None:
            print("Noise already removed")
        else:
            model_info = get_model_info_by_name(denoise_model)
            print("Removing noise")
            # Denoise the most processed vocal available: deecho output when
            # that stage ran, otherwise the dereverb output.
            input_file = (
                os.path.join(
                    now_dir,
                    "audio_files",
                    music_folder,
                    "deecho",
                    search_with_word(
                        os.path.join(now_dir, "audio_files", music_folder, "deecho"),
                        "noecho",
                    ),
                )
                if deecho
                else os.path.join(
                    now_dir,
                    "audio_files",
                    music_folder,
                    "dereverb",
                    search_with_word(
                        os.path.join(now_dir, "audio_files", music_folder, "dereverb"),
                        "noreverb",
                    ),
                )
            )
            if model_info["name"] in (
                "Mel-Roformer Denoise Normal by aufr33",
                "Mel-Roformer Denoise Aggressive by aufr33",
            ):
                _ensure_separation_model(model_info)
                _run_separation(
                    model_info, input_file, store_dir, extract_instrumental=False
                )
            else:
                separator = Separator(
                    model_file_dir=os.path.join(now_dir, "models", "denoise"),
                    log_level=logging.WARNING,
                    normalization_threshold=1.0,
                    output_format="flac",
                    output_dir=store_dir,
                    output_single_stem="No Noise",
                    vr_params={
                        "batch_size": batch_size,
                        "enable_tta": use_tta,
                    },
                )
                separator.load_model(model_filename=model_info["full_name"])
                separator.separate(input_file)
                # Fix: the VR denoiser writes into the denoise store_dir, but
                # the original searched/renamed inside the deecho folder, so
                # the "No Noise" output was never found nor renamed to *_dry.
                search_result = search_with_two_words(store_dir, track_stem, "No Noise")
                if "UVR Denoise" in os.path.basename(search_result):
                    os.rename(
                        os.path.join(store_dir, search_result),
                        os.path.join(store_dir, f"{track_stem}_dry.flac"),
                    )

    # --- RVC conversion of the lead vocal ----------------------------------
    denoise_path = os.path.join(now_dir, "audio_files", music_folder, "denoise")
    deecho_path = os.path.join(now_dir, "audio_files", music_folder, "deecho")
    dereverb_path = os.path.join(now_dir, "audio_files", music_folder, "dereverb")
    denoise_audio = search_with_two_words(denoise_path, track_stem, "dry")
    deecho_audio = search_with_two_words(deecho_path, track_stem, "noecho")
    dereverb = search_with_two_words(dereverb_path, track_stem, "noreverb")
    # Prefer the most processed vocal stem available.
    if denoise_audio:
        final_path = os.path.join(
            now_dir, "audio_files", music_folder, "denoise", denoise_audio
        )
    elif deecho_audio:
        final_path = os.path.join(
            now_dir, "audio_files", music_folder, "deecho", deecho_audio
        )
    elif dereverb:
        final_path = os.path.join(
            now_dir, "audio_files", music_folder, "dereverb", dereverb
        )
    else:
        final_path = None
    store_dir = os.path.join(now_dir, "audio_files", music_folder, "rvc")
    os.makedirs(store_dir, exist_ok=True)
    print("Making RVC inference")
    output_rvc = os.path.join(
        now_dir, "audio_files", music_folder, "rvc", f"{track_stem}_rvc.wav"
    )
    inference_vc = import_voice_converter()
    inference_vc.convert_audio(
        audio_input_path=final_path,
        audio_output_path=output_rvc,
        model_path=model_path,
        index_path=index_path,
        embedder_model=embedder_model,
        pitch=pitch,
        f0_file=None,
        f0_method=pitch_extract,
        filter_radius=filter_radius,
        index_rate=index_rate,
        volume_envelope=rms_mix_rate,
        protect=protect,
        split_audio=split_audio,
        f0_autotune=autotune,
        hop_length=hop_lenght,
        export_format=export_format_rvc,
        embedder_model_custom=None,
    )

    # --- Optional RVC conversion of the backing vocals ---------------------
    backing_vocals = os.path.join(
        karaoke_path, search_with_word(karaoke_path, "instrumental")
    )
    if infer_backing_vocals:
        print("Infering backing vocals")
        karaoke_path = os.path.join(now_dir, "audio_files", music_folder, "karaoke")
        instrumental_file = search_with_word(karaoke_path, "instrumental")
        backing_vocals = os.path.join(karaoke_path, instrumental_file)
        output_backing_vocals = os.path.join(
            karaoke_path, f"{input_audio_basename}_instrumental_output.wav"
        )
        inference_vc.convert_audio(
            audio_input_path=backing_vocals,
            audio_output_path=output_backing_vocals,
            model_path=infer_backing_vocals_model,
            index_path=infer_backing_vocals_index,
            embedder_model=embedder_model_back,
            pitch=pitch_back,
            f0_file=None,
            f0_method=pitch_extract_back,
            filter_radius=filter_radius_back,
            index_rate=index_rate_back,
            volume_envelope=rms_mix_rate_back,
            protect=protect_back,
            split_audio=split_audio_back,
            f0_autotune=autotune_back,
            hop_length=hop_length_back,
            export_format=export_format_rvc_back,
            embedder_model_custom=None,
        )
        backing_vocals = output_backing_vocals

    # --- Post-process: optional reverb on the converted vocal --------------
    if reverb:
        add_audio_effects(
            os.path.join(
                now_dir,
                "audio_files",
                music_folder,
                "rvc",
                get_last_modified_file(
                    os.path.join(now_dir, "audio_files", music_folder, "rvc")
                ),
            ),
            reverb_room_size,
            reverb_wet_gain,
            reverb_dry_gain,
            reverb_damping,
            reverb_width,
            os.path.join(
                now_dir,
                "audio_files",
                music_folder,
                "rvc",
                os.path.basename(input_audio_path),
            ),
        )

    # --- Optional instrumental pitch shift ---------------------------------
    if change_inst_pitch != 0:
        print("Changing instrumental pitch")
        inst_path = os.path.join(
            now_dir,
            "audio_files",
            music_folder,
            "instrumentals",
            search_with_word(
                os.path.join(now_dir, "audio_files", music_folder, "instrumentals"),
                "instrumentals",
            ),
        )
        audio = AudioSegment.from_file(inst_path)
        original_frame_rate = audio.frame_rate
        # Resample trick: re-declare the frame rate to shift pitch, then
        # resample back to the original rate. Fix: the original called
        # set_frame_rate(audio.frame_rate) on the already-respawned segment,
        # which is a no-op.
        factor = 2 ** (change_inst_pitch / 12)
        new_frame_rate = int(audio.frame_rate * factor)
        audio = audio._spawn(audio.raw_data, overrides={"frame_rate": new_frame_rate})
        audio = audio.set_frame_rate(original_frame_rate)
        output_dir_pitch = os.path.join(
            now_dir, "audio_files", music_folder, "instrumentals"
        )
        output_path_pitch = os.path.join(output_dir_pitch, "inst_with_changed_pitch.flac")
        audio.export(output_path_pitch, format="flac")
        # Fix: use the pitch-shifted instrumental in the final mix — the
        # original exported it but never referenced it again.
        inst_file = output_path_pitch

    # --- Merge everything ---------------------------------------------------
    store_dir = os.path.join(now_dir, "audio_files", music_folder, "final")
    os.makedirs(store_dir, exist_ok=True)
    vocals_path = os.path.join(now_dir, "audio_files", music_folder, "rvc")
    vocals_file = get_last_modified_file(
        os.path.join(now_dir, "audio_files", music_folder, "rvc")
    )
    vocals_file = os.path.join(vocals_path, vocals_file)
    karaoke_path = os.path.join(now_dir, "audio_files", music_folder, "karaoke")
    karaoke_file = search_with_word(karaoke_path, "Instrumental") or search_with_word(
        karaoke_path, "instrumental"
    )
    karaoke_file = os.path.join(karaoke_path, karaoke_file)
    final_output_path = os.path.join(
        now_dir,
        "audio_files",
        music_folder,
        "final",
        f"{track_stem}_final.{export_format_final.lower()}",
    )
    print("Merging audios")
    result = merge_audios(
        vocals_file,
        inst_file,
        backing_vocals,
        final_output_path,
        vocals_volume,
        instrumentals_volume,
        backing_vocals_volume,
        export_format_final,
    )
    print("Audios merged!")

    # --- Optional cleanup of intermediates ----------------------------------
    if delete_audios:
        main_directory = os.path.join(now_dir, "audio_files", music_folder)
        folder_to_keep = "final"
        for folder_name in os.listdir(main_directory):
            folder_path = os.path.join(main_directory, folder_name)
            if os.path.isdir(folder_path) and folder_name != folder_to_keep:
                shutil.rmtree(folder_path)
    return (
        f"Audio file {track_stem} converted with success",
        result,
    )


def download_model(link):
    """Download an RVC model through the Applio pipeline."""
    model_download_pipeline(link)
    return "Model downloaded with success"


def download_music(link):
    """Download audio from *link* with yt-dlp into audio_files/original_files."""
    os.makedirs(os.path.join(now_dir, "audio_files", "original_files"), exist_ok=True)
    command = [
        "yt-dlp",
        "-x",
        "--output",
        os.path.join(now_dir, "audio_files", "original_files", "%(title)s.%(ext)s"),
        link,
    ]
    subprocess.run(command)
    return "Music downloaded with success"