import io import os import gradio as gr import librosa import base64 import numpy as np import soundfile #from inference.infer_tool import Svc from inference.infer_tool import Svc import logging import time from tts_voices import SUPPORTED_LANGUAGES logging.getLogger('numba').setLevel(logging.WARNING) logging.getLogger('markdown_it').setLevel(logging.WARNING) logging.getLogger('urllib3').setLevel(logging.WARNING) logging.getLogger('matplotlib').setLevel(logging.WARNING) #hf_token = os.environ.get('TOKEN') #hf_token1 = os.environ.get('TOKEN1') #hf_token2 = os.environ.get('TOKEN2') #hf_token_config = os.environ.get('TOKEN_config') from matplotlib import pyplot as plt import datetime import subprocess def tts_fn(_text, _gender, _lang, _rate, _volume, sid, vc_transform, auto_f0,cluster_ratio, slice_db, f0_predictor): if len( _text) > 400: return "请上传小于200字的文本", None try: _rate = f"+{int(_rate*100)}%" if _rate >= 0 else f"{int(_rate*100)}%" _volume = f"+{int(_volume*100)}%" if _volume >= 0 else f"{int(_volume*100)}%" if _lang == "Auto": _gender = "Male" if _gender == "男" else "Female" subprocess.run([r"python", "tts.py", _text, _lang, _rate, _volume, _gender]) else: subprocess.run([r"python", "tts.py", _text, _lang, _rate, _volume]) input_audio = "tts.wav" audio, sampling_rate = soundfile.read(input_audio) if np.issubdtype(audio.dtype, np.integer): audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32) if len(audio.shape) > 1: audio = librosa.to_mono(audio.transpose(1, 0)) if sampling_rate != 44100: audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=44100) soundfile.write(input_audio, audio, 44100, format="wav") output_file_path = "tts_output.mp3" _audio = model.slice_inference(input_audio, sid, vc_transform, slice_db, cluster_ratio, auto_f0, 0.4,f0_predictor=f0_predictor,clip_seconds=40) print (_text, _gender, _lang, _rate, _volume, sid, vc_transform, auto_f0,cluster_ratio, slice_db, f0_predictor) soundfile.write("tts_output.mp3", _audio, 44100, format="mp3") return "Success", output_file_path except Exception as e: print(e) def f0_to_pitch(ff): f0_pitch = 69 + 12 * np.log2(ff / 441) return f0_pitch def compute_f0(wav_file1, wav_file2,tran): y1, sr1 = librosa.load(wav_file1, sr=44100) y2, sr2 = librosa.load(wav_file2, sr=44100) # Compute the f0 using the YIN pitch estimation method f0_1 = librosa.core.yin(y1, fmin=1, fmax=400) f0_2 = librosa.core.yin(y2, fmin=1, fmax=400) # 半 音 偏差 sum_y = [] if np.sum(wav_file1 == 0) / len(wav_file1) > 0.9: mistake, var_take = 0, 0 else: for i in range(min(len(f0_1), len(f0_2))): if f0_1[i] > 0 and f0_2[i] > 0: sum_y.append( abs(f0_to_pitch(f0_2[i]) - (f0_to_pitch(f0_1[i]) + tran))) num_y = 0 for x in sum_y: num_y += x len_y = len(sum_y) if len(sum_y) else 1 mistake = round(float(num_y / len_y), 2) var_take = round(float(np.std(sum_y, ddof=1)), 2) print("mistake", mistake, var_take) return f0_1, f0_2, sr1, sr2, round(mistake / 10, 2), round(var_take / 10, 2) def same_auth(username, password): now = datetime.datetime.utcnow() + datetime.timedelta(hours=8) print(username, password,now.strftime("%Y-%m-%d %H:%M:%S")) username = username.replace("https://","").replace("http://","").replace("/","") return username == base64.b64decode( b'c292aXRzNC5ub2dpemFrYTQ2LmNj' ).decode() or username == base64.b64decode( b'c292aXRzNC1kZXYubm9naXpha2E0Ni5jYw==' ).decode() or password == base64.b64decode( b'c292aXRzNC1kZXYubm9naXpha2E0Ni5jYw==' ).decode() or password == base64.b64decode( b'c292aXRzNC5ub2dpemFrYTQ2LmNj' ).decode() def vc_fn(output_format,sid, input_audio, vc_transform, auto_f0,cluster_ratio, slice_db,f0_predictor,clip_seconds=50): start_time = time.time() if input_audio is None: return "You need to upload an audio ", None audio, sampling_rate = soundfile.read(input_audio) duration = audio.shape[0] / sampling_rate if duration > 280: return "请上传小于280s的音频,需要转换长音频请使用tgbot", None , None if np.issubdtype(audio.dtype, np.integer): audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32) if len(audio.shape) > 1: audio = librosa.to_mono(audio.transpose(1, 0)) if sampling_rate != 44100: audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=44100) out_wav_path = "temp.wav" soundfile.write(out_wav_path, audio, 44100, format="wav") now = datetime.datetime.utcnow() + datetime.timedelta(hours=8) print(sid, vc_transform, auto_f0,cluster_ratio, slice_db,f0_predictor,now.strftime("%Y-%m-%d %H:%M:%S")) _audio = model.slice_inference(out_wav_path, sid, vc_transform, slice_db, cluster_ratio, auto_f0, 0.4,f0_predictor=f0_predictor,clip_seconds=clip_seconds,loudness_envelope_adjustment = 0) out_wav_path1 = 'output_'+f'{sid}_{vc_transform}.{output_format}' soundfile.write(out_wav_path1, _audio, 44100, format=output_format) used_time = round(time.time() - start_time, 2) out_str = ("Success! total use time:{}s".format(used_time)) return out_str ,out_wav_path1 def change_audio(audio,vc): new_audio = audio return new_audio,vc def loadmodel(model_): global model model_name = os.path.splitext(os.path.basename(model_))[0] model.unload_model() if os.path.exists("./kmeans/" + model_name + ".pt") == True: model = Svc(model_, "configs/" + model_name + ".json", cluster_model_path="./kmeans/" + model_name + ".pt") else: model = Svc(model_, "configs/" + model_name + ".json") spks = list(model.spk2id.keys()) print(model_, "configs/" + model_name + ".json", "./kmeans/" + model_name + ".pt") return update_dropdown(spks) def update_dropdown(new_choices): global model spks = list(model.spk2id.keys()) new_choices = gr.Dropdown.update(choices=spks) return new_choices sid ="" import pyzipper hf_token1 = os.getenv("TOKEN1").encode("utf-8") with pyzipper.AESZipFile('./N.zip') as zf: zf.pwd = hf_token1 zf.extractall() with pyzipper.AESZipFile('./N_2.zip') as zf: zf.pwd = hf_token1 zf.extractall() model = Svc("./N/44.pth", "configs/44.json" , cluster_model_path="./kmeans/44.pt") modelPaths = [] for dirpath, dirnames, filenames in os.walk("./N/"): for filename in filenames: modelPaths.append(os.path.join(dirpath, filename)) app = gr.Blocks(theme='NoCrypt/miku') with app: with gr.Tabs(): with gr.TabItem(" "): #gr.Markdown( #'