rcell committed
Commit 5c37b3d
1 Parent(s): 26e83c1

Optimize F0 extraction speed

Files changed (2)
  1. app.py +9 -7
  2. requirements.txt +2 -1
app.py CHANGED
@@ -15,19 +15,21 @@ import utils
 from models import SynthesizerTrn
 from text.symbols import symbols
 from text import text_to_sequence
+import torchcrepe
+
 def resize2d(source, target_len):
     source[source<0.001] = np.nan
     target = np.interp(np.arange(0, len(source)*target_len, len(source))/ target_len, np.arange(0, len(source)), source)
     return np.nan_to_num(target)
-def convert_wav_22050_to_f0(audio):
-    tmp = librosa.pyin(audio,
-                       fmin=librosa.note_to_hz('C0'),
-                       fmax=librosa.note_to_hz('C7'),
-                       frame_length=1780)[0]
+
+def convert_wav_22050_to_f0(path):
+    audio, sr = torchcrepe.load.audio(path)
+    tmp = torchcrepe.predict(audio=audio, fmin=50, fmax=550,
+                             sample_rate=22050, model='full',
+                             batch_size=2048, device='cuda:0').numpy()[0]
     f0 = np.zeros_like(tmp)
-    f0[tmp>0] = tmp[tmp>0]
+    f0[tmp > 0] = tmp[tmp > 0]
     return f0
-
 def get_text(text, hps):
     text_norm = text_to_sequence(text, hps.data.text_cleaners)
     if hps.data.add_blank:
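
The new F0 path above can be exercised on its own. The following is a minimal standalone sketch that mirrors the two functions from the diff; the input file "sample_22050.wav", the target length of 200 frames, and the availability of a CUDA device are assumptions for illustration, not part of the commit.

# Minimal standalone sketch of the torchcrepe-based F0 path introduced by this
# commit. Assumptions (not from the diff): a local 22050 Hz file
# "sample_22050.wav", a hypothetical target length of 200 frames, and CUDA.
import numpy as np
import torchcrepe


def resize2d(source, target_len):
    # Same helper as in app.py: mark unvoiced frames (~0 Hz) as NaN, linearly
    # interpolate the F0 curve to target_len frames, then zero the NaNs again.
    source[source < 0.001] = np.nan
    target = np.interp(np.arange(0, len(source) * target_len, len(source)) / target_len,
                       np.arange(0, len(source)), source)
    return np.nan_to_num(target)


def convert_wav_22050_to_f0(path):
    # torchcrepe replaces librosa.pyin here; batched GPU inference is
    # presumably where the speed-up comes from.
    audio, sr = torchcrepe.load.audio(path)
    tmp = torchcrepe.predict(audio=audio, fmin=50, fmax=550,
                             sample_rate=22050, model='full',
                             batch_size=2048, device='cuda:0').numpy()[0]
    f0 = np.zeros_like(tmp)
    f0[tmp > 0] = tmp[tmp > 0]  # keep voiced frames, leave unvoiced at 0
    return f0


if __name__ == "__main__":
    f0 = convert_wav_22050_to_f0("sample_22050.wav")  # hypothetical input file
    f0_resized = resize2d(f0, 200)                    # hypothetical frame count
    print(f0.shape, f0_resized.shape)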
requirements.txt CHANGED
@@ -8,4 +8,5 @@ tensorboard==2.3.0
 torch
 torchvision
 Unidecode==1.1.1
-torchaudio
+torchaudio
+torchcrepe
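
For reference on the speed claim in the commit message, here is a rough timing sketch comparing the removed librosa.pyin call with the new torchcrepe call. The file name "sample_22050.wav" and the CUDA device are assumptions; absolute numbers will vary by hardware.

# Rough timing sketch: old librosa.pyin path vs. new torchcrepe path.
# Assumptions (not from the commit): a local 22050 Hz file "sample_22050.wav"
# and an available CUDA device.
import time

import librosa
import torchcrepe

audio_np, _ = librosa.load("sample_22050.wav", sr=22050)

start = time.time()
f0_pyin = librosa.pyin(audio_np,
                       fmin=librosa.note_to_hz('C0'),
                       fmax=librosa.note_to_hz('C7'),
                       frame_length=1780)[0]
print(f"librosa.pyin: {time.time() - start:.2f}s, {len(f0_pyin)} frames")

audio_t, sr = torchcrepe.load.audio("sample_22050.wav")
start = time.time()
f0_crepe = torchcrepe.predict(audio_t, sample_rate=22050, fmin=50, fmax=550,
                              model='full', batch_size=2048,
                              device='cuda:0')[0]
print(f"torchcrepe:   {time.time() - start:.2f}s, {len(f0_crepe)} frames")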