rcell committed on
Commit 422853a
1 Parent(s): 2fba440
app.py CHANGED
@@ -38,17 +38,19 @@ def get_text(text, hps):
 
 
 hps = utils.get_hparams_from_file("configs/ljs_base.json")
-
-net_g = SynthesizerTrn(
+hps_ms = utils.get_hparams_from_file("configs/vctk_base.json")
+net_g_ms = SynthesizerTrn(
     len(symbols),
-    hps.data.filter_length // 2 + 1,
-    hps.train.segment_size // hps.data.hop_length,
-    **hps.model)
+    hps_ms.data.filter_length // 2 + 1,
+    hps_ms.train.segment_size // hps.data.hop_length,
+    n_speakers=hps_ms.data.n_speakers,
+    **hps_ms.model)
+
 
 import numpy as np
 
 hubert = torch.hub.load("bshall/hubert:main", "hubert_soft")
 
-_ = utils.load_checkpoint("G_88000.pth", net_g, None)
+_ = utils.load_checkpoint("G_312000.pth", net_g_ms, None)
 
 def vc_fn(input_audio,vc_transform):
     if input_audio is None:
@@ -64,21 +66,23 @@ def vc_fn(input_audio,vc_transform):
     if sampling_rate != 16000:
         audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
 
-    audio22050 = librosa.resample(audio, orig_sr=sampling_rate, target_sr=22050)
+    audio22050 = librosa.resample(audio, orig_sr=16000, target_sr=22050)
     f0 = convert_wav_22050_to_f0(audio22050)
 
     source = torch.FloatTensor(audio).unsqueeze(0).unsqueeze(0)
     print(source.shape)
     with torch.inference_mode():
         units = hubert.units(source)
-    f0 = resize2d(f0, len(units[:, 0])) * vc_transform
-    units[:, 0] = f0 / 10
-
-    stn_tst = torch.FloatTensor(units.squeeze(0))
+    soft = units.squeeze(0).numpy()
+    print(sampling_rate)
+    f0 = resize2d(f0, len(soft[:, 0])) * vc_transform
+    soft[:, 0] = f0 / 10
+    sid = torch.LongTensor([0])
+    stn_tst = torch.FloatTensor(soft)
     with torch.no_grad():
         x_tst = stn_tst.unsqueeze(0)
         x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
-        audio = net_g.infer(x_tst, x_tst_lengths, noise_scale=0.1, noise_scale_w=0.1, length_scale=1)[0][
+        audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=0.1, noise_scale_w=0.1, length_scale=1)[0][
             0, 0].data.float().numpy()
 
     return "Success", (hps.data.sampling_rate, audio)
@@ -90,7 +94,7 @@ with app:
     with gr.Tabs():
         with gr.TabItem("Basic"):
             vc_input3 = gr.Audio(label="Input Audio (30s limitation)")
-            vc_transform = gr.Number(label="transform")
+            vc_transform = gr.Number(label="transform", value=1.0)
             vc_submit = gr.Button("Convert", variant="primary")
             vc_output1 = gr.Textbox(label="Output Message")
             vc_output2 = gr.Audio(label="Output Audio")
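
Note on the app.py change above: the multi-speaker model net_g_ms now consumes HuBERT soft units whose first channel is overwritten with the transform-scaled F0 track, resampled to the unit count by resize2d and decoded with an explicit speaker id. resize2d is defined elsewhere in the repo, not in this diff; below is a minimal sketch of what such a helper could look like (the name resize2d_sketch and the linear-interpolation behavior are assumptions, not the repo's actual code):

import numpy as np

def resize2d_sketch(f0, target_len):
    # Hypothetical stand-in for the repo's resize2d (not shown in this
    # commit): linearly interpolate the F0 contour to target_len frames
    # so it can be written into column 0 of the soft-unit matrix.
    src = np.asarray(f0, dtype=np.float32)
    x_old = np.linspace(0.0, 1.0, num=len(src))
    x_new = np.linspace(0.0, 1.0, num=target_len)
    return np.interp(x_new, x_old, src)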
configs/vctk_base.json CHANGED
@@ -1,7 +1,7 @@
 {
   "train": {
     "log_interval": 100,
-    "eval_interval": 3000,
+    "eval_interval": 2000,
     "seed": 1234,
     "epochs": 10000,
     "learning_rate": 2e-4,
@@ -17,8 +17,8 @@
     "c_kl": 1.0
   },
   "data": {
-    "training_files":"filelists/train_mul.txt",
-    "validation_files":"filelists/val_mul.txt",
+    "training_files":"filelists/train_sing_mul.txt",
+    "validation_files":"filelists/val_sing_mul.txt",
     "text_cleaners":["english_cleaners2"],
     "max_wav_value": 32768.0,
     "sampling_rate": 22050,
@@ -29,7 +29,7 @@
     "mel_fmin": 0.0,
     "mel_fmax": null,
     "add_blank": true,
-    "n_speakers": 7,
+    "n_speakers": 2,
     "cleaned_text": true
   },
   "model": {
data_utils.py CHANGED
@@ -5,27 +5,35 @@ import numpy as np
 import torch
 import torch.utils.data
 import numpy as np
-import commons
+import commons
 from mel_processing import spectrogram_torch
 from utils import load_wav_to_torch, load_filepaths_and_text
 from text import text_to_sequence, cleaned_text_to_sequence
 
 
+def dropout1d(myarray, ratio=0.5):
+    indices = np.random.choice(np.arange(myarray.size), replace=False,
+                               size=int(myarray.size * ratio))
+    myarray[indices] = 0
+    return myarray
+
+
 class TextAudioLoader(torch.utils.data.Dataset):
     """
     1) loads audio, text pairs
     2) normalizes text and converts them to sequences of integers
     3) computes spectrograms from audio files.
     """
+
     def __init__(self, audiopaths_and_text, hparams):
         self.audiopaths_and_text = load_filepaths_and_text(audiopaths_and_text)
-        self.text_cleaners = hparams.text_cleaners
-        self.max_wav_value = hparams.max_wav_value
-        self.sampling_rate = hparams.sampling_rate
-        self.filter_length = hparams.filter_length
-        self.hop_length = hparams.hop_length
-        self.win_length = hparams.win_length
-        self.sampling_rate = hparams.sampling_rate
+        self.text_cleaners = hparams.text_cleaners
+        self.max_wav_value = hparams.max_wav_value
+        self.sampling_rate = hparams.sampling_rate
+        self.filter_length = hparams.filter_length
+        self.hop_length = hparams.hop_length
+        self.win_length = hparams.win_length
+        self.sampling_rate = hparams.sampling_rate
 
         self.cleaned_text = getattr(hparams, "cleaned_text", False)
@@ -37,7 +45,6 @@ class TextAudioLoader(torch.utils.data.Dataset):
         random.shuffle(self.audiopaths_and_text)
         self._filter()
 
-
     def _filter(self):
         """
         Filter text & store spec lengths
@@ -74,8 +81,8 @@ class TextAudioLoader(torch.utils.data.Dataset):
             spec = torch.load(spec_filename)
         else:
             spec = spectrogram_torch(audio_norm, self.filter_length,
-                self.sampling_rate, self.hop_length, self.win_length,
-                center=False)
+                self.sampling_rate, self.hop_length, self.win_length,
+                center=False)
             spec = torch.squeeze(spec, 0)
             torch.save(spec, spec_filename)
         return spec, audio_norm
@@ -88,8 +95,14 @@ class TextAudioLoader(torch.utils.data.Dataset):
         # if self.add_blank:
         #     text_norm = commons.intersperse(text_norm, 0)
         #     text_norm = torch.LongTensor(text_norm)
+
         soft = np.load(text)
-
+
+        # # add F0 information
+        # head, rear = text.split(".")
+        # f0 = np.load(head+".f0."+rear)
+        # soft[:,0] = f0/10
+
         text_norm = torch.FloatTensor(soft)
         return text_norm
@@ -103,6 +116,7 @@ class TextAudioLoader(torch.utils.data.Dataset):
 class TextAudioCollate():
     """ Zero-pads model inputs and targets
     """
+
     def __init__(self, return_ids=False):
         self.return_ids = return_ids
 
@@ -135,7 +149,7 @@ class TextAudioCollate():
             row = batch[ids_sorted_decreasing[i]]
 
             text = row[0]
-            text_padded[i, :text.size(0),:] = text
+            text_padded[i, :text.size(0), :] = text
             text_lengths[i] = text.size(0)
 
             spec = row[1]
@@ -152,21 +166,24 @@ class TextAudioCollate():
 
 
 """Multi speaker version"""
+
+
 class TextAudioSpeakerLoader(torch.utils.data.Dataset):
     """
     1) loads audio, speaker_id, text pairs
     2) normalizes text and converts them to sequences of integers
     3) computes spectrograms from audio files.
     """
+
     def __init__(self, audiopaths_sid_text, hparams):
         self.audiopaths_sid_text = load_filepaths_and_text(audiopaths_sid_text)
         self.text_cleaners = hparams.text_cleaners
         self.max_wav_value = hparams.max_wav_value
         self.sampling_rate = hparams.sampling_rate
-        self.filter_length = hparams.filter_length
-        self.hop_length = hparams.hop_length
-        self.win_length = hparams.win_length
-        self.sampling_rate = hparams.sampling_rate
+        self.filter_length = hparams.filter_length
+        self.hop_length = hparams.hop_length
+        self.win_length = hparams.win_length
+        self.sampling_rate = hparams.sampling_rate
 
         self.cleaned_text = getattr(hparams, "cleaned_text", False)
@@ -215,15 +232,23 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
             spec = torch.load(spec_filename)
         else:
             spec = spectrogram_torch(audio_norm, self.filter_length,
-                self.sampling_rate, self.hop_length, self.win_length,
-                center=False)
+                self.sampling_rate, self.hop_length, self.win_length,
+                center=False)
             spec = torch.squeeze(spec, 0)
             torch.save(spec, spec_filename)
         return spec, audio_norm
 
     def get_text(self, text):
         soft = np.load(text)
-
+        head, rear = text.split(".")
+        f0 = np.load(head + ".f0." + rear)
+        p = random.random()
+        # print(p)
+        if p < 0.3:
+            f0 = dropout1d(f0, 0.6)
+        # print(f0)
+        soft[:, 0] = f0 / 10
+        # soft = soft + np.expand_dims(np.log(f0),1)*0.2
         text_norm = torch.FloatTensor(soft)
         return text_norm
@@ -241,6 +266,7 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
 class TextAudioSpeakerCollate():
     """ Zero-pads model inputs and targets
     """
+
     def __init__(self, return_ids=False):
         self.return_ids = return_ids
 
@@ -297,20 +323,21 @@ class DistributedBucketSampler(torch.utils.data.distributed.DistributedSampler):
     Maintain similar input lengths in a batch.
     Length groups are specified by boundaries.
     Ex) boundaries = [b1, b2, b3] -> any batch is included either {x | b1 < length(x) <=b2} or {x | b2 < length(x) <= b3}.
-
+
     It removes samples which are not included in the boundaries.
     Ex) boundaries = [b1, b2, b3] -> any x s.t. length(x) <= b1 or length(x) > b3 are discarded.
     """
+
     def __init__(self, dataset, batch_size, boundaries, num_replicas=None, rank=None, shuffle=True):
         super().__init__(dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle)
         self.lengths = dataset.lengths
         self.batch_size = batch_size
         self.boundaries = boundaries
-
+
         self.buckets, self.num_samples_per_bucket = self._create_buckets()
         self.total_size = sum(self.num_samples_per_bucket)
         self.num_samples = self.total_size // self.num_replicas
-
+
     def _create_buckets(self):
         buckets = [[] for _ in range(len(self.boundaries) - 1)]
         for i in range(len(self.lengths)):
@@ -318,12 +345,12 @@ class DistributedBucketSampler(torch.utils.data.distributed.DistributedSampler):
             idx_bucket = self._bisect(length)
             if idx_bucket != -1:
                 buckets[idx_bucket].append(i)
-
+
         for i in range(len(buckets) - 1, 0, -1):
             if len(buckets[i]) == 0:
                 buckets.pop(i)
-                self.boundaries.pop(i+1)
-
+                self.boundaries.pop(i + 1)
+
         num_samples_per_bucket = []
         for i in range(len(buckets)):
             len_bucket = len(buckets[i])
@@ -331,61 +358,61 @@ class DistributedBucketSampler(torch.utils.data.distributed.DistributedSampler):
         rem = (total_batch_size - (len_bucket % total_batch_size)) % total_batch_size
         num_samples_per_bucket.append(len_bucket + rem)
         return buckets, num_samples_per_bucket
-
+
     def __iter__(self):
-        # deterministically shuffle based on epoch
-        g = torch.Generator()
-        g.manual_seed(self.epoch)
-
-        indices = []
-        if self.shuffle:
-            for bucket in self.buckets:
-                indices.append(torch.randperm(len(bucket), generator=g).tolist())
-        else:
-            for bucket in self.buckets:
-                indices.append(list(range(len(bucket))))
-
-        batches = []
-        for i in range(len(self.buckets)):
-            bucket = self.buckets[i]
-            len_bucket = len(bucket)
-            ids_bucket = indices[i]
-            num_samples_bucket = self.num_samples_per_bucket[i]
-
-            # add extra samples to make it evenly divisible
-            rem = num_samples_bucket - len_bucket
-            ids_bucket = ids_bucket + ids_bucket * (rem // len_bucket) + ids_bucket[:(rem % len_bucket)]
-
-            # subsample
-            ids_bucket = ids_bucket[self.rank::self.num_replicas]
-
-            # batching
-            for j in range(len(ids_bucket) // self.batch_size):
-                batch = [bucket[idx] for idx in ids_bucket[j*self.batch_size:(j+1)*self.batch_size]]
-                batches.append(batch)
-
-        if self.shuffle:
-            batch_ids = torch.randperm(len(batches), generator=g).tolist()
-            batches = [batches[i] for i in batch_ids]
-        self.batches = batches
-
-        assert len(self.batches) * self.batch_size == self.num_samples
-        return iter(self.batches)
-
+        # deterministically shuffle based on epoch
+        g = torch.Generator()
+        g.manual_seed(self.epoch)
+
+        indices = []
+        if self.shuffle:
+            for bucket in self.buckets:
+                indices.append(torch.randperm(len(bucket), generator=g).tolist())
+        else:
+            for bucket in self.buckets:
+                indices.append(list(range(len(bucket))))
+
+        batches = []
+        for i in range(len(self.buckets)):
+            bucket = self.buckets[i]
+            len_bucket = len(bucket)
+            ids_bucket = indices[i]
+            num_samples_bucket = self.num_samples_per_bucket[i]
+
+            # add extra samples to make it evenly divisible
+            rem = num_samples_bucket - len_bucket
+            ids_bucket = ids_bucket + ids_bucket * (rem // len_bucket) + ids_bucket[:(rem % len_bucket)]
+
+            # subsample
+            ids_bucket = ids_bucket[self.rank::self.num_replicas]
+
+            # batching
+            for j in range(len(ids_bucket) // self.batch_size):
+                batch = [bucket[idx] for idx in ids_bucket[j * self.batch_size:(j + 1) * self.batch_size]]
+                batches.append(batch)
+
+        if self.shuffle:
+            batch_ids = torch.randperm(len(batches), generator=g).tolist()
+            batches = [batches[i] for i in batch_ids]
+        self.batches = batches
+
+        assert len(self.batches) * self.batch_size == self.num_samples
+        return iter(self.batches)
+
     def _bisect(self, x, lo=0, hi=None):
-        if hi is None:
-            hi = len(self.boundaries) - 1
-
-        if hi > lo:
-            mid = (hi + lo) // 2
-            if self.boundaries[mid] < x and x <= self.boundaries[mid+1]:
-                return mid
-            elif x <= self.boundaries[mid]:
-                return self._bisect(x, lo, mid)
-            else:
-                return self._bisect(x, mid + 1, hi)
-        else:
-            return -1
+        if hi is None:
+            hi = len(self.boundaries) - 1
+
+        if hi > lo:
+            mid = (hi + lo) // 2
+            if self.boundaries[mid] < x and x <= self.boundaries[mid + 1]:
+                return mid
+            elif x <= self.boundaries[mid]:
+                return self._bisect(x, lo, mid)
+            else:
+                return self._bisect(x, mid + 1, hi)
+        else:
+            return -1
 
     def __len__(self):
         return self.num_samples // self.batch_size
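
The new dropout1d helper zeroes a random ratio of entries in place, and TextAudioSpeakerLoader.get_text applies it to the F0 track with probability 0.3 (at ratio 0.6), so the model occasionally trains with a partially blanked pitch contour. A toy usage sketch of the helper as committed:

import numpy as np

def dropout1d(myarray, ratio=0.5):
    # identical to the helper added in this commit
    indices = np.random.choice(np.arange(myarray.size), replace=False,
                               size=int(myarray.size * ratio))
    myarray[indices] = 0
    return myarray

f0 = np.array([220.0, 221.5, 219.8, 225.4, 230.2, 231.0])
print(dropout1d(f0.copy(), ratio=0.6))  # 3 of the 6 entries become 0.0
# the helper mutates its argument, hence the copy()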
filelists/train_sing_mul.txt ADDED
The diff for this file is too large to render. See raw diff
 
filelists/val_sing_mul.txt ADDED
@@ -0,0 +1,4 @@
+/content/cpop/wavs/dev/2001000003.wav|1|/content/cpop/soft/dev/2001000003.npy
+/content/cpop/wavs/dev/2002000055.wav|1|/content/cpop/soft/dev/2002000055.npy
+/content/cpop/wavs/dev/2001000002.wav|1|/content/cpop/soft/dev/2001000002.npy
+/content/cpop/wavs/dev/2001000001.wav|1|/content/cpop/soft/dev/2001000001.npy
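
Each row of the new filelists is pipe-separated: wav path | speaker id | soft-unit .npy path. A minimal reader sketch (read_filelist is a hypothetical name; the repo parses these rows with its own load_filepaths_and_text):

def read_filelist(path):
    # Hypothetical parser for the wav|sid|npy rows shown above.
    entries = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            wav_path, sid, unit_path = line.strip().split("|")
            entries.append((wav_path, int(sid), unit_path))
    return entries

# e.g. read_filelist("filelists/val_sing_mul.txt")[0]
# -> ('/content/cpop/wavs/dev/2001000003.wav', 1, '/content/cpop/soft/dev/2001000003.npy')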