ylacombe committed on
Commit
4c64218
·
1 Parent(s): fc22820

Update utils/voco_bark.py

Browse files
Files changed (1) hide show
  1. utils/voco_bark.py +12 -5
utils/voco_bark.py CHANGED
@@ -1,11 +1,19 @@
1
  from vocos import Vocos
2
- from transformers.models.bark import BarkSemanticModel, BarkCoarseModel, BarkFineModel
 
 
3
  from transformers.models.bark.generation_configuration_bark import (
4
  BarkCoarseGenerationConfig,
5
  BarkFineGenerationConfig,
6
  BarkSemanticGenerationConfig,
7
  )
8
  from transformers import BarkConfig
 
 
 
 
 
 
9
  import torch
10
 
11
  class BarkModel(BarkPreTrainedModel):
@@ -18,8 +26,7 @@ class BarkModel(BarkPreTrainedModel):
18
  self.coarse_acoustics = BarkCoarseModel(config.coarse_acoustics_config)
19
  self.fine_acoustics = BarkFineModel(config.fine_acoustics_config)
20
 
21
- self.vocos = Vocos.from_pretrained("hubertsiuzdak/vocos-encodec-24khz-v2")
22
-
23
  self.config = config
24
 
25
  @property
@@ -195,7 +202,7 @@ class BarkModel(BarkPreTrainedModel):
195
  # 4. Decode the output and generate audio array
196
  bandwidth_id = torch.tensor([2]).to(self.device)
197
  # transpose
198
- value = value.transpose(0,1)
199
  value = self.vocos.codes_to_features(value)
200
  value = self.vocos.decode(value, bandwidth_id=bandwidth_id)
201
 
@@ -204,4 +211,4 @@ class BarkModel(BarkPreTrainedModel):
204
  self.vocos.offload()
205
 
206
 
207
- return audio
 
1
  from vocos import Vocos
2
+ from typing import Dict, Optional, Tuple, Union
3
+
4
+ from transformers.models.bark import BarkSemanticModel, BarkCoarseModel, BarkFineModel, BarkPreTrainedModel
5
  from transformers.models.bark.generation_configuration_bark import (
6
  BarkCoarseGenerationConfig,
7
  BarkFineGenerationConfig,
8
  BarkSemanticGenerationConfig,
9
  )
10
  from transformers import BarkConfig
11
+ from transformers.modeling_utils import get_parameter_device
12
+ from transformers.utils import (
13
+ is_accelerate_available,
14
+
15
+ )
16
+
17
  import torch
18
 
19
  class BarkModel(BarkPreTrainedModel):
 
26
  self.coarse_acoustics = BarkCoarseModel(config.coarse_acoustics_config)
27
  self.fine_acoustics = BarkFineModel(config.fine_acoustics_config)
28
 
29
+ self.vocos = Vocos.from_pretrained("hubertsiuzdak/vocos-encodec-24khz-v2")
 
30
  self.config = config
31
 
32
  @property
 
202
  # 4. Decode the output and generate audio array
203
  bandwidth_id = torch.tensor([2]).to(self.device)
204
  # transpose
205
+ value = output.transpose(0,1)
206
  value = self.vocos.codes_to_features(value)
207
  value = self.vocos.decode(value, bandwidth_id=bandwidth_id)
208
 
 
211
  self.vocos.offload()
212
 
213
 
214
+ return value