Spaces:
Running
Running
Update utils/voco_bark.py
Browse files
- utils/voco_bark.py +12 -5
utils/voco_bark.py
CHANGED
@@ -1,11 +1,19 @@
|
|
1 |
from vocos import Vocos
|
2 |
-
from
|
|
|
|
|
3 |
from transformers.models.bark.generation_configuration_bark import (
|
4 |
BarkCoarseGenerationConfig,
|
5 |
BarkFineGenerationConfig,
|
6 |
BarkSemanticGenerationConfig,
|
7 |
)
|
8 |
from transformers import BarkConfig
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
import torch
|
10 |
|
11 |
class BarkModel(BarkPreTrainedModel):
|
@@ -18,8 +26,7 @@ class BarkModel(BarkPreTrainedModel):
|
|
18 |
self.coarse_acoustics = BarkCoarseModel(config.coarse_acoustics_config)
|
19 |
self.fine_acoustics = BarkFineModel(config.fine_acoustics_config)
|
20 |
|
21 |
-
|
22 |
-
|
23 |
self.config = config
|
24 |
|
25 |
@property
|
@@ -195,7 +202,7 @@ class BarkModel(BarkPreTrainedModel):
|
|
195 |
# 4. Decode the output and generate audio array
|
196 |
bandwidth_id = torch.tensor([2]).to(self.device)
|
197 |
# transpose
|
198 |
-
value =
|
199 |
value = self.vocos.codes_to_features(value)
|
200 |
value = self.vocos.decode(value, bandwidth_id=bandwidth_id)
|
201 |
|
@@ -204,4 +211,4 @@ class BarkModel(BarkPreTrainedModel):
|
|
204 |
self.vocos.offload()
|
205 |
|
206 |
|
207 |
-
return
|
|
|
1 |
from vocos import Vocos
|
2 |
+
from typing import Dict, Optional, Tuple, Union
|
3 |
+
|
4 |
+
from transformers.models.bark import BarkSemanticModel, BarkCoarseModel, BarkFineModel, BarkPreTrainedModel
|
5 |
from transformers.models.bark.generation_configuration_bark import (
|
6 |
BarkCoarseGenerationConfig,
|
7 |
BarkFineGenerationConfig,
|
8 |
BarkSemanticGenerationConfig,
|
9 |
)
|
10 |
from transformers import BarkConfig
|
11 |
+
from transformers.modeling_utils import get_parameter_device
|
12 |
+
from transformers.utils import (
|
13 |
+
is_accelerate_available,
|
14 |
+
|
15 |
+
)
|
16 |
+
|
17 |
import torch
|
18 |
|
19 |
class BarkModel(BarkPreTrainedModel):
|
|
|
26 |
self.coarse_acoustics = BarkCoarseModel(config.coarse_acoustics_config)
|
27 |
self.fine_acoustics = BarkFineModel(config.fine_acoustics_config)
|
28 |
|
29 |
+
self.vocos = Vocos.from_pretrained("hubertsiuzdak/vocos-encodec-24khz-v2")
|
|
|
30 |
self.config = config
|
31 |
|
32 |
@property
|
|
|
202 |
# 4. Decode the output and generate audio array
|
203 |
bandwidth_id = torch.tensor([2]).to(self.device)
|
204 |
# transpose
|
205 |
+
value = output.transpose(0,1)
|
206 |
value = self.vocos.codes_to_features(value)
|
207 |
value = self.vocos.decode(value, bandwidth_id=bandwidth_id)
|
208 |
|
|
|
211 |
self.vocos.offload()
|
212 |
|
213 |
|
214 |
+
return value
|