from transformers.configuration_utils import PretrainedConfig


class EcapaConfig(PretrainedConfig):
    """Configuration for an ECAPA-TDNN speaker-embedding model.

    Groups the feature-extraction, encoder, and classification-head
    hyperparameters. The defaults follow the common C=512 ECAPA-TDNN
    recipe: five TDNN blocks (512/512/512/512/1536 channels) and a
    192-dimensional speaker embedding.
    """

    model_type = 'ecapa'

    def __init__(
        self,
        n_mels=80,
        sample_rate=16000,
        win_length=25,
        hop_length=10,
        mean_norm=True,
        std_norm=False,
        norm_type='sentence',
        hidden_size=192,
        # Tuples instead of lists: a mutable default argument is shared
        # across calls, so a caller mutating config.channels would
        # otherwise leak into every later instance.
        channels=(512, 512, 512, 512, 1536),
        kernel_sizes=(5, 3, 3, 3, 1),
        dilations=(1, 2, 3, 4, 1),
        attention_channels=128,
        res2net_scale=8,
        se_channels=128,
        global_context=True,
        groups=(1, 1, 1, 1, 1),
        num_classes=1251,
        loss_fn='aam',
        auto_map=None,
        initializer_range=0.02,
        **kwargs,
    ):
        # Feature extraction: win_length and hop_length are in milliseconds
        # (the defaults mirror SpeechBrain's Fbank: 25 ms window, 10 ms hop).
        self.n_mels = n_mels
        self.sample_rate = sample_rate
        self.win_length = win_length
        self.hop_length = hop_length

        # Input normalization; 'sentence' normalizes per utterance.
        self.mean_norm = mean_norm
        self.std_norm = std_norm
        self.norm_type = norm_type

        # Encoder: channels, kernel_sizes, dilations, and groups hold one
        # entry per TDNN block, so all four must have the same length.
        self.channels = list(channels)
        self.kernel_sizes = list(kernel_sizes)
        self.dilations = list(dilations)
        self.groups = list(groups)
        self.attention_channels = attention_channels
        self.res2net_scale = res2net_scale
        self.se_channels = se_channels
        self.global_context = global_context
        self.hidden_size = hidden_size  # dimensionality of the speaker embedding

        # Classification head: 1251 is the VoxCeleb1 speaker count, and
        # 'aam' selects additive-angular-margin (AAM) softmax.
        self.num_classes = num_classes
        self.loss_fn = loss_fn

        # Mapping consulted by the Auto* classes under trust_remote_code=True.
        # Built here rather than as a mutable default argument.
        if auto_map is None:
            auto_map = {
                "AutoConfig": "configuration_ecapa.EcapaConfig",
                "AutoModel": "modeling_ecapa.EcapaModel",
            }
        self.auto_map = auto_map
        self.initializer_range = initializer_range

        super().__init__(**kwargs)
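

# Usage sketch (illustrative; the "./ecapa-tdnn" directory name is
# hypothetical). Registering the custom model_type lets AutoConfig resolve
# 'ecapa' inside this process; loading from a shared repo would instead go
# through the auto_map above with trust_remote_code=True.
if __name__ == "__main__":
    from transformers import AutoConfig

    AutoConfig.register("ecapa", EcapaConfig)

    config = EcapaConfig(num_classes=1251)
    config.save_pretrained("./ecapa-tdnn")  # writes config.json

    reloaded = AutoConfig.from_pretrained("./ecapa-tdnn")
    assert reloaded.model_type == "ecapa"
    assert reloaded.channels == [512, 512, 512, 512, 1536]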