jukebox-5b-lyrics / config.json
ArthurZ's picture
ArthurZ HF staff
add model
87a503c
{
"_name_or_path": "ArthurZ/jukebox-5b-lyrics",
"activation_function": "quick_gelu",
"alignment_head": [
2,
null,
null
],
"alignment_layer": [
68,
null,
null
],
"architectures": [
"JukeboxModel"
],
"attn_dropout": 0.0,
"attn_init_scale": 1.0,
"attn_order": [
10,
2,
2
],
"blocks": 128,
"bos_token_id": 50256,
"c_res": 1,
"cond_c_res": [
0,
1,
1
],
"cond_depth": [
3,
16,
16
],
"cond_dilation_cycle": [
null,
8,
8
],
"cond_dilation_growth_rate": [
1,
3,
3
],
"cond_m_conv": 1,
"cond_res_scale": [
null,
true,
false
],
"cond_width": [
128,
1024,
1024
],
"cond_zero_out": false,
"copy_input": false,
"depth": [
79,
72,
72
],
"downs_t": [
3,
2,
2
],
"emb_dropout": 0.0,
"eos_token_id": 50256,
"fp16_params": true,
"hop_fraction": [
0.125,
0.5,
0.5
],
"hop_length": 256,
"init_scale": [
0.1,
1,
1
],
"initializer_range": 0.02,
"l_bins": 2048,
"labels": true,
"layer_norm_epsilon": 1e-05,
"m_attn": 0.25,
"max_bow_genre_size": 5,
"max_duration": 600.0,
"merged_decoder": [
true,
false,
false
],
"min_duration": 23.8,
"mlp_init_scale": 0.02,
"model_type": "jukebox",
"multispec_loss_hop_length": [
240,
120,
50
],
"multispec_loss_n_fft": [
2048,
1024,
512
],
"multispec_loss_window_size": [
1200,
600,
240
],
"multispectral": 1.0,
"n_ctx": [
8192,
8192,
8192
],
"n_embd": 768,
"n_head": 12,
"n_heads": [
8,
1,
1
],
"n_inner": null,
"n_layer": 12,
"n_positions": 1024,
"n_samples": 1,
"n_tokens": [
512,
0,
0
],
"n_vocab": 80,
"name": "AudioSamples",
"nb_priors": 3,
"pos_init": false,
"prime_attn_dropout": 0.0,
"prime_attn_order": [
2,
0,
0
],
"prime_blocks": 32,
"prime_c_res": 1,
"prime_cond_c_res": [
0,
1,
1
],
"prime_depth": [
18,
3,
3
],
"prime_emb_dropout": 0.0,
"prime_heads": 4,
"prime_init_scale": [
0.1,
0.4,
0.4
],
"prime_loss_fraction": [
0.4,
0.0,
0.0
],
"prime_m_attn": 0.25,
"prime_m_mlp": 1.0,
"prime_pos_init": false,
"prime_res_scale": false,
"prime_resid_dropout": 0.0,
"prime_spread": null,
"prime_width": [
1280,
128,
128
],
"prime_zero_out": false,
"priors_width": [
4096,
2048,
1024
],
"reorder_and_upcast_attn": false,
"res_scale": false,
"resid_dropout": 0.0,
"sample_hop_length": 30000,
"sample_length": 1058304,
"sample_length_in_seconds": 24,
"scale_attn_by_inverse_layer_idx": false,
"scale_attn_weights": true,
"single_enc_dec": [
false,
false,
false
],
"spectral": 0.0,
"spread": null,
"sr": 44100,
"strides_t": [
2,
2,
2
],
"summary_activation": null,
"summary_first_dropout": 0.1,
"summary_proj_to_labels": true,
"summary_type": "cls_index",
"summary_use_proj": true,
"t_bins": 128,
"torch_dtype": "float32",
"total_sample_length_in_seconds": 180,
"transformers_version": "4.21.0.dev0",
"use_cache": true,
"use_nonrelative_specloss": true,
"use_tokens": [
true,
false,
false
],
"vocab_size": 50257,
"vq_vae_codebook_dimension": 2048,
"vq_vae_commit": 0.02,
"vq_vae_conv_block_depth": 4,
"vq_vae_conv_block_width": 32,
"vq_vae_depth": 4,
"vq_vae_dilation_cycle": null,
"vq_vae_dilation_growth_rate": 3,
"vq_vae_downs_t": [
3,
2,
2
],
"vq_vae_emmbedding_width": 64,
"vq_vae_levels": 3,
"vq_vae_lmu": 0.99,
"vq_vae_m_conv": 1,
"vq_vae_multipliers": [
2,
1,
1
],
"vq_vae_reverse_decoder_dilation": 1,
"vq_vae_strides_t": [
2,
2,
2
],
"vq_vae_width": 64,
"vqvae_z_shapes": [
[
8268
],
[
33072
],
[
132288
]
],
"width": [
4800,
1920,
1920
],
"y_bins": [
[
120,
4111
],
[
120,
4111
],
[
120,
4111
]
],
"zero_out": false
}