ylacombe
/

bark-large

Text-to-Audio

Transformers

PyTorch

bark

Inference Endpoints

Model card | Files and versions | Community

ylacombe committed on Jun 29, 2023

Commit

6faafce

1 Parent(s): f9cc45b

23dc09ba352101d5569fd4f0b1b48b18208e34e6d40bb81322cdd11418f84a63

Browse files

Files changed (1) hide show

config.json +99 -15

config.json CHANGED Viewed

@@ -81,11 +81,105 @@
     "use_bfloat16": false,
     "use_cache": true
   },
-  "coarse_infer_token": 12050,
-  "coarse_rate_hz": 75,
-  "coarse_semantic_pad_token": 12048,
-  "codebook_size": 1024,
-  "context_window_size": 1024,
   "fine_acoustics_config": {
     "_name_or_path": "",
     "add_cross_attention": false,
@@ -165,10 +259,6 @@
     "use_cache": true
   },
   "model_type": "bark",
-  "n_coarse_codebooks": 2,
-  "n_fine_codebooks": 8,
-  "pretrained_encodec_name_or_path": "facebook/encodec_24khz",
-  "sample_rate": 24000,
   "semantic_config": {
     "_name_or_path": "",
     "add_cross_attention": false,
@@ -247,12 +337,6 @@
     "use_bfloat16": false,
     "use_cache": true
   },
-  "semantic_infer_token": 129599,
-  "semantic_pad_token": 10000,
-  "semantic_rate_hz": 49.9,
-  "semantic_vocab_size": 10000,
-  "text_encoding_offset": 10048,
-  "text_pad_token": 129595,
   "torch_dtype": "float32",
   "transformers_version": null
 }

     "use_bfloat16": false,
     "use_cache": true
   },
+  "codec_config": {
+    "_name_or_path": "ArthurZ/encodec_24khz",
+    "add_cross_attention": false,
+    "architectures": [
+      "EncodecModel"
+    ],
+    "audio_channels": 1,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_length_s": null,
+    "chunk_size_feed_forward": 0,
+    "codebook_dim": 128,
+    "codebook_size": 1024,
+    "compress": 2,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "dilation_growth_rate": 2,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_size": 128,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "kernel_size": 7,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "last_kernel_size": 7,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "encodec",
+    "no_repeat_ngram_size": 0,
+    "norm_type": "weight_norm",
+    "normalize": false,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_filters": 32,
+    "num_lstm_layers": 2,
+    "num_residual_layers": 1,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "overlap": null,
+    "pad_mode": "reflect",
+    "pad_token_id": null,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "residual_kernel_size": 3,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sampling_rate": 24000,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "target_bandwidths": [
+      1.5,
+      3.0,
+      6.0,
+      12.0,
+      24.0
+    ],
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": "float32",
+    "torchscript": false,
+    "transformers_version": "4.31.0.dev0",
+    "trim_right_ratio": 1.0,
+    "typical_p": 1.0,
+    "upsampling_ratios": [
+      8,
+      5,
+      4,
+      2
+    ],
+    "use_bfloat16": false,
+    "use_causal_conv": true
+  },
   "fine_acoustics_config": {
     "_name_or_path": "",
     "add_cross_attention": false,
     "use_cache": true
   },
   "model_type": "bark",
   "semantic_config": {
     "_name_or_path": "",
     "add_cross_attention": false,
     "use_bfloat16": false,
     "use_cache": true
   },
   "torch_dtype": "float32",
   "transformers_version": null
 }