tiiuae/falcon-mamba-7b-instruct

Text Generation

Inference Endpoints

Model card | Files and versions | Community

Update config.json

#1

by ybelkada - opened Jul 30

base: refs/heads/main ← from: refs/pr/1

Discussion Files changed

Files changed (2)

config.json +2 -2
tokenizer_config.json +1 -1

config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "_name_or_path": "/home/ubuntu/checkpoints/ckpt_sagemaker_SFT_5829_padding",
   "architectures": [
-    "FalconMambaForCausalLM"
   ],
   "bos_token_id": 0,
   "conv_kernel": 4,
@@ -12,7 +12,7 @@
   "initializer_range": 0.1,
   "intermediate_size": 8192,
   "layer_norm_epsilon": 1e-05,
-  "model_type": "falcon_mamba",
   "num_hidden_layers": 64,
   "pad_token_id": 0,
   "rescale_prenorm_residual": false,

 {
   "_name_or_path": "/home/ubuntu/checkpoints/ckpt_sagemaker_SFT_5829_padding",
   "architectures": [
+    "MambaForCausalLM"
   ],
   "bos_token_id": 0,
   "conv_kernel": 4,
   "initializer_range": 0.1,
   "intermediate_size": 8192,
   "layer_norm_epsilon": 1e-05,
+  "model_type": "mamba",
   "num_hidden_layers": 64,
   "pad_token_id": 0,
   "rescale_prenorm_residual": false,

tokenizer_config.json CHANGED Viewed

@@ -120,7 +120,7 @@
     "input_ids",
     "attention_mask"
   ],
-  "model_max_length": 2048,
   "pad_to_multiple_of": null,
   "pad_token": "<|end_of_text|>",
   "pad_token_type_id": 0,

     "input_ids",
     "attention_mask"
   ],
+  "model_max_length": 1000000000000000019884624838656,
   "pad_to_multiple_of": null,
   "pad_token": "<|end_of_text|>",
   "pad_token_type_id": 0,