Text Generation
Transformers
Safetensors
English
falcon_mamba
conversational
Inference Endpoints
Files changed (2)
  1. config.json +2 -2
  2. tokenizer_config.json +1 -1
config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_name_or_path": "/home/ubuntu/checkpoints/ckpt_sagemaker_SFT_5829_padding",
3
  "architectures": [
4
- "FalconMambaForCausalLM"
5
  ],
6
  "bos_token_id": 0,
7
  "conv_kernel": 4,
@@ -12,7 +12,7 @@
12
  "initializer_range": 0.1,
13
  "intermediate_size": 8192,
14
  "layer_norm_epsilon": 1e-05,
15
- "model_type": "falcon_mamba",
16
  "num_hidden_layers": 64,
17
  "pad_token_id": 0,
18
  "rescale_prenorm_residual": false,
 
1
  {
2
  "_name_or_path": "/home/ubuntu/checkpoints/ckpt_sagemaker_SFT_5829_padding",
3
  "architectures": [
4
+ "MambaForCausalLM"
5
  ],
6
  "bos_token_id": 0,
7
  "conv_kernel": 4,
 
12
  "initializer_range": 0.1,
13
  "intermediate_size": 8192,
14
  "layer_norm_epsilon": 1e-05,
15
+ "model_type": "mamba",
16
  "num_hidden_layers": 64,
17
  "pad_token_id": 0,
18
  "rescale_prenorm_residual": false,
tokenizer_config.json CHANGED
@@ -120,7 +120,7 @@
120
  "input_ids",
121
  "attention_mask"
122
  ],
123
- "model_max_length": 2048,
124
  "pad_to_multiple_of": null,
125
  "pad_token": "<|end_of_text|>",
126
  "pad_token_type_id": 0,
 
120
  "input_ids",
121
  "attention_mask"
122
  ],
123
+ "model_max_length": 1000000000000000019884624838656,
124
  "pad_to_multiple_of": null,
125
  "pad_token": "<|end_of_text|>",
126
  "pad_token_type_id": 0,