张瀚灵
committed on
Commit
·
f416b0e
1
Parent(s):
12d5e7e
update config
Browse files
- mlc-chat-config.json +3 -3
mlc-chat-config.json
CHANGED
@@ -12,7 +12,7 @@
|
|
12 |
"position_embedding_base": 10000.0,
|
13 |
"partial_rotary_factor": 0.4,
|
14 |
"num_key_value_heads": 32,
|
15 |
-
"context_window_size":
|
16 |
"prefill_chunk_size": 128,
|
17 |
"head_dim": 80,
|
18 |
"truncation_ranks": {
|
@@ -302,12 +302,12 @@
|
|
302 |
]
|
303 |
},
|
304 |
"tensor_parallel_shards": 1,
|
305 |
-
"max_batch_size":
|
306 |
},
|
307 |
"vocab_size": 51200,
|
308 |
"context_window_size": 2048,
|
309 |
"sliding_window_size": -1,
|
310 |
-
"prefill_chunk_size":
|
311 |
"attention_sink_size": -1,
|
312 |
"tensor_parallel_shards": 1,
|
313 |
"pipeline_parallel_stages": 1,
|
|
|
12 |
"position_embedding_base": 10000.0,
|
13 |
"partial_rotary_factor": 0.4,
|
14 |
"num_key_value_heads": 32,
|
15 |
+
"context_window_size": 128,
|
16 |
"prefill_chunk_size": 128,
|
17 |
"head_dim": 80,
|
18 |
"truncation_ranks": {
|
|
|
302 |
]
|
303 |
},
|
304 |
"tensor_parallel_shards": 1,
|
305 |
+
"max_batch_size": 1
|
306 |
},
|
307 |
"vocab_size": 51200,
|
308 |
"context_window_size": 2048,
|
309 |
"sliding_window_size": -1,
|
310 |
+
"prefill_chunk_size": 2048,
|
311 |
"attention_sink_size": -1,
|
312 |
"tensor_parallel_shards": 1,
|
313 |
"pipeline_parallel_stages": 1,
|