joostinyi-baseten
committed on
Upload folder using huggingface_hub
Browse files
- config.json +6 -6
- rank0.engine +2 -2
config.json
CHANGED
@@ -63,10 +63,10 @@
|
|
63 |
"max_input_len": 1024,
|
64 |
"max_seq_len": 1048576,
|
65 |
"opt_batch_size": null,
|
66 |
-
"max_batch_size":
|
67 |
"max_beam_width": 1,
|
68 |
-
"max_num_tokens":
|
69 |
-
"opt_num_tokens":
|
70 |
"max_prompt_embedding_table_size": 0,
|
71 |
"kv_cache_type": "PAGED",
|
72 |
"gather_context_logits": false,
|
@@ -122,8 +122,8 @@
|
|
122 |
"plugin_config": {
|
123 |
"dtype": "float16",
|
124 |
"bert_attention_plugin": "auto",
|
125 |
-
"gpt_attention_plugin": "
|
126 |
-
"gemm_plugin": "
|
127 |
"gemm_swiglu_plugin": null,
|
128 |
"fp8_rowwise_gemm_plugin": null,
|
129 |
"smooth_quant_gemm_plugin": null,
|
@@ -149,7 +149,7 @@
|
|
149 |
"tokens_per_block": 64,
|
150 |
"use_paged_context_fmha": true,
|
151 |
"use_fp8_context_fmha": false,
|
152 |
-
"multiple_profiles":
|
153 |
"paged_state": false,
|
154 |
"streamingllm": false,
|
155 |
"manage_weights": false,
|
|
|
63 |
"max_input_len": 1024,
|
64 |
"max_seq_len": 1048576,
|
65 |
"opt_batch_size": null,
|
66 |
+
"max_batch_size": 128,
|
67 |
"max_beam_width": 1,
|
68 |
+
"max_num_tokens": 32000,
|
69 |
+
"opt_num_tokens": 128,
|
70 |
"max_prompt_embedding_table_size": 0,
|
71 |
"kv_cache_type": "PAGED",
|
72 |
"gather_context_logits": false,
|
|
|
122 |
"plugin_config": {
|
123 |
"dtype": "float16",
|
124 |
"bert_attention_plugin": "auto",
|
125 |
+
"gpt_attention_plugin": "auto",
|
126 |
+
"gemm_plugin": "auto",
|
127 |
"gemm_swiglu_plugin": null,
|
128 |
"fp8_rowwise_gemm_plugin": null,
|
129 |
"smooth_quant_gemm_plugin": null,
|
|
|
149 |
"tokens_per_block": 64,
|
150 |
"use_paged_context_fmha": true,
|
151 |
"use_fp8_context_fmha": false,
|
152 |
+
"multiple_profiles": false,
|
153 |
"paged_state": false,
|
154 |
"streamingllm": false,
|
155 |
"manage_weights": false,
|
rank0.engine
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ab2e652421c6f2f5afb545f7d710aa08783d94a93eb5911af856d9b1da6bd3c
|
3 |
+
size 16135219868
|