joostinyi-baseten committed on
Commit
e75da24
·
verified ·
1 Parent(s): 76901ac

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. config.json +6 -6
  2. rank0.engine +2 -2
config.json CHANGED
@@ -63,10 +63,10 @@
63
  "max_input_len": 1024,
64
  "max_seq_len": 1048576,
65
  "opt_batch_size": null,
66
- "max_batch_size": 256,
67
  "max_beam_width": 1,
68
- "max_num_tokens": 4096,
69
- "opt_num_tokens": null,
70
  "max_prompt_embedding_table_size": 0,
71
  "kv_cache_type": "PAGED",
72
  "gather_context_logits": false,
@@ -122,8 +122,8 @@
122
  "plugin_config": {
123
  "dtype": "float16",
124
  "bert_attention_plugin": "auto",
125
- "gpt_attention_plugin": "float16",
126
- "gemm_plugin": "float16",
127
  "gemm_swiglu_plugin": null,
128
  "fp8_rowwise_gemm_plugin": null,
129
  "smooth_quant_gemm_plugin": null,
@@ -149,7 +149,7 @@
149
  "tokens_per_block": 64,
150
  "use_paged_context_fmha": true,
151
  "use_fp8_context_fmha": false,
152
- "multiple_profiles": true,
153
  "paged_state": false,
154
  "streamingllm": false,
155
  "manage_weights": false,
 
63
  "max_input_len": 1024,
64
  "max_seq_len": 1048576,
65
  "opt_batch_size": null,
66
+ "max_batch_size": 128,
67
  "max_beam_width": 1,
68
+ "max_num_tokens": 32000,
69
+ "opt_num_tokens": 128,
70
  "max_prompt_embedding_table_size": 0,
71
  "kv_cache_type": "PAGED",
72
  "gather_context_logits": false,
 
122
  "plugin_config": {
123
  "dtype": "float16",
124
  "bert_attention_plugin": "auto",
125
+ "gpt_attention_plugin": "auto",
126
+ "gemm_plugin": "auto",
127
  "gemm_swiglu_plugin": null,
128
  "fp8_rowwise_gemm_plugin": null,
129
  "smooth_quant_gemm_plugin": null,
 
149
  "tokens_per_block": 64,
150
  "use_paged_context_fmha": true,
151
  "use_fp8_context_fmha": false,
152
+ "multiple_profiles": false,
153
  "paged_state": false,
154
  "streamingllm": false,
155
  "manage_weights": false,
rank0.engine CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cff19f5c52a4e565b8e011c091863a93a8f31dc16f9c738711770b932b4a283
3
- size 16171359220
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ab2e652421c6f2f5afb545f7d710aa08783d94a93eb5911af856d9b1da6bd3c
3
+ size 16135219868