alexmarques committed on
Commit
38e03ba
1 Parent(s): 9233f20

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +8 -8
README.md CHANGED
@@ -143,7 +143,7 @@ This version of the lm-evaluation-harness includes versions of MMLU, ARC-Challen
143
  </td>
144
  <td><strong>Meta-Llama-3.1-8B-Instruct </strong>
145
  </td>
146
- <td><strong>Meta-Llama-3.1-8B-Instruct-quantized.w8a8 (this model)</strong>
147
  </td>
148
  <td><strong>Recovery</strong>
149
  </td>
@@ -350,7 +350,7 @@ lm_eval \
350
  ```
351
  lm_eval \
352
  --model vllm \
353
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
354
  --tasks mmlu_pt_llama_3.1_instruct \
355
  --fewshot_as_multiturn \
356
  --apply_chat_template \
@@ -362,7 +362,7 @@ lm_eval \
362
  ```
363
  lm_eval \
364
  --model vllm \
365
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
366
  --tasks mmlu_es_llama_3.1_instruct \
367
  --fewshot_as_multiturn \
368
  --apply_chat_template \
@@ -374,7 +374,7 @@ lm_eval \
374
  ```
375
  lm_eval \
376
  --model vllm \
377
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
378
  --tasks mmlu_it_llama_3.1_instruct \
379
  --fewshot_as_multiturn \
380
  --apply_chat_template \
@@ -386,7 +386,7 @@ lm_eval \
386
  ```
387
  lm_eval \
388
  --model vllm \
389
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
390
  --tasks mmlu_de_llama_3.1_instruct \
391
  --fewshot_as_multiturn \
392
  --apply_chat_template \
@@ -398,7 +398,7 @@ lm_eval \
398
  ```
399
  lm_eval \
400
  --model vllm \
401
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
402
  --tasks mmlu_fr_llama_3.1_instruct \
403
  --fewshot_as_multiturn \
404
  --apply_chat_template \
@@ -410,7 +410,7 @@ lm_eval \
410
  ```
411
  lm_eval \
412
  --model vllm \
413
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
414
  --tasks mmlu_hi_llama_3.1_instruct \
415
  --fewshot_as_multiturn \
416
  --apply_chat_template \
@@ -422,7 +422,7 @@ lm_eval \
422
  ```
423
  lm_eval \
424
  --model vllm \
425
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
426
  --tasks mmlu_th_llama_3.1_instruct \
427
  --fewshot_as_multiturn \
428
  --apply_chat_template \
 
143
  </td>
144
  <td><strong>Meta-Llama-3.1-8B-Instruct </strong>
145
  </td>
146
+ <td><strong>Meta-Llama-3.1-8B-Instruct-quantized.w8a16 (this model)</strong>
147
  </td>
148
  <td><strong>Recovery</strong>
149
  </td>
 
350
  ```
351
  lm_eval \
352
  --model vllm \
353
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
354
  --tasks mmlu_pt_llama_3.1_instruct \
355
  --fewshot_as_multiturn \
356
  --apply_chat_template \
 
362
  ```
363
  lm_eval \
364
  --model vllm \
365
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
366
  --tasks mmlu_es_llama_3.1_instruct \
367
  --fewshot_as_multiturn \
368
  --apply_chat_template \
 
374
  ```
375
  lm_eval \
376
  --model vllm \
377
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
378
  --tasks mmlu_it_llama_3.1_instruct \
379
  --fewshot_as_multiturn \
380
  --apply_chat_template \
 
386
  ```
387
  lm_eval \
388
  --model vllm \
389
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
390
  --tasks mmlu_de_llama_3.1_instruct \
391
  --fewshot_as_multiturn \
392
  --apply_chat_template \
 
398
  ```
399
  lm_eval \
400
  --model vllm \
401
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
402
  --tasks mmlu_fr_llama_3.1_instruct \
403
  --fewshot_as_multiturn \
404
  --apply_chat_template \
 
410
  ```
411
  lm_eval \
412
  --model vllm \
413
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
414
  --tasks mmlu_hi_llama_3.1_instruct \
415
  --fewshot_as_multiturn \
416
  --apply_chat_template \
 
422
  ```
423
  lm_eval \
424
  --model vllm \
425
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
426
  --tasks mmlu_th_llama_3.1_instruct \
427
  --fewshot_as_multiturn \
428
  --apply_chat_template \