alexmarques committed on
Commit
38e03ba
1 Parent(s): 9233f20

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +8 -8
README.md CHANGED
@@ -143,7 +143,7 @@ This version of the lm-evaluation-harness includes versions of MMLU, ARC-Challen
143
  </td>
144
  <td><strong>Meta-Llama-3.1-8B-Instruct </strong>
145
  </td>
146
- <td><strong>Meta-Llama-3.1-8B-Instruct-quantized.w8a8 (this model)</strong>
147
  </td>
148
  <td><strong>Recovery</strong>
149
  </td>
@@ -350,7 +350,7 @@ lm_eval \
350
  ```
351
  lm_eval \
352
  --model vllm \
353
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
354
  --tasks mmlu_pt_llama_3.1_instruct \
355
  --fewshot_as_multiturn \
356
  --apply_chat_template \
@@ -362,7 +362,7 @@ lm_eval \
362
  ```
363
  lm_eval \
364
  --model vllm \
365
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
366
  --tasks mmlu_es_llama_3.1_instruct \
367
  --fewshot_as_multiturn \
368
  --apply_chat_template \
@@ -374,7 +374,7 @@ lm_eval \
374
  ```
375
  lm_eval \
376
  --model vllm \
377
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
378
  --tasks mmlu_it_llama_3.1_instruct \
379
  --fewshot_as_multiturn \
380
  --apply_chat_template \
@@ -386,7 +386,7 @@ lm_eval \
386
  ```
387
  lm_eval \
388
  --model vllm \
389
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
390
  --tasks mmlu_de_llama_3.1_instruct \
391
  --fewshot_as_multiturn \
392
  --apply_chat_template \
@@ -398,7 +398,7 @@ lm_eval \
398
  ```
399
  lm_eval \
400
  --model vllm \
401
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
402
  --tasks mmlu_fr_llama_3.1_instruct \
403
  --fewshot_as_multiturn \
404
  --apply_chat_template \
@@ -410,7 +410,7 @@ lm_eval \
410
  ```
411
  lm_eval \
412
  --model vllm \
413
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
414
  --tasks mmlu_hi_llama_3.1_instruct \
415
  --fewshot_as_multiturn \
416
  --apply_chat_template \
@@ -422,7 +422,7 @@ lm_eval \
422
  ```
423
  lm_eval \
424
  --model vllm \
425
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
426
  --tasks mmlu_th_llama_3.1_instruct \
427
  --fewshot_as_multiturn \
428
  --apply_chat_template \
 
143
  </td>
144
  <td><strong>Meta-Llama-3.1-8B-Instruct </strong>
145
  </td>
146
+ <td><strong>Meta-Llama-3.1-8B-Instruct-quantized.w8a16 (this model)</strong>
147
  </td>
148
  <td><strong>Recovery</strong>
149
  </td>
 
350
  ```
351
  lm_eval \
352
  --model vllm \
353
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
354
  --tasks mmlu_pt_llama_3.1_instruct \
355
  --fewshot_as_multiturn \
356
  --apply_chat_template \
 
362
  ```
363
  lm_eval \
364
  --model vllm \
365
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
366
  --tasks mmlu_es_llama_3.1_instruct \
367
  --fewshot_as_multiturn \
368
  --apply_chat_template \
 
374
  ```
375
  lm_eval \
376
  --model vllm \
377
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
378
  --tasks mmlu_it_llama_3.1_instruct \
379
  --fewshot_as_multiturn \
380
  --apply_chat_template \
 
386
  ```
387
  lm_eval \
388
  --model vllm \
389
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
390
  --tasks mmlu_de_llama_3.1_instruct \
391
  --fewshot_as_multiturn \
392
  --apply_chat_template \
 
398
  ```
399
  lm_eval \
400
  --model vllm \
401
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
402
  --tasks mmlu_fr_llama_3.1_instruct \
403
  --fewshot_as_multiturn \
404
  --apply_chat_template \
 
410
  ```
411
  lm_eval \
412
  --model vllm \
413
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
414
  --tasks mmlu_hi_llama_3.1_instruct \
415
  --fewshot_as_multiturn \
416
  --apply_chat_template \
 
422
  ```
423
  lm_eval \
424
  --model vllm \
425
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
426
  --tasks mmlu_th_llama_3.1_instruct \
427
  --fewshot_as_multiturn \
428
  --apply_chat_template \