bowenbaoamd committed
Commit 870f255
1 Parent(s): d4f9cb6

Update README.md


Remove trailing spaces.
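
For context: in `sh`, a backslash continues a command onto the next line only when it is the very last character on the line. The removed trailing spaces left `\ ` (backslash followed by a space) at the end of several lines, which escapes the space instead of the newline and splits the command when pasted into a shell. A minimal illustration, not taken from the commit:

```sh
# Broken: the backslash is followed by a trailing space (invisible here),
# so it escapes the space rather than the newline; "two" then runs as a
# separate command and fails with "command not found".
echo one \ 
two

# Fixed: the backslash is the final character, so the shell joins the lines.
echo one \
two
```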

Files changed (1)
  1. README.md +4 -4
README.md CHANGED
@@ -18,9 +18,9 @@ license_link: https://github.com/meta-llama/llama-models/blob/main/models/llama3
 ```sh
 export MODEL_DIR = [local model checkpoint folder] or meta-llama/Meta-Llama-3.1-8B-Instruct
 # single GPU
-python3 quantize_quark.py \ 
+python3 quantize_quark.py \
 --model_dir $MODEL_DIR \
---output_dir Meta-Llama-3.1-8B-Instruct-FP8-KV \ 
+--output_dir Meta-Llama-3.1-8B-Instruct-FP8-KV \
 --quant_scheme w_fp8_a_fp8 \
 --kv_cache_dtype fp8 \
 --num_calib_data 128 \
@@ -28,9 +28,9 @@ python3 quantize_quark.py \
 --no_weight_matrix_merge
 
 # If model size is too large for single GPU, please use multi GPU instead.
-python3 quantize_quark.py \ 
+python3 quantize_quark.py \
 --model_dir $MODEL_DIR \
---output_dir Meta-Llama-3.1-8B-Instruct-FP8-KV \ 
+--output_dir Meta-Llama-3.1-8B-Instruct-FP8-KV \
 --quant_scheme w_fp8_a_fp8 \
 --kv_cache_dtype fp8 \
 --num_calib_data 128 \
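
One context line the diff leaves untouched is worth flagging: `export MODEL_DIR = [local model checkpoint folder] or meta-llama/Meta-Llama-3.1-8B-Instruct` is a placeholder rather than runnable `sh`, since POSIX assignments take no spaces around `=` and assign a single value. A sketch of a usable form, keeping the model ID from the README; the local path is hypothetical:

```sh
# Assumed usable form: no spaces around "=", one value per assignment.
export MODEL_DIR=meta-llama/Meta-Llama-3.1-8B-Instruct
# ...or point MODEL_DIR at a local checkpoint folder (hypothetical path):
export MODEL_DIR=/path/to/Meta-Llama-3.1-8B-Instruct
```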