Xenova committed
Commit c4f9132
1 Parent(s): 3de5e69

Improve code snippet

Files changed (1)
  1. README.md +3 -5
README.md CHANGED
@@ -92,7 +92,7 @@ Alternatively, one may want to run that via `AutoAWQ` even though it's built on
 
 ```python
 import torch
-from autoawq import AutoAWQForCausalLM
+from awq import AutoAWQForCausalLM
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 model_id = "hugging-quants/Meta-Llama-3.1-405B-Instruct-AWQ-INT4"
@@ -102,9 +102,6 @@ prompt = [
 ]
 
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-
-inputs = tokenizer.apply_chat_template(prompt, tokenize=True, add_generation_prompt=True, return_tensors="pt").cuda()
-
 model = AutoAWQForCausalLM.from_pretrained(
     model_id,
     torch_dtype=torch.float16,
@@ -113,7 +110,8 @@ model = AutoAWQForCausalLM.from_pretrained(
     fuse_layers=True,
 )
 
-outputs = model.generate(inputs, do_sample=True, max_new_tokens=256)
+inputs = tokenizer.apply_chat_template(prompt, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True).to('cuda')
+outputs = model.generate(**inputs, do_sample=True, max_new_tokens=256)
 print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
 ```
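
For reference, here is the snippet as it reads after this commit, stitched together from the hunks above. Two regions fall outside the diff context (the prompt messages and two of the `from_pretrained` arguments on new lines 108-109) and are left as placeholder comments rather than guessed. The first change reflects that the AutoAWQ package is installed as `autoawq` but imported as `awq`; the second moves tokenization after model loading and adds `return_dict=True`, so `apply_chat_template` returns both `input_ids` and an `attention_mask` that `**inputs` forwards to `generate`.

```python
import torch
from awq import AutoAWQForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "hugging-quants/Meta-Llama-3.1-405B-Instruct-AWQ-INT4"
prompt = [
    # ... chat messages elided in the diff ...
]

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoAWQForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    # ... two arguments elided in the diff (new lines 108-109) ...
    fuse_layers=True,
)

# return_dict=True yields input_ids plus attention_mask, so **inputs
# passes both to generate()
inputs = tokenizer.apply_chat_template(prompt, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True).to('cuda')
outputs = model.generate(**inputs, do_sample=True, max_new_tokens=256)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
```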