alvarobartt HF staff committed on
Commit
a9df6d8
·
verified ·
1 Parent(s): ecb75e2

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +16 -20
README.md CHANGED
@@ -49,13 +49,18 @@ import torch
49
  from transformers import AutoModelForCausalLM, AutoTokenizer
50
 
51
  model_id = "hugging-quants/Meta-Llama-3.1-70B-Instruct-GPTQ-INT4"
 
 
 
 
 
 
 
 
52
  prompt = [
53
  {"role": "system", "content": "You are a helpful assistant, that responds as a pirate."},
54
  {"role": "user", "content": "What's Deep Learning?"},
55
  ]
56
-
57
- tokenizer = AutoTokenizer.from_pretrained(model_id)
58
-
59
  inputs = tokenizer.apply_chat_template(
60
  prompt,
61
  tokenize=True,
@@ -64,13 +69,6 @@ inputs = tokenizer.apply_chat_template(
64
  return_dict=True,
65
  ).to("cuda")
66
 
67
- model = AutoModelForCausalLM.from_pretrained(
68
- model_id,
69
- torch_dtype=torch.float16,
70
- low_cpu_mem_usage=True,
71
- device_map="auto",
72
- )
73
-
74
  outputs = model.generate(**inputs, do_sample=True, max_new_tokens=256)
75
  print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
76
  ```
@@ -92,13 +90,18 @@ from auto_gptq import AutoGPTQForCausalLM
92
  from transformers import AutoModelForCausalLM, AutoTokenizer
93
 
94
  model_id = "hugging-quants/Meta-Llama-3.1-70B-Instruct-GPTQ-INT4"
 
 
 
 
 
 
 
 
95
  prompt = [
96
  {"role": "system", "content": "You are a helpful assistant, that responds as a pirate."},
97
  {"role": "user", "content": "What's Deep Learning?"},
98
  ]
99
-
100
- tokenizer = AutoTokenizer.from_pretrained(model_id)
101
-
102
  inputs = tokenizer.apply_chat_template(
103
  prompt,
104
  tokenize=True,
@@ -107,13 +110,6 @@ inputs = tokenizer.apply_chat_template(
107
  return_dict=True,
108
  ).to("cuda")
109
 
110
- model = AutoGPTQForCausalLM.from_pretrained(
111
- model_id,
112
- torch_dtype=torch.float16,
113
- low_cpu_mem_usage=True,
114
- device_map="auto",
115
- )
116
-
117
  outputs = model.generate(**inputs, do_sample=True, max_new_tokens=256)
118
  print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
119
  ```
 
49
  from transformers import AutoModelForCausalLM, AutoTokenizer
50
 
51
  model_id = "hugging-quants/Meta-Llama-3.1-70B-Instruct-GPTQ-INT4"
52
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
53
+ model = AutoModelForCausalLM.from_pretrained(
54
+ model_id,
55
+ torch_dtype=torch.float16,
56
+ low_cpu_mem_usage=True,
57
+ device_map="auto",
58
+ )
59
+
60
  prompt = [
61
  {"role": "system", "content": "You are a helpful assistant, that responds as a pirate."},
62
  {"role": "user", "content": "What's Deep Learning?"},
63
  ]
 
 
 
64
  inputs = tokenizer.apply_chat_template(
65
  prompt,
66
  tokenize=True,
 
69
  return_dict=True,
70
  ).to("cuda")
71
 
 
 
 
 
 
 
 
72
  outputs = model.generate(**inputs, do_sample=True, max_new_tokens=256)
73
  print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
74
  ```
 
90
  from transformers import AutoModelForCausalLM, AutoTokenizer
91
 
92
  model_id = "hugging-quants/Meta-Llama-3.1-70B-Instruct-GPTQ-INT4"
93
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
94
+ model = AutoGPTQForCausalLM.from_pretrained(
95
+ model_id,
96
+ torch_dtype=torch.float16,
97
+ low_cpu_mem_usage=True,
98
+ device_map="auto",
99
+ )
100
+
101
  prompt = [
102
  {"role": "system", "content": "You are a helpful assistant, that responds as a pirate."},
103
  {"role": "user", "content": "What's Deep Learning?"},
104
  ]
 
 
 
105
  inputs = tokenizer.apply_chat_template(
106
  prompt,
107
  tokenize=True,
 
110
  return_dict=True,
111
  ).to("cuda")
112
 
 
 
 
 
 
 
 
113
  outputs = model.generate(**inputs, do_sample=True, max_new_tokens=256)
114
  print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
115
  ```