Piotr Zalewski
committed
Commit 97c1241
Parent(s): 3438e81
copied READMEs be like
README.md CHANGED
@@ -62,7 +62,7 @@ from transformers import pipeline
 
 pipe = pipeline(
     "text-generation",
-    model="google/gemma-2-
+    model="google/gemma-2-27b-it",
     model_kwargs={"torch_dtype": torch.bfloat16},
     device="cuda", # replace with "mps" to run on a Mac device
 )
@@ -84,9 +84,9 @@ print(assistant_response)
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-
+    "google/gemma-2-27b-it",
     device_map="auto",
     torch_dtype=torch.bfloat16,
 )
@@ -122,9 +122,9 @@ You can also use `float32` if you skip the dtype, but no precision increase will
 # pip install accelerate
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-
+    "google/gemma-2-27b-it",
     device_map="auto",
 )
 
@@ -142,7 +142,7 @@ for running Gemma 2 through a command line interface, or CLI. Follow the [instal
 for getting started, then launch the CLI through the following command:
 
 ```shell
-local-gemma --model
+local-gemma --model 27b --preset speed
 ```
 
 #### Quantized Versions through `bitsandbytes`
@@ -158,9 +158,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
 quantization_config = BitsAndBytesConfig(load_in_8bit=True)
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-
+    "google/gemma-2-27b-it",
     quantization_config=quantization_config,
 )
 
@@ -183,9 +183,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
 quantization_config = BitsAndBytesConfig(load_in_4bit=True)
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-
+    "google/gemma-2-27b-it",
     quantization_config=quantization_config,
 )
 
@@ -220,8 +220,8 @@ import torch
 torch.set_float32_matmul_precision("high")
 
 # load the model + tokenizer
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
-model = Gemma2ForCausalLM.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
+model = Gemma2ForCausalLM.from_pretrained("google/gemma-2-27b-it", torch_dtype=torch.bfloat16)
 model.to("cuda")
 
 # apply the torch compile transformation
@@ -271,7 +271,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import transformers
 import torch
 
-model_id = "google/gemma-2-
+model_id = "google/gemma-2-27b-it"
 dtype = torch.bfloat16
 
 tokenizer = AutoTokenizer.from_pretrained(model_id)
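
The `+` lines above update the README's quickstart snippets to point at `google/gemma-2-27b-it`. As a sanity check, the changed pipeline snippet can be exercised end to end roughly like this (a minimal sketch, assuming a CUDA GPU with enough memory for the 27B weights in bfloat16 and a `transformers` version whose `text-generation` pipeline accepts chat-style message lists):

```python
# Minimal sketch of the updated quickstart snippet from the diff above.
# Assumes a CUDA device large enough to hold google/gemma-2-27b-it in bfloat16.
import torch
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="google/gemma-2-27b-it",
    model_kwargs={"torch_dtype": torch.bfloat16},
    device="cuda",  # replace with "mps" to run on a Mac device
)

# Chat-style prompt; the pipeline applies the model's chat template internally.
messages = [{"role": "user", "content": "Who are you? Please, answer in pirate-speak."}]
outputs = pipe(messages, max_new_tokens=256)

# The last message in the returned conversation is the assistant's reply.
assistant_response = outputs[0]["generated_text"][-1]["content"].strip()
print(assistant_response)
```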