Piotr Zalewski committed on
Commit
7c13e43
1 Parent(s): 6f6e06d

copied READMEs be like

Browse files
Files changed (1) hide show
  1. README.md +13 -13
README.md CHANGED
@@ -62,7 +62,7 @@ from transformers import pipeline
62
 
63
  pipe = pipeline(
64
  "text-generation",
65
- model="google/gemma-2-9b-it",
66
  model_kwargs={"torch_dtype": torch.bfloat16},
67
  device="cuda", # replace with "mps" to run on a Mac device
68
  )
@@ -84,9 +84,9 @@ print(assistant_response)
84
  from transformers import AutoTokenizer, AutoModelForCausalLM
85
  import torch
86
 
87
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
88
  model = AutoModelForCausalLM.from_pretrained(
89
- "google/gemma-2-9b-it",
90
  device_map="auto",
91
  torch_dtype=torch.bfloat16,
92
  )
@@ -122,9 +122,9 @@ You can also use `float32` if you skip the dtype, but no precision increase will
122
  # pip install accelerate
123
  from transformers import AutoTokenizer, AutoModelForCausalLM
124
 
125
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
126
  model = AutoModelForCausalLM.from_pretrained(
127
- "google/gemma-2-9b-it",
128
  device_map="auto",
129
  )
130
 
@@ -142,7 +142,7 @@ for running Gemma 2 through a command line interface, or CLI. Follow the [instal
142
  for getting started, then launch the CLI through the following command:
143
 
144
  ```shell
145
- local-gemma --model 9b --preset speed
146
  ```
147
 
148
  #### Quantized Versions through `bitsandbytes`
@@ -158,9 +158,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
158
 
159
  quantization_config = BitsAndBytesConfig(load_in_8bit=True)
160
 
161
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
162
  model = AutoModelForCausalLM.from_pretrained(
163
- "google/gemma-2-9b-it",
164
  quantization_config=quantization_config,
165
  )
166
 
@@ -183,9 +183,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
183
 
184
  quantization_config = BitsAndBytesConfig(load_in_4bit=True)
185
 
186
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
187
  model = AutoModelForCausalLM.from_pretrained(
188
- "google/gemma-2-9b-it",
189
  quantization_config=quantization_config,
190
  )
191
 
@@ -220,8 +220,8 @@ import torch
220
  torch.set_float32_matmul_precision("high")
221
 
222
  # load the model + tokenizer
223
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
224
- model = Gemma2ForCausalLM.from_pretrained("google/gemma-2-9b-it", torch_dtype=torch.bfloat16)
225
  model.to("cuda")
226
 
227
  # apply the torch compile transformation
@@ -271,7 +271,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
271
  import transformers
272
  import torch
273
 
274
- model_id = "google/gemma-2-9b-it"
275
  dtype = torch.bfloat16
276
 
277
  tokenizer = AutoTokenizer.from_pretrained(model_id)
 
62
 
63
  pipe = pipeline(
64
  "text-generation",
65
+ model="google/gemma-2-2b-it",
66
  model_kwargs={"torch_dtype": torch.bfloat16},
67
  device="cuda", # replace with "mps" to run on a Mac device
68
  )
 
84
  from transformers import AutoTokenizer, AutoModelForCausalLM
85
  import torch
86
 
87
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it")
88
  model = AutoModelForCausalLM.from_pretrained(
89
+ "google/gemma-2-2b-it",
90
  device_map="auto",
91
  torch_dtype=torch.bfloat16,
92
  )
 
122
  # pip install accelerate
123
  from transformers import AutoTokenizer, AutoModelForCausalLM
124
 
125
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it")
126
  model = AutoModelForCausalLM.from_pretrained(
127
+ "google/gemma-2-2b-it",
128
  device_map="auto",
129
  )
130
 
 
142
  for getting started, then launch the CLI through the following command:
143
 
144
  ```shell
145
+ local-gemma --model 2b --preset speed
146
  ```
147
 
148
  #### Quantized Versions through `bitsandbytes`
 
158
 
159
  quantization_config = BitsAndBytesConfig(load_in_8bit=True)
160
 
161
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it")
162
  model = AutoModelForCausalLM.from_pretrained(
163
+ "google/gemma-2-2b-it",
164
  quantization_config=quantization_config,
165
  )
166
 
 
183
 
184
  quantization_config = BitsAndBytesConfig(load_in_4bit=True)
185
 
186
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it")
187
  model = AutoModelForCausalLM.from_pretrained(
188
+ "google/gemma-2-2b-it",
189
  quantization_config=quantization_config,
190
  )
191
 
 
220
  torch.set_float32_matmul_precision("high")
221
 
222
  # load the model + tokenizer
223
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it")
224
+ model = Gemma2ForCausalLM.from_pretrained("google/gemma-2-2b-it", torch_dtype=torch.bfloat16)
225
  model.to("cuda")
226
 
227
  # apply the torch compile transformation
 
271
  import transformers
272
  import torch
273
 
274
+ model_id = "google/gemma-2-2b-it"
275
  dtype = torch.bfloat16
276
 
277
  tokenizer = AutoTokenizer.from_pretrained(model_id)