Crystalcareai committed on
Commit
f0fff66
1 Parent(s): bde282e

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +14 -8
README.md CHANGED
@@ -1,36 +1,42 @@
1
- ```import torch
 
 
2
  from transformers import AutoTokenizer, TextStreamer, AutoModelForCausalLM
3
 
4
  model_path = "Crystalcareai/GemMoE-Medium-v0.5"
5
 
6
  # Load model
 
7
  model = AutoModelForCausalLM.from_pretrained(
8
  model_path,
9
  device_map="auto",
10
  low_cpu_mem_usage=True,
11
  torch_dtype=torch.float16,
12
- attn_implementation="flash_attention_2"
13
  trust_remote_code=True,
14
  )
15
 
16
  tokenizer = AutoTokenizer.from_pretrained(model_path)
 
17
  streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
18
 
19
  # Convert prompt to tokens
20
- prompt_template = "[INST] {prompt} [/INST]"
21
 
 
22
  prompt = "You're standing on the surface of the Earth. "\
23
- "You walk one mile south, one mile west and one mile north. "\
24
- "You end up exactly where you started. Where are you?"
25
 
26
  tokens = tokenizer(
27
- prompt_template.format(prompt=prompt),
28
  return_tensors='pt'
29
  ).input_ids.cuda()
30
 
31
  # Generate output
 
32
  generation_output = model.generate(
33
- tokens,
34
  streamer=streamer,
35
  max_new_tokens=512
36
- )```
 
 
1
+ ```python
2
+ import torch
3
+
4
  from transformers import AutoTokenizer, TextStreamer, AutoModelForCausalLM
5
 
6
  model_path = "Crystalcareai/GemMoE-Medium-v0.5"
7
 
8
  # Load model
9
+
10
  model = AutoModelForCausalLM.from_pretrained(
11
  model_path,
12
  device_map="auto",
13
  low_cpu_mem_usage=True,
14
  torch_dtype=torch.float16,
15
+ attn_implementation="flash_attention_2",
16
  trust_remote_code=True,
17
  )
18
 
19
  tokenizer = AutoTokenizer.from_pretrained(model_path)
20
+
21
  streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
22
 
23
  # Convert prompt to tokens
 
24
 
25
+ prompt_template = "[INST] {prompt} [/INST]"
26
  prompt = "You're standing on the surface of the Earth. "\
27
+ "You walk one mile south, one mile west and one mile north. "\
28
+ "You end up exactly where you started. Where are you?"
29
 
30
  tokens = tokenizer(
31
+ prompt_template.format(prompt=prompt),
32
  return_tensors='pt'
33
  ).input_ids.cuda()
34
 
35
  # Generate output
36
+
37
  generation_output = model.generate(
38
+ tokens,
39
  streamer=streamer,
40
  max_new_tokens=512
41
+ )
42
+ ```