dfurman committed
Commit: c8c30fe
Parent(s): 82ffd7d

Update README.md

Files changed (1)
1. README.md (+53 -11)
README.md CHANGED
@@ -35,27 +35,69 @@ parameters:
 dtype: bfloat16
 ```
 
-## 💻 Usage
+## Basic Usage
+
+<details>
+
+<summary>Setup</summary>
 
 ```python
 !pip install -qU transformers accelerate
 
-from transformers import AutoTokenizer
-import transformers
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
 model = "dfurman/HermesBagel-34B-v0.1"
-messages = [{"role": "user", "content": "What is a large language model?"}]
 
 tokenizer = AutoTokenizer.from_pretrained(model)
-prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-pipeline = transformers.pipeline(
-    "text-generation",
-    model=model,
-    torch_dtype=torch.float16,
+
+model = AutoModelForCausalLM.from_pretrained(
+    model,
+    torch_dtype=torch.bfloat16,
     device_map="auto",
+    trust_remote_code=True,
+)
+```
+
+</details>
+
+
+```python
+messages = [
+    {"role": "user", "content": "What is a large language model?"},
+]
+
+print("\n\n*** Prompt:")
+input_ids = tokenizer.apply_chat_template(
+    messages,
+    tokenize=True,
+    return_tensors="pt",
+)
+print(tokenizer.decode(input_ids[0]))
+
+print("\n\n*** Generate:")
+with torch.autocast("cuda", dtype=torch.bfloat16):
+    output = model.generate(
+        input_ids=input_ids.to("cuda"),
+        max_new_tokens=256,
+        return_dict_in_generate=True,
+        do_sample=True,
+        temperature=0.7,
+        top_k=50,
+        top_p=0.95
+    )
+
+response = tokenizer.decode(
+    output["sequences"][0][len(input_ids[0]):],
+    skip_special_tokens=True
 )
+print(response)
+```
 
-outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
-print(outputs[0]["generated_text"])
+**Outputs**
+
+```python
+"""
+coming
+"""
 ```