shenzhi-wang committed on
Commit
69437f3
·
verified ·
1 Parent(s): 41d6e26

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +22 -19
README.md CHANGED
@@ -138,32 +138,35 @@ deepspeed --num_gpus 8 src/train_bash.py \
138
  # 2. Usage
139
 
140
  ```python
141
- from transformers import AutoTokenizer, AutoModelForCausalLM
142
 
143
- model_id = "shenzhi-wang/Llama3-8B-Chinese-Chat"
144
-
145
- tokenizer = AutoTokenizer.from_pretrained(model_id)
146
- model = AutoModelForCausalLM.from_pretrained(
147
- model_id, torch_dtype="auto", device_map="auto"
148
  )
149
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  messages = [
 
 
 
 
151
  {"role": "user", "content": "写一首诗吧"},
152
  ]
153
 
154
- input_ids = tokenizer.apply_chat_template(
155
- messages, add_generation_prompt=True, return_tensors="pt"
156
- ).to(model.device)
157
-
158
- outputs = model.generate(
159
- input_ids,
160
- max_new_tokens=8192,
161
- do_sample=True,
162
- temperature=0.6,
163
- top_p=0.9,
164
- )
165
- response = outputs[0][input_ids.shape[-1]:]
166
- print(tokenizer.decode(response, skip_special_tokens=True))
167
  ```
168
 
169
  # 3. Examples
 
138
  # 2. Usage
139
 
140
  ```python
141
+ from llama_cpp import Llama
142
 
143
+ model = Llama(
144
+ "/Your/Path/To/GGUF/File",
145
+ verbose=False,
146
+ n_gpu_layers=-1,
 
147
  )
148
 
149
+ system_prompt = "You are a helpful assistant."
150
+
151
+ def generate_reponse(_model, _messages, _max_tokens=8192):
152
+ _output = _model.create_chat_completion(
153
+ _messages,
154
+ stop=["<|eot_id|>", "<|end_of_text|>"],
155
+ max_tokens=_max_tokens,
156
+ )["choices"][0]["message"]["content"]
157
+ return _output
158
+
159
+ # The following are some examples
160
+
161
  messages = [
162
+ {
163
+ "role": "system",
164
+ "content": system_prompt,
165
+ },
166
  {"role": "user", "content": "写一首诗吧"},
167
  ]
168
 
169
+ print(generate_reponse(model, messages))
 
 
 
 
 
 
 
 
 
 
 
 
170
  ```
171
 
172
  # 3. Examples