shibing624 committed
Commit fa56d50
Parent: f2a5f61

Update README.md

Files changed (1)
  1. README.md +5 -4
README.md CHANGED
@@ -114,11 +114,11 @@ from peft import PeftModel
 from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
 
 
-model = AutoModelForCausalLM.from_pretrained("baichuan-inc/Baichuan-13B-Chat", device_map='auto', trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("baichuan-inc/Baichuan-13B-Chat", device_map='auto', torch_dtype=torch.float16, trust_remote_code=True)
 model.generation_config = GenerationConfig.from_pretrained("baichuan-inc/Baichuan-13B-Chat", trust_remote_code=True)
 tokenizer = AutoTokenizer.from_pretrained("baichuan-inc/Baichuan-13B-Chat", trust_remote_code=True)
 model = PeftModel.from_pretrained(model, "shibing624/vicuna-baichuan-13b-chat-lora")
-device = "cuda" if torch.cuda.is_available() else "cpu"
+device = torch.device(0) if torch.cuda.is_available() else torch.device("cpu")
 
 def generate_prompt(instruction):
     return f"""A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.USER: {instruction} ASSISTANT: """
@@ -128,11 +128,11 @@ sents = ['一岁宝宝发烧能吃啥药', "who are you?"]
 for s in sents:
     q = generate_prompt(s)
     inputs = tokenizer(q, return_tensors="pt")
-    inputs = inputs.to(device=device)
+    inputs = inputs.to(device)
 
     generate_ids = model.generate(
         **inputs,
-        max_new_tokens=120,
+        max_new_tokens=512,
     )
 
     output = tokenizer.batch_decode(generate_ids, skip_special_tokens=True)[0]
@@ -154,6 +154,7 @@ vicuna-baichuan-13b-chat-lora
 └── adapter_model.bin
 ```
 
+- Inference GPU: 27G
 
 ### Inference Examples
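For reference, here is a minimal sketch of the full snippet as it reads after this commit, assembled from the hunks above. The `import torch` line and the final `print(output)` are added here so the sketch is self-contained (the diff context begins below the imports and ends at the decode step); everything else matches the updated README.

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# Load the base model in fp16: ~2 bytes/param, so roughly 26 GB for 13B
# parameters, consistent with the "Inference GPU: 27G" note in this commit.
model = AutoModelForCausalLM.from_pretrained(
    "baichuan-inc/Baichuan-13B-Chat",
    device_map='auto',
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
model.generation_config = GenerationConfig.from_pretrained("baichuan-inc/Baichuan-13B-Chat", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("baichuan-inc/Baichuan-13B-Chat", trust_remote_code=True)
# Attach the LoRA adapter on top of the base model.
model = PeftModel.from_pretrained(model, "shibing624/vicuna-baichuan-13b-chat-lora")
device = torch.device(0) if torch.cuda.is_available() else torch.device("cpu")


def generate_prompt(instruction):
    return f"""A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.USER: {instruction} ASSISTANT: """


sents = ['一岁宝宝发烧能吃啥药', "who are you?"]
for s in sents:
    q = generate_prompt(s)
    inputs = tokenizer(q, return_tensors="pt")
    # Move inputs to GPU 0, where device_map='auto' places the first layers.
    inputs = inputs.to(device)

    generate_ids = model.generate(
        **inputs,
        max_new_tokens=512,
    )

    output = tokenizer.batch_decode(generate_ids, skip_special_tokens=True)[0]
    print(output)  # added here; the diff hunk ends at the decode step
```

The new `torch_dtype=torch.float16` argument is what makes the 27G figure plausible: 13B parameters at 2 bytes each come to about 26 GB of weights before activation and cache overhead, whereas loading without it would typically default to float32 and need roughly twice the memory.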