Safetensors
qwen2
ZiyiYe committed on
Commit
0eb531e
1 Parent(s): 5764b6d

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +44 -0
README.md ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ datasets:
4
+ - Skywork/Skywork-Reward-Preference-80K-v0.1
5
+ base_model:
6
+ - Qwen/Qwen2-7B-Instruct
7
+ ---
8
+ ```python
9
+ import torch
10
+ from transformers import AutoModelForCausalLM, AutoTokenizer
11
+
12
+ model_name = "/cpfs/29f69eb5e2e60f26/user/rlhf/yzy/OpenRLHF-new/outputs/inference/zeroshot_llm_merge/dpo/model/dpo_critic_trained_merge/checkpoints/Con-J-Qwen2-7B"
13
+ model = AutoModelForCausalLM.from_pretrained(model_name, device_map='auto', trust_remote_code=True)
14
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
15
+
16
+ question = "What is the range of the numeric output of a sigmoid node in a neural network?"
17
+ answer1 = "The output of a sigmoid node is bounded between -1 and 1."
18
+ answer2 = "The output of a sigmoid node is bounded between 0 and 1."
19
+
20
+ # Build the judge prompt, apply the chat template, and tokenize the conversation
21
+ CON_J_PROMPT = """作为一个评价专家,给定一个问题和它的两个可能的回答,请选出哪一个回答在连贯性、准确性、覆盖度和上述定义的整体质量方面最为符合。请用JSON格式输出你的判断, 其中"原因"是你提供的解释,"更好的回答"是整数类型的1或2,例如{{"原因": "你的解释", "更好的回答": 1}}。以下是问题和候选回答的内容:
22
+ \n问题:{instruction}
23
+ 回答1:{output_1}
24
+ 回答2:{output_2}"""
25
+ user_prompt = CON_J_PROMPT.format(instruction=question, output_1=answer1, output_2=answer2)
26
+ system_prompt = ""
27
+ messages = [
28
+ {"role": "system", "content": system_prompt,},
29
+ {"role": "user", "content": user_prompt},
30
+ ]
31
+ prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
32
+ prompt = tokenizer([prompt], return_tensors="pt")
33
+
34
+ # Generate judgment for the given prompt
35
+ with torch.no_grad():
36
+ generated_ids = model.generate(prompt.input_ids, do_sample=False, max_new_tokens=2048,)
37
+ generated_ids = [
38
+ output_ids[len(input_ids):] for input_ids, output_ids in zip(prompt.input_ids, generated_ids)
39
+ ]
40
+ response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
41
+
42
+ # response: {"原因": "回答1中的-1是错误的,因为sigmoid函数的实际输出范围是0到1,而不是包括-1。回答2准确地描述了sigmoid函数的输出范围是0到1。",\n "更好的回答": 2}
43
+
44
+ ```