AndrewZeng committed on
Commit
eb75edf
1 Parent(s): fa5a3c6

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +30 -27
README.md CHANGED
@@ -2,35 +2,30 @@
2
  license: apache-2.0
3
  ---
4
 
5
- ## Usage Code
 
 
 
 
6
 
7
  ```python
8
- import torch
9
  from transformers import AutoTokenizer, AutoModelForCausalLM
10
  import numpy as np
11
  from scipy.special import softmax
12
- # 选择模型和模型名称(例如,这里使用GPT-2模型)
13
  model_name = "hkust-nlp/Deita-Complexity-Scorer"
14
  tokenizer = AutoTokenizer.from_pretrained(model_name)
15
  model = AutoModelForCausalLM.from_pretrained(model_name)
16
 
17
 
18
- complexity_template = ("You are a helpful assistant. Please identify the complexity score of the following user query. \n##Query: {instruction} \n##Complexity: ")
19
- # 输入文本
20
- input_text = "write a performance review for a junior data scientist"
21
-
22
-
23
- user_input = complexity_template.format(instruction=input_text)
24
-
25
- # 将输入文本编码为tokens
26
- input_ids = tokenizer.encode(user_input, return_tensors="pt")
27
-
28
- # 生成文本
29
- max_length = 512 # 设置生成文本的最大长度
30
- outputs = model.generate(input_ids, max_length=512, num_return_sequences=1, return_dict_in_generate=True, output_scores=True)
31
- logprobs_list = outputs.scores[0][0]
32
- score_logits = []
33
- id2score = {
34
  29896: "1",
35
  29906: "2",
36
  29941: "3",
@@ -38,12 +33,20 @@ id2score = {
38
  29945: "5",
39
  29953: "6"
40
  }
41
- score_template = np.array([1,2,3,4,5,6])
42
- for k in id2score:
43
- score_logits.append(logprobs_list[k])
44
- score_logits = np.array(score_logits)
45
- score_npy = softmax(score_logits, axis=0)
46
- score_npy = score_npy * score_template
47
-
48
- score_npy = np.sum(score_npy, axis=0)
 
 
 
 
 
 
 
 
49
  ```
 
2
  license: apache-2.0
3
  ---
4
 
5
+ # Deita-Complexity-Scorer
6
+
7
+ Deita-Complexity-Scorer is a tool for automatically annotating the instruction complexity of supervised fine-tuning (SFT) data.
8
+
9
+ ## Uses
10
 
11
  ```python
 
12
  from transformers import AutoTokenizer, AutoModelForCausalLM
13
  import numpy as np
14
  from scipy.special import softmax
 
15
  model_name = "hkust-nlp/Deita-Complexity-Scorer"
16
  tokenizer = AutoTokenizer.from_pretrained(model_name)
17
  model = AutoModelForCausalLM.from_pretrained(model_name)
18
 
19
 
20
+ def infer_complexity(model, tokenizer, input_text):
21
+ complexity_template = ("You are a helpful assistant. Please identify the complexity score of the following user query. \n##Query: {instruction} \n##Complexity: ")
22
+ user_input = complexity_template.format(instruction=input_text)
23
+ input_ids = tokenizer.encode(user_input, return_tensors="pt")
24
+ max_length = 512
25
+ outputs = model.generate(input_ids, max_length=512, num_return_sequences=1, return_dict_in_generate=True, output_scores=True)
26
+ logprobs_list = outputs.scores[0][0]
27
+ score_logits = []
28
+ id2score = {
 
 
 
 
 
 
 
29
  29896: "1",
30
  29906: "2",
31
  29941: "3",
 
33
  29945: "5",
34
  29953: "6"
35
  }
36
+ score_template = np.array([1,2,3,4,5,6])
37
+ for k in id2score:
38
+ score_logits.append(logprobs_list[k])
39
+ score_logits = np.array(score_logits)
40
+ score_npy = softmax(score_logits, axis=0)
41
+ score_npy = score_npy * score_template
42
+
43
+ score_npy = np.sum(score_npy, axis=0)
44
+ return score_npy
45
+
46
+ input_text = "write a performance review for a junior data scientist"
47
+ complexity_score = infer_complexity(model, tokenizer, input_text)
48
+
49
+ print(complexity_score)
50
+
51
+
52
  ```