update : zeroclue
Browse files- README.md +87 -0
- pytorch_model.bin +1 -1
README.md
CHANGED
@@ -107,6 +107,93 @@ example_dict={
|
|
107 |
|
108 |
"生成式摘要":{"text_a":"针对传统的流量分类管理系统存在不稳定、结果反馈不及时、分类结果显示不直观等问题,设计一个基于web的在线的流量分类管理系统.该系统采用流中前5个包(排除3次握手包)所含信息作为特征值计算资源,集成一种或多种分类算法用于在线网络流量分类,应用数据可视化技术处理分类结果.实验表明:在采用适应在线分类的特征集和c4.5决策树算法做分类时,系统能快速做出分类,且精度达到94%以上;数据可视化有助于人机交互,改善分类指导."}
|
109 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
```
|
111 |
|
112 |
## 预训练或微调 pretrain or finetune
|
|
|
107 |
|
108 |
"生成式摘要":{"text_a":"针对传统的流量分类管理系统存在不稳定、结果反馈不及时、分类结果显示不直观等问题,设计一个基于web的在线的流量分类管理系统.该系统采用流中前5个包(排除3次握手包)所含信息作为特征值计算资源,集成一种或多种分类算法用于在线网络流量分类,应用数据可视化技术处理分类结果.实验表明:在采用适应在线分类的特征集和c4.5决策树算法做分类时,系统能快速做出分类,且精度达到94%以上;数据可视化有助于人机交互,改善分类指导."}
|
109 |
}
|
110 |
+
|
111 |
+
|
112 |
+
# Maps each supported subtask type to the template used to build its prompt.
#   "prompt"     - a str.format template with positional "{}" placeholders.
#   "keys_order" - the sample keys whose values fill those placeholders, in order.
#   "data_type"  - a tag naming the kind of task the template belongs to.
# While building a prompt, the content of the "verbalizer" placeholder key is
# produced by joining the candidate labels: "/".join(choices).
dataset2instruction = {
    "情感分析": {
        "prompt": "{}任务:【{}】这篇文章的情感态度是什么?{}",
        "keys_order": ["subtask_type", "text_a", "verbalizer"],
        "data_type": "classification",
    },
    "文本分类": {
        "prompt": "{}任务:【{}】这篇文章的类别是什么?{}",
        "keys_order": ["subtask_type", "text_a", "verbalizer"],
        "data_type": "classification",
    },
    "新闻分类": {
        "prompt": "{}任务:【{}】这篇文章的类别是什么?{}",
        "keys_order": ["subtask_type", "text_a", "verbalizer"],
        "data_type": "classification",
    },
    "意图识别": {
        "prompt": "{}任务:【{}】这句话的意图是什么?{}",
        "keys_order": ["subtask_type", "text_a", "verbalizer"],
        "data_type": "classification",
    },
    # --------------------
    "自然语言推理": {
        "prompt": "{}任务:【{}】和【{}】,以上两句话的逻辑关系是什么?{}",
        "keys_order": ["subtask_type", "text_a", "text_b", "verbalizer"],
        "data_type": "classification",
    },
    "语义匹配": {
        "prompt": "{}任务:【{}】和【{}】,以上两句话的内容是否相似?{}",
        "keys_order": ["subtask_type", "text_a", "text_b", "verbalizer"],
        "data_type": "classification",
    },
    # -----------------------
    "指代消解": {
        "prompt": "{}任务:文章【{}】中{}{}",
        "keys_order": ["subtask_type", "text_a", "question", "verbalizer"],
        "data_type": "classification",
    },
    "多项选择": {
        "prompt": "{}任务:阅读文章【{}】问题【{}】?{}",
        "keys_order": ["subtask_type", "text_a", "question", "verbalizer"],
        "data_type": "classification",
    },
    # ------------------------
    "抽取式阅读理解": {
        "prompt": "{}任务:阅读文章【{}】问题【{}】的答案是什么?",
        "keys_order": ["subtask_type", "text_a", "question"],
        "data_type": "mrc",
    },
    "实体识别": {
        "prompt": "{}任务:找出【{}】这篇文章中所有【{}】类型的实体?",
        "keys_order": ["subtask_type", "text_a", "question"],
        "data_type": "ner",
    },
    # ------------------------
    "关键词抽取": {
        "prompt": "{}任务:【{}】这篇文章的关键词是什么?",
        "keys_order": ["subtask_type", "text_a"],
        "data_type": "keys",
    },
    "关键词识别": {
        "prompt": "{}任务:阅读文章【{}】问题【{}】{}",
        "keys_order": ["subtask_type", "text_a", "question", "verbalizer"],
        "data_type": "classification",
    },
    "生成式摘要": {
        "prompt": "{}任务:【{}】这篇文章的摘要是什么?",
        "keys_order": ["subtask_type", "text_a"],
        "data_type": "summ",
    },
}
|
184 |
+
|
185 |
+
def get_instruction(sample):
    """Build the instruction prompt for ``sample`` and store it on the sample.

    The template is looked up in ``dataset2instruction`` by
    ``sample["subtask_type"]``; its ``"prompt"`` is filled positionally with
    the sample values named in ``"keys_order"``.

    Args:
        sample: A mutable mapping holding ``"subtask_type"`` plus every key
            listed in the matching template's ``"keys_order"``. When the
            template needs ``"verbalizer"`` and the sample lacks it but has
            ``"choices"``, the verbalizer is derived as ``"/".join(choices)``.

    Returns:
        The formatted instruction string, which is also written to
        ``sample["instruction"]`` and printed to stdout.

    Raises:
        KeyError: If the subtask type has no template, or a required key is
            missing from ``sample``.
    """
    subtask_type = sample["subtask_type"]
    try:
        template = dataset2instruction[subtask_type]
    except KeyError:
        raise KeyError(
            "unsupported subtask_type: {!r}".format(subtask_type)
        ) from None

    values = []
    for key in template["keys_order"]:
        if key == "verbalizer" and key not in sample and "choices" in sample:
            # The verbalizer placeholder is the candidate labels joined by
            # "/"; build it here when the caller did not precompute it.
            values.append("/".join(sample["choices"]))
        else:
            values.append(sample[key])

    sample["instruction"] = template["prompt"].format(*values)

    # Echo the built prompt, as the original example does.
    print(sample["instruction"])

    return sample["instruction"]
|
197 |
```
|
198 |
|
199 |
## 预训练或微调 pretrain or finetune
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 309825349
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:672f4076e5023fb899fcf29e0f1a00f81ceea75c491c0642cef7723df673a988
|
3 |
size 309825349
|