JRQi committed on
Commit
686b10f
1 Parent(s): cf3bdbb

Update game3.py

Browse files
Files changed (1) hide show
  1. game3.py +68 -2
game3.py CHANGED
@@ -109,6 +109,8 @@ def func3(num_selected, human_predict, num1, num2, user_important):
109
 
110
  def interpre3(num_selected):
111
  fname = 'data3_convai2_inferred.txt'
 
 
112
  with open(fname) as f:
113
  content = f.readlines()
114
  text = eval(content[int(num_selected*2)])
@@ -116,7 +118,38 @@ def interpre3(num_selected):
116
 
117
  print(interpretation)
118
 
119
- res = {"original": text['text'], "interpretation": interpretation}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  # pos = []
121
  # neg = []
122
  # res = []
@@ -156,6 +189,7 @@ def func3_written(text_written, human_predict, lang_written):
156
 
157
  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
158
  classifier = pipeline("text-classification", model="padmajabfrl/Gender-Classification", device=device)
 
159
 
160
  output = classifier([text_written])
161
 
@@ -181,8 +215,40 @@ def func3_written(text_written, human_predict, lang_written):
181
 
182
  shap_values = explainer([text_written])
183
  interpretation = list(zip(shap_values.data[0], shap_values.values[0, :, 1]))
 
184
 
185
- res = {"original": text_written, "interpretation": interpretation}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  print(res)
187
 
188
  return res, ai_predict, chatbot
 
109
 
110
  def interpre3(num_selected):
111
  fname = 'data3_convai2_inferred.txt'
112
+ tokenizer = AutoTokenizer.from_pretrained("padmajabfrl/Gender-Classification")
113
+
114
  with open(fname) as f:
115
  content = f.readlines()
116
  text = eval(content[int(num_selected*2)])
 
118
 
119
  print(interpretation)
120
 
121
+ encodings = tokenizer(text['text'], return_offsets_mapping=True)
122
+
123
+ print(encodings['offset_mapping'])
124
+ is_subword = [False, False]
125
+ for i in range(2, len(encodings['offset_mapping'])):
126
+ if encodings['offset_mapping'][i][0] == encodings['offset_mapping'][i-1][1]:
127
+ is_subword.append(True)
128
+ else:
129
+ is_subword.append(False)
130
+ print(is_subword)
131
+ interpretation_combined = []
132
+
133
+ index_tmp = 0
134
+ while index_tmp < (len(interpretation) - 1):
135
+ if not is_subword[index_tmp+1]:
136
+ interpretation_combined.append(interpretation[index_tmp])
137
+ index_tmp += 1
138
+ else:
139
+ text_combined = interpretation[index_tmp][0]
140
+ score_combinded = interpretation[index_tmp][1]
141
+ length = 1
142
+ while is_subword[index_tmp+length]:
143
+ text_combined += interpretation[index_tmp+length][0]
144
+ score_combinded += interpretation[index_tmp+length][1]
145
+ length += 1
146
+ interpretation_combined.append((text_combined, score_combinded/length))
147
+ index_tmp += length
148
+
149
+ interpretation_combined.append(('', 0.0))
150
+ print(interpretation_combined)
151
+
152
+ res = {"original": text['text'], "interpretation": interpretation_combined}
153
  # pos = []
154
  # neg = []
155
  # res = []
 
189
 
190
  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
191
  classifier = pipeline("text-classification", model="padmajabfrl/Gender-Classification", device=device)
192
+ tokenizer = AutoTokenizer.from_pretrained("padmajabfrl/Gender-Classification")
193
 
194
  output = classifier([text_written])
195
 
 
215
 
216
  shap_values = explainer([text_written])
217
  interpretation = list(zip(shap_values.data[0], shap_values.values[0, :, 1]))
218
+
219
 
220
+ encodings = tokenizer(text['text'], return_offsets_mapping=True)
221
+
222
+ print(encodings['offset_mapping'])
223
+ is_subword = [False, False]
224
+ for i in range(2, len(encodings['offset_mapping'])):
225
+ if encodings['offset_mapping'][i][0] == encodings['offset_mapping'][i-1][1]:
226
+ is_subword.append(True)
227
+ else:
228
+ is_subword.append(False)
229
+ print(is_subword)
230
+ interpretation_combined = []
231
+
232
+ index_tmp = 0
233
+ while index_tmp < (len(interpretation) - 1):
234
+ if not is_subword[index_tmp+1]:
235
+ interpretation_combined.append(interpretation[index_tmp])
236
+ index_tmp += 1
237
+ else:
238
+ text_combined = interpretation[index_tmp][0]
239
+ score_combinded = interpretation[index_tmp][1]
240
+ length = 1
241
+ while is_subword[index_tmp+length]:
242
+ text_combined += interpretation[index_tmp+length][0]
243
+ score_combinded += interpretation[index_tmp+length][1]
244
+ length += 1
245
+ interpretation_combined.append((text_combined, score_combinded/length))
246
+ index_tmp += length
247
+
248
+ interpretation_combined.append(('', 0.0))
249
+ print(interpretation_combined)
250
+
251
+ res = {"original": text_written, "interpretation": interpretation_combined}
252
  print(res)
253
 
254
  return res, ai_predict, chatbot