JRQi committed on
Commit
c297653
1 Parent(s): 364b55a

Update game1.py

Browse files
Files changed (1) hide show
  1. game1.py +13 -9
game1.py CHANGED
@@ -5,6 +5,9 @@ import pandas as pd
5
  import gradio as gr
6
  import numpy as np
7
  import torch
 
 
 
8
 
9
  def read1(lang, num_selected_former):
10
  if lang in ['en']:
@@ -107,13 +110,11 @@ def func1(lang_selected, num_selected, human_predict, num1, num2, user_important
107
 
108
  # (START) off-the-shelf version -- slow at the beginning
109
  # Load model directly
110
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
111
-
112
  tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
113
  model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
114
 
115
  # Use a pipeline as a high-level helper
116
- from transformers import pipeline
117
 
118
  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
119
  print(device)
@@ -272,13 +273,21 @@ def func1(lang_selected, num_selected, human_predict, num1, num2, user_important
272
  def interpre1(lang_selected, num_selected):
273
  if lang_selected in ['en']:
274
  fname = 'data1_en.txt'
 
275
  else:
276
  fname = 'data1_nl_10.txt'
 
 
277
  with open(fname) as f:
278
  content = f.readlines()
279
  text = eval(content[int(num_selected*2)])
280
  interpretation = eval(content[int(num_selected*2+1)])
281
-
 
 
 
 
 
282
  print(interpretation)
283
 
284
  res = {"original": text['text'], "interpretation": interpretation}
@@ -337,8 +346,6 @@ def func1_written(text_written, human_predict, lang_written):
337
  '''
338
 
339
  # (START) off-the-shelf version
340
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
341
- from transformers import pipeline
342
 
343
 
344
  # tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
@@ -373,9 +380,6 @@ def func1_written(text_written, human_predict, lang_written):
373
  ai_predict += int(random.randint(-1, 1))
374
  chatbot.append(("AI thinks in a different way from human. 😉", "⬅️ Feel free to try another one! ⬅️"))
375
 
376
-
377
- import shap
378
-
379
  # sentiment_classifier = pipeline("text-classification", return_all_scores=True)
380
  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
381
 
 
5
  import gradio as gr
6
  import numpy as np
7
  import torch
8
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
9
+ from transformers import pipeline
10
+ import shap
11
 
12
  def read1(lang, num_selected_former):
13
  if lang in ['en']:
 
110
 
111
  # (START) off-the-shelf version -- slow at the beginning
112
  # Load model directly
113
+
 
114
  tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
115
  model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
116
 
117
  # Use a pipeline as a high-level helper
 
118
 
119
  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
120
  print(device)
 
273
  def interpre1(lang_selected, num_selected):
274
  if lang_selected in ['en']:
275
  fname = 'data1_en.txt'
276
+ tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
277
  else:
278
  fname = 'data1_nl_10.txt'
279
+ tokenizer = AutoTokenizer.from_pretrained("DTAI-KULeuven/robbert-v2-dutch-sentiment")
280
+
281
  with open(fname) as f:
282
  content = f.readlines()
283
  text = eval(content[int(num_selected*2)])
284
  interpretation = eval(content[int(num_selected*2+1)])
285
+
286
+ encodings = tokenizer(text['text'], is_pretokenized=False, return_offsets_mapping=True)
287
+ print(encodings['offset_mapping'])
288
+ is_subword = np.array(encodings['offset_mapping'])[:,0] != 0
289
+ print(is_subword)
290
+ print(abc)
291
  print(interpretation)
292
 
293
  res = {"original": text['text'], "interpretation": interpretation}
 
346
  '''
347
 
348
  # (START) off-the-shelf version
 
 
349
 
350
 
351
  # tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
 
380
  ai_predict += int(random.randint(-1, 1))
381
  chatbot.append(("AI thinks in a different way from human. 😉", "⬅️ Feel free to try another one! ⬅️"))
382
 
 
 
 
383
  # sentiment_classifier = pipeline("text-classification", return_all_scores=True)
384
  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
385