Commit · 1f4b5d0
1 Parent(s): 5ce6476
Update app.py
app.py CHANGED
@@ -36,28 +36,30 @@ def decode(tokenizer, pred_idx, top_clean):
 def encode(tokenizer, text_sentence, add_special_tokens=True):
     text_sentence = text_sentence.replace('<mask>', tokenizer.mask_token)
     # if <mask> is the last token, append a "." so that models dont predict punctuation.
-    if tokenizer.mask_token == text_sentence.split()[-1]:
-        text_sentence += ' .'
+    #if tokenizer.mask_token == text_sentence.split()[-1]:
+    #    text_sentence += ' .'
 
+    tokenized_text = bert_tokenizer.tokenize(text_sentence)
     input_ids = torch.tensor([tokenizer.encode(text_sentence, add_special_tokens=add_special_tokens)])
     if (tokenizer.mask_token in text_sentence.split()):
         mask_idx = torch.where(input_ids == tokenizer.mask_token_id)[1].tolist()[0]
     else:
         mask_idx = 0
-    return input_ids, mask_idx
+    return input_ids, mask_idx,tokenized_text
 
 def get_all_predictions(text_sentence, model_name,top_clean=5):
     # ========================= BERT =================================
-    input_ids, mask_idx = encode(bert_tokenizer, text_sentence)
+    input_ids, mask_idx,tokenized_text = encode(bert_tokenizer, text_sentence)
+
     with torch.no_grad():
         predict = bert_model(input_ids)[0]
     bert = decode(bert_tokenizer, predict[0, mask_idx, :].topk(top_k*5).indices.tolist(), top_clean)
     cls = decode(bert_tokenizer, predict[0, 0, :].topk(top_k*5).indices.tolist(), top_clean)
 
     if ("[MASK]" in text_sentence or "<mask>" in text_sentence):
-        return {'Input sentence':text_sentence,'Model':model_name,'Masked position': bert,'[CLS]':cls}
+        return {'Input sentence':text_sentence,'Tokenized text': tokenized_text,'Model':model_name,'Masked position': bert,'[CLS]':cls}
     else:
-        return {'Input sentence':text_sentence,'Model':model_name,'[CLS]':cls}
+        return {'Input sentence':text_sentence,'Tokenized text': tokenized_text,'Model':model_name,'[CLS]':cls}
 
 def get_bert_prediction(input_text,top_k,model_name):
     try:
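For context, below is a minimal, self-contained sketch of the post-commit behaviour: encode() now also returns the word-piece tokenization, which the prediction path surfaces as a 'Tokenized text' field. The checkpoint name ('bert-base-uncased'), the example sentence, and the top_k value are illustrative assumptions; in the actual app, bert_tokenizer, bert_model, top_k and decode() are defined elsewhere in app.py and are not part of this hunk.

# Hedged sketch of the updated encode() and the masked-LM lookup it feeds.
# Assumes bert-base-uncased; the real app loads its model/tokenizer elsewhere.
import torch
from transformers import BertTokenizer, BertForMaskedLM

bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')       # assumed checkpoint
bert_model = BertForMaskedLM.from_pretrained('bert-base-uncased').eval()  # assumed checkpoint
top_k = 5  # assumed value

def encode(tokenizer, text_sentence, add_special_tokens=True):
    text_sentence = text_sentence.replace('<mask>', tokenizer.mask_token)
    # New in this commit: keep the plain word-piece tokenization so the UI can display it.
    tokenized_text = bert_tokenizer.tokenize(text_sentence)
    input_ids = torch.tensor([tokenizer.encode(text_sentence, add_special_tokens=add_special_tokens)])
    if tokenizer.mask_token in text_sentence.split():
        mask_idx = torch.where(input_ids == tokenizer.mask_token_id)[1].tolist()[0]
    else:
        mask_idx = 0
    return input_ids, mask_idx, tokenized_text

input_ids, mask_idx, tokenized_text = encode(bert_tokenizer, 'Paris is the <mask> of France.')
with torch.no_grad():
    logits = bert_model(input_ids)[0]              # shape: [1, seq_len, vocab_size]
top_ids = logits[0, mask_idx, :].topk(top_k).indices.tolist()
print(tokenized_text)                              # word pieces shown as 'Tokenized text'
print([bert_tokenizer.decode([i]).strip() for i in top_ids])  # candidate fills for the mask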