izhx committed on
Commit
c573e53
1 Parent(s): 62916e8
Files changed (1) hide show
  1. app.py +9 -5
app.py CHANGED
@@ -25,15 +25,19 @@ def download_file(filename):
25
 
26
  NLTK = nltk_load(download_file('english.pickle'))
27
  sent_cut_en = NLTK.tokenize
28
- LR_GLTR_EN, LR_PPL_EN = [
29
  pickle.load(open(download_file(f'{lang}-gpt2-{name}.pkl'), 'rb'))
30
- for lang, name in [('en', 'gltr'), ('en', 'ppl')]
31
  ]
32
 
33
  NAME_EN = 'gpt2'
34
  TOKENIZER_EN = GPT2Tokenizer.from_pretrained(NAME_EN)
35
  MODEL_EN = GPT2LMHeadModel.from_pretrained(NAME_EN)
36
 
 
 
 
 
37
 
38
  # code borrowed from https://github.com/blmoistawinde/HarvestText
39
  def sent_cut_zh(para: str) -> List[str]:
@@ -143,7 +147,7 @@ def predict_en(text: str) -> List:
143
  def predict_zh(text: str) -> List:
144
  with torch.no_grad():
145
  feat = gpt2_features(text, TOKENIZER_ZH, MODEL_ZH, sent_cut_zh)
146
- out = lr_predict(*feat, None, None, ['人类', 'ChatGPT'])
147
  return out
148
 
149
 
@@ -208,10 +212,10 @@ with gr.Blocks() as demo:
208
  value="对于OpenAI大力出奇迹的工作,自然每个人都有自己的看点。我自己最欣赏的地方是ChatGPT如何解决 “AI校正(Alignment)“这个问题。这个问题也是我们课题组这两年在探索的学术问题之一。"
209
  )
210
  button2 = gr.Button("🤖 预测!")
211
- gr.Markdown("GLTR")
212
  label2_gltr = gr.Textbox(lines=1, label='预测结果 🎃')
213
  score2_gltr = gr.Textbox(lines=1, label='模型概率')
214
- gr.Markdown("PPL")
215
  label2_ppl = gr.Textbox(lines=1, label='PPL 预测结果 🎃')
216
  score2_ppl = gr.Textbox(lines=1, label='PPL 模型概率')
217
 
 
25
 
26
  NLTK = nltk_load(download_file('english.pickle'))
27
  sent_cut_en = NLTK.tokenize
28
+ LR_GLTR_EN, LR_PPL_EN, LR_GLTR_ZH, LR_PPL_ZH = [
29
  pickle.load(open(download_file(f'{lang}-gpt2-{name}.pkl'), 'rb'))
30
+ for lang, name in [('en', 'gltr'), ('en', 'ppl'), ('zh', 'gltr'), ('zh', 'ppl')]
31
  ]
32
 
33
  NAME_EN = 'gpt2'
34
  TOKENIZER_EN = GPT2Tokenizer.from_pretrained(NAME_EN)
35
  MODEL_EN = GPT2LMHeadModel.from_pretrained(NAME_EN)
36
 
37
+ NAME_ZH = 'IDEA-CCNL/Wenzhong-GPT2-110M'
38
+ TOKENIZER_ZH = GPT2Tokenizer.from_pretrained(NAME_ZH)
39
+ MODEL_ZH = GPT2LMHeadModel.from_pretrained(NAME_ZH)
40
+
41
 
42
  # code borrowed from https://github.com/blmoistawinde/HarvestText
43
  def sent_cut_zh(para: str) -> List[str]:
 
147
  def predict_zh(text: str) -> List:
148
  with torch.no_grad():
149
  feat = gpt2_features(text, TOKENIZER_ZH, MODEL_ZH, sent_cut_zh)
150
+ out = lr_predict(*feat, LR_GLTR_ZH, LR_PPL_ZH, ['人类', 'ChatGPT'])
151
  return out
152
 
153
 
 
212
  value="对于OpenAI大力出奇迹的工作,自然每个人都有自己的看点。我自己最欣赏的地方是ChatGPT如何解决 “AI校正(Alignment)“这个问题。这个问题也是我们课题组这两年在探索的学术问题之一。"
213
  )
214
  button2 = gr.Button("🤖 预测!")
215
+ gr.Markdown("GLTR (中文测试集准确率 86.39%)")
216
  label2_gltr = gr.Textbox(lines=1, label='预测结果 🎃')
217
  score2_gltr = gr.Textbox(lines=1, label='模型概率')
218
+ gr.Markdown("PPL (中文测试集准确率 59.04%, 持续优化中...)")
219
  label2_ppl = gr.Textbox(lines=1, label='PPL 预测结果 🎃')
220
  score2_ppl = gr.Textbox(lines=1, label='PPL 模型概率')
221