Spaces:
Runtime error
Runtime error
add zh
Browse files
app.py
CHANGED
@@ -25,15 +25,19 @@ def download_file(filename):
|
|
25 |
|
26 |
NLTK = nltk_load(download_file('english.pickle'))
|
27 |
sent_cut_en = NLTK.tokenize
|
28 |
-
LR_GLTR_EN, LR_PPL_EN = [
|
29 |
pickle.load(open(download_file(f'{lang}-gpt2-{name}.pkl'), 'rb'))
|
30 |
-
for lang, name in [('en', 'gltr'), ('en', 'ppl')]
|
31 |
]
|
32 |
|
33 |
NAME_EN = 'gpt2'
|
34 |
TOKENIZER_EN = GPT2Tokenizer.from_pretrained(NAME_EN)
|
35 |
MODEL_EN = GPT2LMHeadModel.from_pretrained(NAME_EN)
|
36 |
|
|
|
|
|
|
|
|
|
37 |
|
38 |
# code borrowed from https://github.com/blmoistawinde/HarvestText
|
39 |
def sent_cut_zh(para: str) -> List[str]:
|
@@ -143,7 +147,7 @@ def predict_en(text: str) -> List:
|
|
143 |
def predict_zh(text: str) -> List:
|
144 |
with torch.no_grad():
|
145 |
feat = gpt2_features(text, TOKENIZER_ZH, MODEL_ZH, sent_cut_zh)
|
146 |
-
out = lr_predict(*feat,
|
147 |
return out
|
148 |
|
149 |
|
@@ -208,10 +212,10 @@ with gr.Blocks() as demo:
|
|
208 |
value="对于OpenAI大力出奇迹的工作,自然每个人都有自己的看点。我自己最欣赏的地方是ChatGPT如何解决 “AI校正(Alignment)“这个问题。这个问题也是我们课题组这两年在探索的学术问题之一。"
|
209 |
)
|
210 |
button2 = gr.Button("🤖 预测!")
|
211 |
-
gr.Markdown("GLTR")
|
212 |
label2_gltr = gr.Textbox(lines=1, label='预测结果 🎃')
|
213 |
score2_gltr = gr.Textbox(lines=1, label='模型概率')
|
214 |
-
gr.Markdown("PPL")
|
215 |
label2_ppl = gr.Textbox(lines=1, label='PPL 预测结果 🎃')
|
216 |
score2_ppl = gr.Textbox(lines=1, label='PPL 模型概率')
|
217 |
|
|
|
25 |
|
26 |
NLTK = nltk_load(download_file('english.pickle'))
|
27 |
sent_cut_en = NLTK.tokenize
|
28 |
+
LR_GLTR_EN, LR_PPL_EN, LR_GLTR_ZH, LR_PPL_ZH = [
|
29 |
pickle.load(open(download_file(f'{lang}-gpt2-{name}.pkl'), 'rb'))
|
30 |
+
for lang, name in [('en', 'gltr'), ('en', 'ppl'), ('zh', 'gltr'), ('zh', 'ppl')]
|
31 |
]
|
32 |
|
33 |
NAME_EN = 'gpt2'
|
34 |
TOKENIZER_EN = GPT2Tokenizer.from_pretrained(NAME_EN)
|
35 |
MODEL_EN = GPT2LMHeadModel.from_pretrained(NAME_EN)
|
36 |
|
37 |
+
NAME_ZH = 'IDEA-CCNL/Wenzhong-GPT2-110M'
|
38 |
+
TOKENIZER_ZH = GPT2Tokenizer.from_pretrained(NAME_ZH)
|
39 |
+
MODEL_ZH = GPT2LMHeadModel.from_pretrained(NAME_ZH)
|
40 |
+
|
41 |
|
42 |
# code borrowed from https://github.com/blmoistawinde/HarvestText
|
43 |
def sent_cut_zh(para: str) -> List[str]:
|
|
|
147 |
def predict_zh(text: str) -> List:
|
148 |
with torch.no_grad():
|
149 |
feat = gpt2_features(text, TOKENIZER_ZH, MODEL_ZH, sent_cut_zh)
|
150 |
+
out = lr_predict(*feat, LR_GLTR_ZH, LR_PPL_ZH, ['人类', 'ChatGPT'])
|
151 |
return out
|
152 |
|
153 |
|
|
|
212 |
value="对于OpenAI大力出奇迹的工作,自然每个人都有自己的看点。我自己最欣赏的地方是ChatGPT如何解决 “AI校正(Alignment)“这个问题。这个问题也是我们课题组这两年在探索的学术问题之一。"
|
213 |
)
|
214 |
button2 = gr.Button("🤖 预测!")
|
215 |
+
gr.Markdown("GLTR (中文测试集准确率 86.39%)")
|
216 |
label2_gltr = gr.Textbox(lines=1, label='预测结果 🎃')
|
217 |
score2_gltr = gr.Textbox(lines=1, label='模型概率')
|
218 |
+
gr.Markdown("PPL (中文测试集准确率 59.04%, 持续优化中...)")
|
219 |
label2_ppl = gr.Textbox(lines=1, label='PPL 预测结果 🎃')
|
220 |
score2_ppl = gr.Textbox(lines=1, label='PPL 模型概率')
|
221 |
|