JunzhaoSun committed on
Commit
14f33b1
1 Parent(s): 85a6cce
Files changed (2) hide show
  1. app.py +93 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/local/bin/python3
2
+ #-*- coding:utf-8 -*-
3
+
4
+ import gradio as gr
5
+ import torch
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM
7
+ import os
8
+
9
+ checkpoint = "gpt2-large"
10
+ # checkpoint = "/innev/open-ai/huggingface/models/gpt2-large"
11
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
12
+ model = AutoModelForCausalLM.from_pretrained(checkpoint)
13
+
14
+ def generate(text):
15
+ # text = 'Who was Jim Henson ? Jim Henson was a'
16
+
17
+ # 编码一段文本
18
+ # 编码后为[8241, 373, 5395, 367, 19069, 5633, 5395, 367, 19069, 373, 257]
19
+ indexed_tokens = tokenizer.encode(text)
20
+ # 转换为pytorch tensor
21
+ # tensor([[ 8241, 373, 5395, 367, 19069, 5633, 5395, 367, 19069, 373, 257]])
22
+ # shape为 torch.Size([1, 11])
23
+ tokens_tensor = torch.tensor([indexed_tokens])
24
+
25
+
26
+ # 设置为evaluation模式,去取消激活dropout等模块。
27
+ # 在huggingface/transformers框架中,默认就是eval模式
28
+ model.eval()
29
+
30
+ # 预测所有token
31
+ with torch.no_grad():
32
+ # 将输入tensor输入,就得到了模型的输出,非常简单
33
+ # outputs是一个元组,所有huggingface/transformers模型的输出都是元组
34
+ # 本初的元组有两个,第一个是预测得分(没经过softmax之前的,也叫作logits),
35
+ # 第二个是past,里面的attention计算的key value值
36
+ # 此时我们需要的是第一个值
37
+ outputs = model(tokens_tensor)
38
+ # predictions shape为 torch.Size([1, 11, 50257]),
39
+ # 也就是11个词每个词的预测得分(没经过softmax之前的)
40
+ # 也叫做logits
41
+ predictions = outputs[0]
42
+
43
+ # 我们需要预测下一个单词,所以是使用predictions第一个batch,最后一个词的logits去计算
44
+ # predicted_index = 582,通过计算最大得分的索引得到的
45
+ predicted_index = torch.argmax(predictions[0, -1, :]).item()
46
+ # 反向解码为我们需要的文本
47
+ predicted_text = tokenizer.decode(indexed_tokens + [predicted_index])
48
+ # predicted_text = tokenizer.decode([predicted_index])
49
+ # 解码后的文本:'Who was Jim Henson? Jim Henson was a man'
50
+ # 成功预测出单词 'man'
51
+
52
+ return predicted_text
53
+
54
+
55
+ def doloop(prompts):
56
+
57
+ text = prompts
58
+ total = 1
59
+ while text[-1] != "." and total < 20:
60
+ text = generate(text)
61
+ print("Index %s: %s" % (total, text))
62
+ total = total + 1
63
+
64
+ return text, total
65
+
66
+
67
+ title = "GPT2 large"
68
+
69
+ description = """
70
+ 本例为使用GPT2模型的简单推测语句DEMO,输入前面的句子,推测出后面的句子。
71
+
72
+ 使用原始模型,未经过微调。只支持英文输入输出。
73
+ """
74
+
75
+ examples = [
76
+ ["Who was Jim Henson ? Jim Henson was a", None],
77
+ ["My name is Julien and I like to", None],
78
+ ["My name is Thomas and my main", None],
79
+ ["My name is Mariama, my favorite", None],
80
+ ["My name is Clara and I am", None],
81
+ ]
82
+
83
+ gr.Interface(
84
+ fn=doloop,
85
+ inputs=gr.Text(label="输入前置语句"),
86
+ outputs=[
87
+ gr.Text(label="补全后输出"),
88
+ gr.Text(label="循环次数"),
89
+ ],
90
+ title=title,
91
+ description=description,
92
+ examples=examples,
93
+ ).launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ git+https://github.com/huggingface/transformers.git
2
+ torch
3
+ gradio