Spaces:
Runtime error
Runtime error
Commit
•
e95e269
0
Parent(s):
Duplicate from yizhangliu/chatGPT
Browse filesCo-authored-by: yizhangliu <yizhangliu@users.noreply.huggingface.co>
- .gitattributes +34 -0
- README.md +13 -0
- app.py +516 -0
- baidu_translate/module.py +106 -0
- encoder.json +0 -0
- encoder.py +120 -0
- requirements.txt +11 -0
- utils.py +54 -0
- vocab.bpe +0 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: ChatGPT
|
3 |
+
emoji: 📊
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: blue
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.12.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
duplicated_from: yizhangliu/chatGPT
|
11 |
+
---
|
12 |
+
|
13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,516 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os, sys, json
|
2 |
+
os.system("pip install gradio==3.19.1")
|
3 |
+
import openai
|
4 |
+
import gradio as gr
|
5 |
+
|
6 |
+
from loguru import logger
|
7 |
+
import paddlehub as hub
|
8 |
+
import random
|
9 |
+
from encoder import get_encoder
|
10 |
+
|
11 |
+
openai.api_key = os.getenv("OPENAI_API_KEY")
|
12 |
+
|
13 |
+
from utils import get_tmt_client, getTextTrans_tmt
|
14 |
+
tmt_client = get_tmt_client()
|
15 |
+
|
16 |
+
def getTextTrans(text, source='zh', target='en'):
    """Translate ``text`` from ``source`` to ``target``, falling back to the input.

    Text that contains no CJK ideograph is returned untouched when the target
    is English, so no translation call is made for it.  Translation is
    best-effort: if the backend call raises, the original text is returned
    instead of propagating the error.

    :param text: Text to translate.  ``None`` and ``""`` are returned as-is.
    :param source: Source language code passed to the translation backend.
    :param target: Target language code passed to the translation backend.
    :return: The translated text, or ``text`` unchanged when no translation
        was needed or the backend call failed.
    """
    # Nothing to translate; this also avoids iterating over None below.
    if not text:
        return text

    # U+4E00..U+9FFF is the CJK Unified Ideographs block.
    has_chinese = any('\u4e00' <= ch <= '\u9fff' for ch in text)
    if not has_chinese and target == 'en':
        return text

    try:
        return getTextTrans_tmt(tmt_client, text, source, target)
    except Exception:
        # Deliberate best-effort: an untranslated prompt is still usable, so
        # a backend failure must not break the request.
        return text
|
31 |
+
|
32 |
+
start_work = """async() => {
|
33 |
+
function isMobile() {
|
34 |
+
try {
|
35 |
+
document.createEvent("TouchEvent"); return true;
|
36 |
+
} catch(e) {
|
37 |
+
return false;
|
38 |
+
}
|
39 |
+
}
|
40 |
+
function getClientHeight()
|
41 |
+
{
|
42 |
+
var clientHeight=0;
|
43 |
+
if(document.body.clientHeight&&document.documentElement.clientHeight) {
|
44 |
+
var clientHeight = (document.body.clientHeight<document.documentElement.clientHeight)?document.body.clientHeight:document.documentElement.clientHeight;
|
45 |
+
} else {
|
46 |
+
var clientHeight = (document.body.clientHeight>document.documentElement.clientHeight)?document.body.clientHeight:document.documentElement.clientHeight;
|
47 |
+
}
|
48 |
+
return clientHeight;
|
49 |
+
}
|
50 |
+
|
51 |
+
function setNativeValue(element, value) {
|
52 |
+
const valueSetter = Object.getOwnPropertyDescriptor(element.__proto__, 'value').set;
|
53 |
+
const prototype = Object.getPrototypeOf(element);
|
54 |
+
const prototypeValueSetter = Object.getOwnPropertyDescriptor(prototype, 'value').set;
|
55 |
+
|
56 |
+
if (valueSetter && valueSetter !== prototypeValueSetter) {
|
57 |
+
prototypeValueSetter.call(element, value);
|
58 |
+
} else {
|
59 |
+
valueSetter.call(element, value);
|
60 |
+
}
|
61 |
+
element.dispatchEvent(new Event('input', { bubbles: true }));
|
62 |
+
}
|
63 |
+
function get_clear_innerHTML(innerHTML) {
|
64 |
+
innerHTML = innerHTML.replace(/<p>|<\\/p>|\\n/g, '');
|
65 |
+
regexp = /\\★☆(.*?)\\☆★/;
|
66 |
+
match = innerHTML.match(regexp);
|
67 |
+
if (match) {
|
68 |
+
innerHTML = match[1];
|
69 |
+
}
|
70 |
+
return innerHTML;
|
71 |
+
}
|
72 |
+
function save_conversation(chatbot) {
|
73 |
+
var conversations = new Array();
|
74 |
+
var conversations_clear = new Array();
|
75 |
+
for (var i = 0; i < chatbot.children.length; i++) {
|
76 |
+
testid_icon = '☟:'; //'user'
|
77 |
+
if (chatbot.children[i].dataset['testid'] == 'bot') {
|
78 |
+
testid_icon = '☝:'; //'bot'
|
79 |
+
}
|
80 |
+
innerHTML = chatbot.children[i].innerHTML;
|
81 |
+
conversations.push(testid_icon + innerHTML);
|
82 |
+
if (innerHTML.indexOf("<img") == -1 && innerHTML.indexOf("null_") == -1) {
|
83 |
+
conversations_clear.push(testid_icon + get_clear_innerHTML(innerHTML));
|
84 |
+
}
|
85 |
+
}
|
86 |
+
var json_str = JSON.stringify(conversations);
|
87 |
+
setNativeValue(window['chat_his'], JSON.stringify(conversations_clear));
|
88 |
+
localStorage.setItem('chatgpt_conversations', json_str);
|
89 |
+
}
|
90 |
+
function img_click(img) {
|
91 |
+
this_width = parseInt(img.style.width) + 20;
|
92 |
+
if (this_width > 100) {
|
93 |
+
this_width = 20;
|
94 |
+
}
|
95 |
+
img.style.width = this_width + "%";
|
96 |
+
img.style.height = img.offsetWidth + 'px';
|
97 |
+
}
|
98 |
+
function load_conversation(chatbot) {
|
99 |
+
var json_str = localStorage.getItem('chatgpt_conversations');
|
100 |
+
if (json_str) {
|
101 |
+
var conversations_clear = new Array();
|
102 |
+
conversations = JSON.parse(json_str);
|
103 |
+
for (var i = 0; i < conversations.length; i++) {
|
104 |
+
innerHTML = conversations[i];
|
105 |
+
if (innerHTML.indexOf("☝:") == -1) {
|
106 |
+
className = "message user svelte-134zwfa";
|
107 |
+
bgcolor = "#16a34a";
|
108 |
+
testid = "user";
|
109 |
+
testid_icon = '☟:'; //'user'
|
110 |
+
} else {
|
111 |
+
className = "message bot svelte-134zwfa";
|
112 |
+
bgcolor = "#2563eb";
|
113 |
+
testid = "bot";
|
114 |
+
testid_icon = '☝:'; //'bot'
|
115 |
+
}
|
116 |
+
var new_div = document.createElement("div");
|
117 |
+
new_div.className = className;
|
118 |
+
new_div.style.backgroundColor = bgcolor;
|
119 |
+
new_div.dataset.testid = testid;
|
120 |
+
if (innerHTML.indexOf("data:image/jpeg") >= 0) {
|
121 |
+
new_div.style.width = "20%";
|
122 |
+
new_div.style.padding = "0.2rem";
|
123 |
+
new_div.onclick = function(e) {
|
124 |
+
img_click(this);
|
125 |
+
}
|
126 |
+
setTimeout(function(){
|
127 |
+
new_div.style.height = new_div.offsetWidth + 'px';
|
128 |
+
new_div.children[0].setAttribute('style', 'max-width: none; width:100%');
|
129 |
+
}, 10);
|
130 |
+
}
|
131 |
+
innerHTML = innerHTML.replace("☝:", "");
|
132 |
+
innerHTML = innerHTML.replace("☟:", "");
|
133 |
+
new_div.innerHTML = innerHTML;
|
134 |
+
if (innerHTML.indexOf("null_") != -1) {
|
135 |
+
new_div.style.display = 'none';
|
136 |
+
}
|
137 |
+
chatbot.appendChild(new_div);
|
138 |
+
|
139 |
+
if (innerHTML.indexOf("<img") == -1 && innerHTML.indexOf("null_") == -1) {
|
140 |
+
conversations_clear.push(testid_icon + get_clear_innerHTML(innerHTML));
|
141 |
+
}
|
142 |
+
}
|
143 |
+
setNativeValue(window['chat_his'], JSON.stringify(conversations_clear));
|
144 |
+
setTimeout(function(){
|
145 |
+
window['chat_bot1'].children[1].scrollTop = window['chat_bot1'].children[1].scrollHeight;
|
146 |
+
}, 500);
|
147 |
+
}
|
148 |
+
}
|
149 |
+
var gradioEl = document.querySelector('body > gradio-app').shadowRoot;
|
150 |
+
if (!gradioEl) {
|
151 |
+
gradioEl = document.querySelector('body > gradio-app');
|
152 |
+
}
|
153 |
+
|
154 |
+
if (typeof window['gradioEl'] === 'undefined') {
|
155 |
+
window['gradioEl'] = gradioEl;
|
156 |
+
|
157 |
+
const page1 = window['gradioEl'].querySelectorAll('#page_1')[0];
|
158 |
+
const page2 = window['gradioEl'].querySelectorAll('#page_2')[0];
|
159 |
+
|
160 |
+
page1.style.display = "none";
|
161 |
+
page2.style.display = "block";
|
162 |
+
window['div_count'] = 0;
|
163 |
+
window['chat_radio_0'] = window['gradioEl'].querySelectorAll('#chat_radio')[0].querySelectorAll('input[name=radio-chat_radio]')[0];
|
164 |
+
window['chat_radio_1'] = window['gradioEl'].querySelectorAll('#chat_radio')[0].querySelectorAll('input[name=radio-chat_radio]')[1];
|
165 |
+
window['chat_bot'] = window['gradioEl'].querySelectorAll('#chat_bot')[0];
|
166 |
+
window['chat_bot1'] = window['gradioEl'].querySelectorAll('#chat_bot1')[0];
|
167 |
+
window['my_prompt'] = window['gradioEl'].querySelectorAll('#my_prompt')[0].querySelectorAll('textarea')[0];
|
168 |
+
window['my_prompt_en'] = window['gradioEl'].querySelectorAll('#my_prompt_en')[0].querySelectorAll('textarea')[0];
|
169 |
+
window['chat_his'] = window['gradioEl'].querySelectorAll('#chat_history')[0].querySelectorAll('textarea')[0];
|
170 |
+
chat_row = window['gradioEl'].querySelectorAll('#chat_row')[0];
|
171 |
+
prompt_row = window['gradioEl'].querySelectorAll('#prompt_row')[0];
|
172 |
+
window['chat_bot1'].children[1].children[0].textContent = '';
|
173 |
+
|
174 |
+
clientHeight = getClientHeight();
|
175 |
+
if (isMobile()) {
|
176 |
+
output_htmls = window['gradioEl'].querySelectorAll('.output-html');
|
177 |
+
for (var i = 0; i < output_htmls.length; i++) {
|
178 |
+
output_htmls[i].style.display = "none";
|
179 |
+
}
|
180 |
+
new_height = (clientHeight - 250) + 'px';
|
181 |
+
} else {
|
182 |
+
new_height = (clientHeight - 350) + 'px';
|
183 |
+
}
|
184 |
+
chat_row.style.height = new_height;
|
185 |
+
window['chat_bot'].style.height = new_height;
|
186 |
+
window['chat_bot'].children[1].style.height = new_height;
|
187 |
+
window['chat_bot1'].style.height = new_height;
|
188 |
+
window['chat_bot1'].children[1].style.height = new_height;
|
189 |
+
window['chat_bot1'].children[0].style.top = (parseInt(window['chat_bot1'].style.height)-window['chat_bot1'].children[0].offsetHeight-2) + 'px';
|
190 |
+
prompt_row.children[0].style.flex = 'auto';
|
191 |
+
prompt_row.children[0].style.width = '100%';
|
192 |
+
window['gradioEl'].querySelectorAll('#chat_radio')[0].style.flex = 'auto';
|
193 |
+
window['gradioEl'].querySelectorAll('#chat_radio')[0].style.width = '100%';
|
194 |
+
prompt_row.children[0].setAttribute('style','flex-direction: inherit; flex: 1 1 auto; width: 100%;border-color: green;border-width: 1px !important;')
|
195 |
+
window['chat_bot1'].children[1].setAttribute('style', 'border-bottom-right-radius:0;top:unset;bottom:0;padding-left:0.1rem');
|
196 |
+
window['gradioEl'].querySelectorAll('#btns_row')[0].children[0].setAttribute('style', 'min-width: min(10px, 100%); flex-grow: 1');
|
197 |
+
window['gradioEl'].querySelectorAll('#btns_row')[0].children[1].setAttribute('style', 'min-width: min(10px, 100%); flex-grow: 1');
|
198 |
+
|
199 |
+
load_conversation(window['chat_bot1'].children[1].children[0]);
|
200 |
+
window['chat_bot1'].children[1].scrollTop = window['chat_bot1'].children[1].scrollHeight;
|
201 |
+
|
202 |
+
window['gradioEl'].querySelectorAll('#clear-btn')[0].onclick = function(e){
|
203 |
+
if (confirm('Clear all outputs?')==true) {
|
204 |
+
for (var i = window['chat_bot'].children[1].children[0].children.length-1; i >= 0; i--) {
|
205 |
+
window['chat_bot'].children[1].children[0].removeChild(window['chat_bot'].children[1].children[0].children[i]);
|
206 |
+
}
|
207 |
+
for (var i = window['chat_bot1'].children[1].children[0].children.length-1; i >= 0; i--) {
|
208 |
+
window['chat_bot1'].children[1].children[0].removeChild(window['chat_bot1'].children[1].children[0].children[i]);
|
209 |
+
}
|
210 |
+
window['div_count'] = 0;
|
211 |
+
save_conversation(window['chat_bot1'].children[1].children[0]);
|
212 |
+
}
|
213 |
+
}
|
214 |
+
|
215 |
+
function set_buttons(action) {
|
216 |
+
window['submit-btn'].disabled = action;
|
217 |
+
window['clear-btn'].disabled = action;
|
218 |
+
window['chat_radio_0'].disabled = action;
|
219 |
+
window['chat_radio_1'].disabled = action;
|
220 |
+
btn_color = 'color:#000';
|
221 |
+
if (action) {
|
222 |
+
btn_color = 'color:#ccc';
|
223 |
+
}
|
224 |
+
window['submit-btn'].setAttribute('style', btn_color);
|
225 |
+
window['clear-btn'].setAttribute('style', btn_color);
|
226 |
+
window['chat_radio_0'].setAttribute('style', btn_color);
|
227 |
+
window['chat_radio_1'].setAttribute('style', btn_color);
|
228 |
+
}
|
229 |
+
window['prevPrompt'] = '';
|
230 |
+
window['doCheckPrompt'] = 0;
|
231 |
+
window['prevImgSrc'] = '';
|
232 |
+
window['checkChange'] = function checkChange() {
|
233 |
+
try {
|
234 |
+
if (window['chat_radio_0'].checked) {
|
235 |
+
dot_flashing = window['chat_bot'].children[1].children[0].querySelectorAll('.dot-flashing');
|
236 |
+
if (window['chat_bot'].children[1].children[0].children.length > window['div_count'] && dot_flashing.length == 0) {
|
237 |
+
new_len = window['chat_bot'].children[1].children[0].children.length - window['div_count'];
|
238 |
+
for (var i = 0; i < new_len; i++) {
|
239 |
+
new_div = window['chat_bot'].children[1].children[0].children[window['div_count'] + i].cloneNode(true);
|
240 |
+
window['chat_bot1'].children[1].children[0].appendChild(new_div);
|
241 |
+
}
|
242 |
+
window['div_count'] = window['chat_bot'].children[1].children[0].children.length;
|
243 |
+
window['chat_bot1'].children[1].scrollTop = window['chat_bot1'].children[1].scrollHeight;
|
244 |
+
save_conversation(window['chat_bot1'].children[1].children[0]);
|
245 |
+
}
|
246 |
+
if (window['chat_bot'].children[0].children.length > 1) {
|
247 |
+
set_buttons(true);
|
248 |
+
window['chat_bot1'].children[0].textContent = window['chat_bot'].children[0].children[1].textContent;
|
249 |
+
} else {
|
250 |
+
set_buttons(false);
|
251 |
+
window['chat_bot1'].children[0].textContent = '';
|
252 |
+
}
|
253 |
+
} else {
|
254 |
+
img_index = 0;
|
255 |
+
draw_prompt_en = window['my_prompt_en'].value;
|
256 |
+
if (window['doCheckPrompt'] == 0 && window['prevPrompt'] != draw_prompt_en) {
|
257 |
+
console.log('_____draw_prompt_en___[' + draw_prompt_en + ']_');
|
258 |
+
window['doCheckPrompt'] = 1;
|
259 |
+
window['prevPrompt'] = draw_prompt_en;
|
260 |
+
|
261 |
+
tabitems = window['gradioEl'].querySelectorAll('.tabitem');
|
262 |
+
for (var i = 0; i < tabitems.length; i++) {
|
263 |
+
inputText = tabitems[i].children[0].children[1].children[0].querySelectorAll('input')[0];
|
264 |
+
setNativeValue(inputText, draw_prompt_en);
|
265 |
+
}
|
266 |
+
setTimeout(function() {
|
267 |
+
window['draw_prompt'] = window['my_prompt'].value;
|
268 |
+
btns = window['gradioEl'].querySelectorAll('button');
|
269 |
+
for (var i = 0; i < btns.length; i++) {
|
270 |
+
if (['Generate image','Run'].includes(btns[i].innerText)) {
|
271 |
+
btns[i].click();
|
272 |
+
}
|
273 |
+
}
|
274 |
+
window['doCheckPrompt'] = 0;
|
275 |
+
}, 10);
|
276 |
+
}
|
277 |
+
tabitems = window['gradioEl'].querySelectorAll('.tabitem');
|
278 |
+
imgs = tabitems[img_index].children[0].children[1].children[1].querySelectorAll("img");
|
279 |
+
if (imgs.length > 0) {
|
280 |
+
if (window['prevImgSrc'] !== imgs[0].src) {
|
281 |
+
var user_div = document.createElement("div");
|
282 |
+
user_div.className = "message user svelte-134zwfa";
|
283 |
+
user_div.style.backgroundColor = "#16a34a";
|
284 |
+
user_div.dataset.testid = 'user';
|
285 |
+
user_div.innerHTML = "<p>作画: " + window['draw_prompt'] + "</p><img></img>";
|
286 |
+
window['chat_bot1'].children[1].children[0].appendChild(user_div);
|
287 |
+
var bot_div = document.createElement("div");
|
288 |
+
bot_div.className = "message bot svelte-134zwfa";
|
289 |
+
bot_div.style.backgroundColor = "#2563eb";
|
290 |
+
bot_div.style.width = "20%";
|
291 |
+
bot_div.dataset.testid = 'bot';
|
292 |
+
bot_div.onclick = function(e){
|
293 |
+
img_click(this);
|
294 |
+
}
|
295 |
+
setTimeout(function(){
|
296 |
+
bot_div.style.height = bot_div.offsetWidth + 'px';
|
297 |
+
bot_div.children[0].setAttribute('style', 'max-width:none; width:100%');
|
298 |
+
}, 10);
|
299 |
+
bot_div.style.padding = "0.2rem";
|
300 |
+
bot_div.appendChild(imgs[0].cloneNode(true));
|
301 |
+
window['chat_bot1'].children[1].children[0].appendChild(bot_div);
|
302 |
+
|
303 |
+
window['chat_bot1'].children[1].scrollTop = window['chat_bot1'].children[1].scrollHeight;
|
304 |
+
window['prevImgSrc'] = imgs[0].src;
|
305 |
+
save_conversation(window['chat_bot1'].children[1].children[0]);
|
306 |
+
}
|
307 |
+
}
|
308 |
+
if (tabitems[img_index].children[0].children[1].children[1].children[0].children.length > 1) {
|
309 |
+
tips = tabitems[img_index].children[0].children[1].children[1].children[0].textContent;
|
310 |
+
if (tips.indexOf("Error") == -1) {
|
311 |
+
set_buttons(true);
|
312 |
+
} else {
|
313 |
+
set_buttons(false);
|
314 |
+
}
|
315 |
+
window['chat_bot1'].children[0].textContent = '作画中 ' + tips;
|
316 |
+
} else {
|
317 |
+
set_buttons(false);
|
318 |
+
window['chat_bot1'].children[0].textContent = '';
|
319 |
+
}
|
320 |
+
}
|
321 |
+
|
322 |
+
} catch(e) {
|
323 |
+
}
|
324 |
+
}
|
325 |
+
window['checkChange_interval'] = window.setInterval("window.checkChange()", 500);
|
326 |
+
}
|
327 |
+
|
328 |
+
return false;
|
329 |
+
}"""
|
330 |
+
|
331 |
+
space_ids = {
|
332 |
+
"spaces/stabilityai/stable-diffusion":"Stable Diffusion 2.1",
|
333 |
+
# "spaces/runwayml/stable-diffusion-v1-5":"Stable Diffusion 1.5",
|
334 |
+
# "spaces/stabilityai/stable-diffusion-1":"Stable Diffusion 1.0",
|
335 |
+
}
|
336 |
+
|
337 |
+
tab_actions = []
|
338 |
+
tab_titles = []
|
339 |
+
|
340 |
+
for space_id in space_ids.keys():
|
341 |
+
print(space_id, space_ids[space_id])
|
342 |
+
try:
|
343 |
+
tab = gr.Interface.load(space_id)
|
344 |
+
tab_actions.append(tab)
|
345 |
+
tab_titles.append(space_ids[space_id])
|
346 |
+
except Exception as e:
|
347 |
+
logger.info(f"load_fail__{space_id}_{e}")
|
348 |
+
|
349 |
+
token_encoder = get_encoder()
|
350 |
+
total_tokens = 4096
|
351 |
+
max_output_tokens = 1024
|
352 |
+
max_input_tokens = total_tokens - max_output_tokens
|
353 |
+
|
354 |
+
def set_openai_api_key(api_key):
    """Install ``api_key`` as the OpenAI key when it looks like a secret key.

    The value is accepted only if, after stripping surrounding whitespace, it
    starts with ``sk-`` and is longer than 50 characters.  Anything else is
    ignored and the previously configured key stays in effect.

    NOTE(review): ``openai.api_key`` is a module attribute, so a key set here
    applies to every later call made through the ``openai`` module in this
    process, not only to the caller's session.

    :param api_key: Candidate key as typed/pasted by the user; may be empty
        or ``None``.
    :return: ``None``.
    """
    if not api_key:
        return
    # Pasted keys frequently carry a trailing space or newline.
    api_key = api_key.strip()
    if api_key.startswith("sk-") and len(api_key) > 50:
        openai.api_key = api_key
|
357 |
+
|
358 |
+
def get_response_from_openai(input, chat_history, model_radio):
    """Ask OpenAI for the next reply, given the new message and earlier turns.

    :param input: The user's new message.  (The name shadows the builtin but
        is kept so keyword callers continue to work.)
    :param chat_history: JSON-encoded list of earlier turns, each a string
        prefixed with ``☟:`` (user) or ``☝:`` (bot).  Empty, ``None`` or
        malformed values are treated as "no history".
    :param model_radio: ``'GPT-3.0'`` uses the ``text-davinci-003`` completion
        endpoint; any other value uses ``gpt-3.5-turbo`` chat completion.
    :return: The reply with newlines converted to ``<br>``, or a message
        starting with ``Openai said:`` when the API call fails or returns
        nothing.
    """
    user_tag = "☟:"
    bot_tag = "☝:"
    error_prefix = "Openai said:"
    completion_mode = model_radio == 'GPT-3.0'

    def build_context(turns):
        """Return ``(prompt, messages)`` from the newest turns that fit the budget."""
        prompt = 'AI:' if completion_mode else ''
        newest_first = []
        for raw_turn in reversed(turns):
            turn = raw_turn.replace("<br>", '\n\n')
            if turn.startswith(bot_tag):
                role, label, text = "assistant", "AI:", turn[len(bot_tag):]
            elif turn.startswith(user_tag):
                role, label, text = "user", "Human:", turn[len(user_tag):]
            elif turn.startswith(error_prefix):
                role, label, text = "assistant", "AI:", turn
            else:
                # Untagged entries carry no role and are not sent.
                continue

            # Earlier error notices are produced by this function, not by the
            # model, so they are blanked rather than fed back as context.
            # History entries carry a role tag, hence the check after the
            # tag has been removed.
            if role == "assistant" and text.startswith(error_prefix):
                text = ''

            if completion_mode:
                candidate = label + text + '\n' + prompt
            else:
                candidate = text + prompt

            # Check the budget BEFORE accepting the turn so the prompt never
            # grows past the limit; the newest turn is always kept so the
            # request is never empty.
            if newest_first and len(token_encoder.encode(candidate)) > max_input_tokens:
                break
            prompt = candidate
            newest_first.append({"role": role, "content": text})
        return prompt, newest_first[::-1]

    # The hidden history field may contain paragraph markup around the JSON.
    history_json = (chat_history or '').replace("<p>", "").replace("</p>", "")
    chat_history_list = []
    if history_json != '':
        try:
            chat_history_list = json.loads(history_json)
        except ValueError as e:
            # Losing context is preferable to failing the whole request.
            logger.info(f"chat_history_parse_error__{e}")
        if not isinstance(chat_history_list, list):
            chat_history_list = []
    chat_history_list.append(f'{user_tag}{input}')

    try:
        out_prompt, messages = build_context(chat_history_list)
        if completion_mode:
            response = openai.Completion.create(
                model="text-davinci-003",
                prompt=out_prompt,
                temperature=0.7,
                max_tokens=max_output_tokens,
                top_p=1,
                frequency_penalty=0,
                presence_penalty=0,
                stop=[" Human:", " AI:"]
            )
            output = response.choices[0].text
        else:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=messages,
                temperature=0.7,
                max_tokens=max_output_tokens,
                top_p=1,
                frequency_penalty=0,
                presence_penalty=0,
                stop=[" Human:", " AI:"]
            )
            output = response.choices[0].message['content']

        # Drop only the leading blank line; paragraph breaks inside the
        # reply must survive.
        if output.startswith("\n\n"):
            output = output[2:]
        output = output.replace('\n', '<br>')
        if output == '':
            output = "Openai said: I'm too tired."
        response_usage = response.usage
    except Exception as e:
        # Top-level boundary for the API call: report the failure to the
        # user as a chat message instead of raising into the UI.
        logger.info(f"openai_create_error__{e}")
        output = f"Openai said: {e} Perhaps enter your OpenAI API key."
        response_usage = {"completion_tokens": -1, "prompt_tokens": -1, "total_tokens": -1}

    logger.info(f'response_usage={response_usage}')
    return output
|
438 |
+
|
439 |
+
def chat(input0, input1, chat_radio, model_radio, all_chat_history, chat_history):
    """Handle a submit: answer via OpenAI or prepare an image prompt.

    :param input0: Text typed by the user.  ``None``, empty or
        whitespace-only input is a no-op.
    :param input1: Current value of the hidden English-prompt box; passed
        through unchanged except in image mode.
    :param chat_radio: ``"Talk to chatGPT"`` selects chat mode; any other
        value selects image mode.
    :param model_radio: Model choice forwarded to ``get_response_from_openai``.
    :param all_chat_history: JSON list of ``[user, bot]`` pairs accumulated so
        far; empty or ``None`` means no pairs yet.
    :param chat_history: JSON list of tagged turns forwarded to
        ``get_response_from_openai`` as context.
    :return: ``(pairs, json_of_pairs, new_input0, new_input1)``.
    """
    all_chat = json.loads(all_chat_history) if all_chat_history else []

    # Nothing to send: leave every field as it was.
    if not input0 or not input0.strip():
        return all_chat, json.dumps(all_chat), input0, input1

    if chat_radio == "Talk to chatGPT":
        response = get_response_from_openai(input0, chat_history, model_radio)
        all_chat.append((input0, response))
        # Clear the input box after a successful chat turn.
        return all_chat, json.dumps(all_chat), '', input1

    # Image mode: the random suffix makes the prompt text differ on every
    # submit, even when the user repeats the same request.
    prompt_en = getTextTrans(input0, source='zh', target='en') + f',{random.randint(0,sys.maxsize)}'
    return all_chat, json.dumps(all_chat), input0, prompt_en
|
454 |
+
|
455 |
+
def chat_radio_change(chat_radio):
    """Toggle the model selector and API-key box with the selected mode.

    :param chat_radio: Currently selected mode label.
    :return: A pair of component updates (radio, text box) that are visible
        only when the mode is ``"Talk to chatGPT"``.
    """
    show_chat_controls = chat_radio == "Talk to chatGPT"
    return (
        gr.Radio.update(visible=show_chat_controls),
        gr.Text.update(visible=show_chat_controls),
    )
|
460 |
+
|
461 |
+
with gr.Blocks(title='Talk to chatGPT') as demo:
|
462 |
+
with gr.Row(elem_id="page_0", visible=False) as page_0:
|
463 |
+
gr.HTML("<p>You can duplicating this space and use your own session token: <a style='display:inline-block' href='https://huggingface.co/spaces/yizhangliu/chatGPT?duplicate=true'><img src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14' alt='Duplicate Space'></a></p>")
|
464 |
+
with gr.Group(elem_id="page_1", visible=True) as page_1:
|
465 |
+
with gr.Box():
|
466 |
+
with gr.Row():
|
467 |
+
start_button = gr.Button("Let's talk to chatGPT!", elem_id="start-btn", visible=True)
|
468 |
+
start_button.click(fn=None, inputs=[], outputs=[], _js=start_work)
|
469 |
+
|
470 |
+
with gr.Row(elem_id="page_2", visible=False) as page_2:
|
471 |
+
with gr.Row(elem_id="chat_row"):
|
472 |
+
chatbot = gr.Chatbot(elem_id="chat_bot", visible=False).style(color_map=("green", "blue"))
|
473 |
+
chatbot1 = gr.Chatbot(elem_id="chat_bot1").style(color_map=("green", "blue"))
|
474 |
+
with gr.Row(elem_id="prompt_row"):
|
475 |
+
prompt_input0 = gr.Textbox(lines=2, label="input", elem_id="my_prompt", show_label=True)
|
476 |
+
prompt_input1 = gr.Textbox(lines=4, label="prompt", elem_id="my_prompt_en", visible=False)
|
477 |
+
chat_history = gr.Textbox(lines=4, label="chat_history", elem_id="chat_history", visible=False)
|
478 |
+
all_chat_history = gr.Textbox(lines=4, label="会话上下文:", elem_id="all_chat_history", visible=False)
|
479 |
+
|
480 |
+
chat_radio = gr.Radio(["Talk to chatGPT", "Text to Image"], elem_id="chat_radio",value="Talk to chatGPT", show_label=False, visible=True)
|
481 |
+
model_radio = gr.Radio(["GPT-3.0", "GPT-3.5"], elem_id="model_radio", value="GPT-3.5",
|
482 |
+
label='GPT model: ', show_label=True,interactive=True, visible=True)
|
483 |
+
openai_api_key_textbox = gr.Textbox(placeholder="Paste your OpenAI API key (sk-...) and hit Enter",
|
484 |
+
show_label=False, lines=1, type='password')
|
485 |
+
with gr.Row(elem_id="btns_row"):
|
486 |
+
with gr.Column(id="submit_col"):
|
487 |
+
submit_btn = gr.Button(value = "submit",elem_id="submit-btn").style(
|
488 |
+
margin=True,
|
489 |
+
rounded=(True, True, True, True),
|
490 |
+
width=100
|
491 |
+
)
|
492 |
+
with gr.Column(id="clear_col"):
|
493 |
+
clear_btn = gr.Button(value = "clear outputs", elem_id="clear-btn").style(
|
494 |
+
margin=True,
|
495 |
+
rounded=(True, True, True, True),
|
496 |
+
width=100
|
497 |
+
)
|
498 |
+
submit_btn.click(fn=chat,
|
499 |
+
inputs=[prompt_input0, prompt_input1, chat_radio, model_radio, all_chat_history, chat_history],
|
500 |
+
outputs=[chatbot, all_chat_history, prompt_input0, prompt_input1],
|
501 |
+
)
|
502 |
+
with gr.Row(elem_id='tab_img', visible=False).style(height=5):
|
503 |
+
tab_img = gr.TabbedInterface(tab_actions, tab_titles)
|
504 |
+
|
505 |
+
openai_api_key_textbox.change(set_openai_api_key,
|
506 |
+
inputs=[openai_api_key_textbox],
|
507 |
+
outputs=[])
|
508 |
+
openai_api_key_textbox.submit(set_openai_api_key,
|
509 |
+
inputs=[openai_api_key_textbox],
|
510 |
+
outputs=[])
|
511 |
+
chat_radio.change(fn=chat_radio_change,
|
512 |
+
inputs=[chat_radio],
|
513 |
+
outputs=[model_radio, openai_api_key_textbox],
|
514 |
+
)
|
515 |
+
|
516 |
+
demo.launch(debug = True)
|
baidu_translate/module.py
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import random, os
|
3 |
+
from hashlib import md5
|
4 |
+
from typing import Optional
|
5 |
+
|
6 |
+
import requests
|
7 |
+
|
8 |
+
import paddlehub as hub
|
9 |
+
from paddlehub.module.module import moduleinfo
|
10 |
+
from paddlehub.module.module import runnable
|
11 |
+
from paddlehub.module.module import serving
|
12 |
+
|
13 |
+
|
14 |
+
def make_md5(s, encoding='utf-8'):
    """Return the hexadecimal MD5 digest of ``s`` encoded with ``encoding``."""
    raw = s.encode(encoding)
    digest = md5(raw)
    return digest.hexdigest()
|
16 |
+
|
17 |
+
|
18 |
+
@moduleinfo(name="baidu_translate",
|
19 |
+
version="1.0.0",
|
20 |
+
type="text/machine_translation",
|
21 |
+
summary="",
|
22 |
+
author="baidu-nlp",
|
23 |
+
author_email="paddle-dev@baidu.com")
|
24 |
+
class BaiduTranslate:
|
25 |
+
|
26 |
+
def __init__(self, appid=None, appkey=None):
|
27 |
+
"""
|
28 |
+
:param appid: appid for requesting Baidu translation service.
|
29 |
+
:param appkey: appkey for requesting Baidu translation service.
|
30 |
+
"""
|
31 |
+
appid = os.environ.get('baidu_translate_appid')
|
32 |
+
appkey = os.environ.get('baidu_translate_appkey')
|
33 |
+
# Set your own appid/appkey.
|
34 |
+
if appid is None:
|
35 |
+
self.appid = ''
|
36 |
+
else:
|
37 |
+
self.appid = appid
|
38 |
+
if appkey is None:
|
39 |
+
self.appkey = ''
|
40 |
+
else:
|
41 |
+
self.appkey = appkey
|
42 |
+
self.url = 'http://api.fanyi.baidu.com/api/trans/vip/translate'
|
43 |
+
|
44 |
+
def translate(self, query: str, from_lang: Optional[str] = "en", to_lang: Optional[int] = "zh"):
|
45 |
+
"""
|
46 |
+
Create image by text prompts using ErnieVilG model.
|
47 |
+
|
48 |
+
:param query: Text to be translated.
|
49 |
+
:param from_lang: Source language.
|
50 |
+
:param to_lang: Dst language.
|
51 |
+
|
52 |
+
Return translated string.
|
53 |
+
"""
|
54 |
+
# Generate salt and sign
|
55 |
+
salt = random.randint(32768, 65536)
|
56 |
+
sign = make_md5(self.appid + query + str(salt) + self.appkey)
|
57 |
+
|
58 |
+
# Build request
|
59 |
+
headers = {'Content-Type': 'application/x-www-form-urlencoded'}
|
60 |
+
payload = {'appid': self.appid, 'q': query, 'from': from_lang, 'to': to_lang, 'salt': salt, 'sign': sign}
|
61 |
+
|
62 |
+
# Send request
|
63 |
+
try:
|
64 |
+
r = requests.post(self.url, params=payload, headers=headers)
|
65 |
+
result = r.json()
|
66 |
+
except Exception as e:
|
67 |
+
error_msg = str(e)
|
68 |
+
raise RuntimeError(error_msg)
|
69 |
+
if 'error_code' in result:
|
70 |
+
raise RuntimeError(result['error_msg'])
|
71 |
+
return result['trans_result'][0]['dst']
|
72 |
+
|
73 |
+
@runnable
def run_cmd(self, argvs):
    """
    Command-line entry point.

    Builds an argument parser, registers the module's input options,
    parses ``argvs`` and returns the translation of the given query.
    Credentials supplied on the command line replace the stored ones
    only when both --appid and --appkey are present.
    """
    parser = argparse.ArgumentParser(
        description="Run the {} module.".format(self.name),
        prog='hub run {}'.format(self.name),
        usage='%(prog)s',
        add_help=True,
    )
    self.parser = parser
    self.arg_input_group = parser.add_argument_group(title="Input options", description="Input data. Required")
    self.add_module_input_arg()

    args = parser.parse_args(argvs)

    # Override the stored credentials only when the pair is complete.
    credentials_missing = args.appid is None or args.appkey is None
    if not credentials_missing:
        self.appid = args.appid
        self.appkey = args.appkey

    return self.translate(args.query, args.from_lang, args.to_lang)
|
90 |
+
|
91 |
+
@serving
def serving_method(self, query, from_lang, to_lang):
    """
    Service entry point: forward the request to translate() and return
    the translated string.
    """
    translated = self.translate(query, from_lang, to_lang)
    return translated
|
97 |
+
|
98 |
+
def add_module_input_arg(self):
    """
    Add the command input options to ``self.arg_input_group``.

    ``--query`` is mandatory: without it translate() would receive None and
    fail while concatenating the signature string, so the parser rejects the
    command line up front instead.
    """
    group = self.arg_input_group
    group.add_argument('--query', type=str, required=True, help="Text to be translated.")
    group.add_argument('--from_lang', type=str, default='en', help="源语言")
    group.add_argument('--to_lang', type=str, default='zh', help="目标语言")
    group.add_argument('--appid', type=str, default=None, help="注册得到的个人appid")
    group.add_argument('--appkey', type=str, default=None, help="注册得到的个人appkey")
|
encoder.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
encoder.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This file includes code which was modified from https://github.com/openai/gpt-2
|
2 |
+
|
3 |
+
import tensorflow as tf
|
4 |
+
import os
|
5 |
+
import json
|
6 |
+
import regex as re
|
7 |
+
from functools import lru_cache
|
8 |
+
import requests
|
9 |
+
import boto3
|
10 |
+
import pdb
|
11 |
+
|
12 |
+
|
13 |
+
@lru_cache()
def bytes_to_unicode():
    """Return a dict mapping every byte value (0-255) to a single unicode character.

    Bytes in the ranges "!".."~", "¡".."¬" and "®".."ÿ" map to the character
    with the same code point. Every other byte is assigned, in ascending
    order, the next unused code point starting at 256. The result is cached,
    so all callers share the same dict object.
    """
    kept_as_is = [
        *range(ord("!"), ord("~") + 1),
        *range(ord("¡"), ord("¬") + 1),
        *range(ord("®"), ord("ÿ") + 1),
    ]
    table = {byte: chr(byte) for byte in kept_as_is}

    # Remaining bytes are shifted above the 8-bit range, one code point each.
    shifted = 0
    for byte in range(256):
        if byte in table:
            continue
        table[byte] = chr(256 + shifted)
        shifted += 1
    return table
|
30 |
+
|
31 |
+
|
32 |
+
def get_pairs(word):
    """Return the set of adjacent symbol pairs in ``word``.

    ``word`` is a sequence of symbols (a tuple of strings or a plain string).
    Sequences with fewer than two symbols, including the empty sequence,
    yield an empty set.
    """
    # Pairing the sequence with itself shifted by one gives every
    # (previous, current) neighbour and is naturally empty for short input.
    return set(zip(word, word[1:]))
|
39 |
+
|
40 |
+
|
41 |
+
class Encoder:
    """Byte-level BPE tokenizer (this file is adapted from the openai/gpt-2 code).

    ``encoder`` maps BPE token strings to ids, ``bpe_merges`` is the ordered
    list of merge pairs (earlier entries have higher priority) and ``errors``
    is the error policy used when decoding bytes back to UTF-8 text.
    """

    def __init__(self, encoder, bpe_merges, errors="replace"):
        self.encoder = encoder
        # Reverse lookup: token id -> token string.
        self.decoder = {v: k for k, v in self.encoder.items()}
        self.errors = errors
        self.byte_encoder = bytes_to_unicode()
        self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
        # Rank of a merge is its position in the merge list (lower = applied first).
        self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
        # Memoizes bpe() results per token string.
        self.cache = {}
        self.pat = re.compile(
            r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
        )

    def bpe(self, token):
        """Apply the BPE merges to ``token`` and return its symbols joined by spaces.

        A token with fewer than two symbols is returned unchanged. Results
        for longer tokens are stored in ``self.cache``.
        """
        if token in self.cache:
            return self.cache[token]
        word = tuple(token)

        pairs = get_pairs(word)

        if not pairs:
            return token

        while True:
            # Pick the adjacent pair with the best (lowest) merge rank.
            bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float("inf")))
            if bigram not in self.bpe_ranks:
                # No remaining pair is a known merge.
                break
            first, second = bigram
            new_word = []
            i = 0
            while i < len(word):
                try:
                    j = word.index(first, i)
                    new_word.extend(word[i:j])
                    i = j
                except ValueError:
                    # tuple.index signals "not found": copy the tail and stop.
                    new_word.extend(word[i:])
                    break

                if word[i] == first and i < len(word) - 1 and word[i + 1] == second:
                    new_word.append(first + second)
                    i += 2
                else:
                    new_word.append(word[i])
                    i += 1
            new_word = tuple(new_word)
            word = new_word
            if len(word) == 1:
                break
            else:
                pairs = get_pairs(word)

        word = " ".join(word)
        self.cache[token] = word
        return word

    def encode(self, text):
        """Split ``text`` with the tokenizer pattern and return the list of BPE token ids."""
        bpe_tokens = []
        for token in re.findall(self.pat, text):
            # Re-express the token's UTF-8 bytes through the byte->unicode table.
            token = "".join(self.byte_encoder[b] for b in token.encode("utf-8"))

            bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(" "))
        return bpe_tokens

    def decode(self, tokens):
        """Turn a sequence of token ids back into text, decoding bytes as UTF-8 with ``self.errors``."""
        text = "".join([self.decoder[token] for token in tokens])
        text = bytearray([self.byte_decoder[c] for c in text]).decode("utf-8", errors=self.errors)
        return text
|
109 |
+
|
110 |
+
|
111 |
+
def get_encoder():
    """Build an Encoder from ``encoder.json`` and ``vocab.bpe`` in the current working directory.

    Both files are read as UTF-8 so the result does not depend on the
    platform's default locale encoding.
    """
    with open("encoder.json", "r", encoding="utf-8") as f:
        encoder = json.load(f)
    with open("vocab.bpe", "r", encoding="utf-8") as f:
        bpe_data = f.read()
    # The first and last lines are skipped — presumably a header line and the
    # empty string after the trailing newline; confirm against vocab.bpe.
    bpe_merges = [tuple(merge_str.split()) for merge_str in bpe_data.split("\n")[1:-1]]
    return Encoder(encoder=encoder, bpe_merges=bpe_merges)
|
118 |
+
|
119 |
+
# encoder = get_encoder()
|
120 |
+
# print('encoded is ', encoder.encode('hello 👋 world 🌍 This is a long string to test whether or not the emoji issue was fixed!'))
|
requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
openai
|
2 |
+
loguru
|
3 |
+
paddlepaddle==2.3.2
|
4 |
+
paddlehub
|
5 |
+
# transformers
|
6 |
+
# torch
|
7 |
+
tensorflow
|
8 |
+
regex
|
9 |
+
boto3
|
10 |
+
gradio==3.19.1
|
11 |
+
tencentcloud-sdk-python
|
utils.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json, os
|
2 |
+
from tencentcloud.common import credential
|
3 |
+
from tencentcloud.common.profile.client_profile import ClientProfile
|
4 |
+
from tencentcloud.common.profile.http_profile import HttpProfile
|
5 |
+
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
|
6 |
+
from tencentcloud.tmt.v20180321 import tmt_client, models
|
7 |
+
|
8 |
+
def get_tmt_client():
    """Create a Tencent Cloud TMT client (``tmt_client.TmtClient``) from environment credentials.

    The SecretId / SecretKey pair is read from ``TENCENTCLOUD_SECRET_ID`` and
    ``TENCENTCLOUD_SECRET_KEY``. The client is printed and returned; if a
    ``TencentCloudSDKException`` is raised the error is printed and None is
    returned instead.
    """
    try:
        # Build the credential object from the account's SecretId/SecretKey.
        # Keep the key pair confidential: leaking it endangers every resource
        # under the account. See https://cloud.tencent.com/document/product/1278/85305
        # for safer ways to handle keys; keys are issued at
        # https://console.cloud.tencent.com/cam/capi
        cred = credential.Credential(
            os.environ.get("TENCENTCLOUD_SECRET_ID"),
            os.environ.get("TENCENTCLOUD_SECRET_KEY"),
        )

        # Optional HTTP settings.
        http_profile = HttpProfile()
        http_profile.endpoint = "tmt.tencentcloudapi.com"

        # Optional client settings.
        client_profile = ClientProfile()
        client_profile.httpProfile = http_profile

        # Client object for the requested product; the profile argument is optional.
        client = tmt_client.TmtClient(cred, "ap-shanghai", client_profile)
        print(f'client_{client}')
    except TencentCloudSDKException as err:
        print(f'client_err_{err}')
        return None
    return client
|
30 |
+
|
31 |
+
def getTextTrans_tmt(tmt_client, text, source='zh', target='en'):
    """Translate ``text`` through the given TMT client, falling back to the input text.

    The text is returned unchanged when no client is available, when the
    target is 'en' and the text contains no character in U+4E00..U+9FFF,
    or when the translation request raises any exception.
    """
    if tmt_client is None:
        return text

    # Nothing to translate into English if no CJK ideograph is present.
    has_chinese = any(u'\u4e00' <= ch <= u'\u9fff' for ch in text)
    if target == 'en' and not has_chinese:
        return text

    request_fields = {
        "SourceText": text,
        "Source": source,
        "Target": target,
        "ProjectId": 0
    }
    try:
        req = models.TextTranslateRequest()
        req.from_json_string(json.dumps(request_fields))
        return tmt_client.TextTranslate(req).TargetText
    except Exception:
        # Best effort: any failure yields the untranslated text.
        return text
|
vocab.bpe
ADDED
The diff for this file is too large to render.
See raw diff
|
|