Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,1075 +1,12 @@
|
|
1 |
-
import os
|
2 |
-
import re
|
3 |
-
import sys
|
4 |
-
sys.path.insert(0, '.')
|
5 |
-
sys.path.insert(0, '..')
|
6 |
|
7 |
-
|
8 |
-
import gradio as gr
|
9 |
-
os.environ["GRADIO_TEMP_DIR"] = os.path.join(os.getcwd(), 'tmp')
|
10 |
-
import copy
|
11 |
-
import time
|
12 |
-
import shutil
|
13 |
-
import requests
|
14 |
-
from PIL import Image, ImageFile
|
15 |
-
import torch
|
16 |
-
import transformers
|
17 |
-
from transformers import StoppingCriteriaList, AutoTokenizer, AutoModel
|
18 |
|
19 |
-
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
20 |
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
from demo_asset.conversation import CONV_VISION_7132_v2, StoppingCriteriaSub
|
25 |
-
from demo_asset.download import download_image_thread
|
26 |
|
27 |
-
# Streaming switches read by Demo_UI.chat_answer and Demo_UI.generate_article.
chat_stream_output = True
article_stream_output = True

# Number of per-section component slots the UI callbacks fill; generated
# articles are also truncated to this many sections.
max_section = 60

# Shared button updates returned by the callbacks.
no_change_btn = gr.Button.update()
enable_btn = gr.Button.update(interactive=True)
disable_btn = gr.Button.update(interactive=False)
|
33 |
-
|
34 |
-
|
35 |
-
def get_urls(caption, exclude, timeout=60):
    """Ask the image-similarity service for pictures matching *caption*.

    Args:
        caption: Text sent as the ``caption`` field of the request.
        exclude: Value sent as the ``exclude`` field (callers in this file
            pass the indices returned by earlier requests).
        timeout: Seconds to wait for the service before giving up, so a
            stalled server cannot block the UI callback forever.

    Returns:
        A ``(urls, idx)`` tuple taken from ``data.image_urls`` and
        ``data.indices`` of the JSON response.

    Raises:
        requests.RequestException: On connection errors, timeouts or a
            non-success HTTP status.
    """
    headers = {'Content-Type': 'application/json'}
    json_data = {'caption': caption, 'exclude': exclude, 'need_idxs': True}
    response = requests.post('https://lingbi.openxlab.org.cn/image/similar',
                             headers=headers,
                             json=json_data,
                             timeout=timeout)
    response.raise_for_status()
    # Parse the body once instead of once per field.
    data = response.json()['data']
    return data['image_urls'], data['indices']
|
44 |
-
|
45 |
-
|
46 |
-
class Demo_UI:
    """State and Gradio callbacks for the illustrated-article and chat demo.

    The class wraps a multimodal model loaded with ``AutoModel`` and exposes
    methods that write an article for a title, pick insertion points and
    captions for images, fetch candidate images, and run a streaming chat.
    """

    def __init__(self, folder):
        """Load model and tokenizer from *folder* and move the model to CUDA.

        Args:
            folder: Location handed to ``from_pretrained`` for both the model
                and the tokenizer.
        """
        self.llm_model = AutoModel.from_pretrained(folder,
                                                   trust_remote_code=True)
        tokenizer = AutoTokenizer.from_pretrained(folder,
                                                  trust_remote_code=True)

        self.llm_model.internlm_tokenizer = tokenizer
        self.llm_model.tokenizer = tokenizer
        self.llm_model.eval().to('cuda')
        self.device = 'cuda'
        print(" load model done: ", type(self.llm_model))

        # Decoded text of the two separator token ids used when building
        # prompts (end-of-human / end-of-assistant, judging by the names).
        self.eoh = self.llm_model.internlm_tokenizer.decode(
            torch.Tensor([103027]), skip_special_tokens=True)
        self.eoa = self.llm_model.internlm_tokenizer.decode(
            torch.Tensor([103028]), skip_special_tokens=True)
        self.soi_id = len(tokenizer) - 1
        self.soi_token = '<SOI_TOKEN>'

        self.vis_processor = self.llm_model.vis_processor

        # Token-id sequences that end generation.
        stop_words_ids = [
            torch.tensor(ids).to(self.device)
            for ids in ([943], [2917, 44930], [45623], [46323], [103027],
                        [103028])
        ]
        self.stopping_criteria = StoppingCriteriaList(
            [StoppingCriteriaSub(stops=stop_words_ids)])
        self.r2 = re.compile(r'<Seg[0-9]*>')
        self.max_txt_len = 1680

    def reset(self):
        """Clear the per-article state before a new article is generated."""
        self.output_text = ''
        self.caps = {}
        self.show_caps = False
        self.show_ids = {}

    def get_images_xlab(self, caption, loc, exclude):
        """Fetch candidate images for section *loc* and download them.

        Args:
            caption: Caption text; only its first 53 stripped characters are
                sent to the search service.
            loc: Section index the images belong to.
            exclude: Indices to exclude, forwarded to ``get_urls``.

        Returns:
            The indices returned by the search service.
        """
        urls, idxs = get_urls(caption.strip()[:53], exclude)
        print(urls[0])
        print('download image with url')
        download_image_thread(urls,
                              folder='articles/' + self.title,
                              index=self.show_ids[loc] * 1000 + loc,
                              num_processes=4)
        print('image downloaded')
        return idxs

    def generate(self, text, random, beam, max_length, repetition):
        """Run one non-streaming generation for the prompt *text*.

        Args:
            text: Full prompt string.
            random: Passed as ``do_sample``.
            beam: Passed as ``num_beams``.
            max_length: Passed as ``max_length``.
            repetition: Repetition penalty; converted to ``float``.

        Returns:
            The decoded output up to the first ``<TOKENS_UNUSED_1>`` marker.
        """
        input_tokens = self.llm_model.internlm_tokenizer(
            text, return_tensors="pt",
            add_special_tokens=True).to(self.llm_model.device)
        img_embeds = self.llm_model.internlm_model.model.embed_tokens(
            input_tokens.input_ids)
        with torch.no_grad():
            with self.llm_model.maybe_autocast():
                outputs = self.llm_model.internlm_model.generate(
                    inputs_embeds=img_embeds,
                    stopping_criteria=self.stopping_criteria,
                    do_sample=random,
                    num_beams=beam,
                    max_length=max_length,
                    repetition_penalty=float(repetition),
                )
        output_text = self.llm_model.internlm_tokenizer.decode(
            outputs[0][1:], add_special_tokens=False)
        return output_text.split('<TOKENS_UNUSED_1>')[0]

    def _article_prompt(self, title):
        """Build the article-writing prompt for *title*."""
        return ' <|User|>:根据给定标题写一个图文并茂,不重复的文章:{}\n'.format(
            title) + self.eoh + ' <|Bot|>:'

    def generate_text(self, title, beam, repetition, text_num, random):
        """Generate the article text for *title* without streaming."""
        text = self._article_prompt(title)
        print('random generate:{}'.format(random))
        return self.generate(text, random, beam, text_num, repetition)

    def generate_loc(self, text_sections, image_num, progress):
        """Ask the model which sections should be followed by an image.

        Args:
            text_sections: Section strings, each already tagged ``<SegN>``.
            image_num: Number of images to request; interpolated into the
                prompt as-is.
            progress: Gradio progress tracker.

        Returns:
            ``(inject_text, locs)`` where ``locs`` lists the section numbers
            parsed from the ``<SegN>`` tags in the model answer.
        """
        full_txt = ''.join(text_sections)
        input_text = f' <|User|>:给定文章"{full_txt}" 根据上述文章,选择适合插入图像的{image_num}行' + ' \n<TOKENS_UNUSED_0> <|Bot|>:适合插入图像的行是'

        for _ in progress.tqdm([1], desc="image spotting"):
            output_text = self.generate(input_text,
                                        random=False,
                                        beam=5,
                                        max_length=300,
                                        repetition=1.)
        inject_text = '适合插入图像的行是' + output_text
        print(inject_text)

        locs = []
        for m in self.r2.findall(inject_text):
            digits = m[4:-1]
            # The pattern also matches a bare "<Seg>"; skip it instead of
            # crashing on int('').
            if digits:
                locs.append(int(digits))
        print(locs)
        return inject_text, locs

    def generate_cap(self, text_sections, pos, progress):
        """Generate an image caption for every section index in *pos*.

        Captions produced so far are fed back into later prompts.

        Returns:
            Dict mapping section index to caption text.
        """
        pasts = ''
        caps = {}
        for idx, po in progress.tqdm(enumerate(pos), desc="image captioning"):
            full_txt = ''.join(text_sections[:po + 2])
            # Replace the trailing ", " of the running list with a full stop.
            past = pasts[:-2] + '。' if idx > 0 else pasts

            input_text = f' <|User|>: 给定文章"{full_txt}" {past}给出适合在<Seg{po}>后插入的图像对应的标题。' + ' \n<TOKENS_UNUSED_0> <|Bot|>: 标题是"'

            cap_text = self.generate(input_text,
                                     random=False,
                                     beam=1,
                                     max_length=100,
                                     repetition=5.)
            cap_text = cap_text.split('"')[0].strip()
            print(cap_text)
            caps[po] = cap_text

            if idx == 0:
                pasts = f'现在<Seg{po}>后插入图像对应的标题是"{cap_text}", '
            else:
                pasts += f'<Seg{po}>后插入图像对应的标题是"{cap_text}", '

        print(caps)
        return caps

    def generate_loc_cap(self, text_sections, image_num, progress):
        """Pick image positions, then caption each; return the caption dict."""
        _, locs = self.generate_loc(text_sections, image_num, progress)
        return self.generate_cap(text_sections, locs, progress)

    def interleav_wrap(self, img_embeds, text):
        """Interleave text embeddings with image embeddings.

        ``text[0]`` is split on ``<ImageHere>``; each text part is embedded
        and followed by the image embedding with the same index.

        Args:
            img_embeds: Image embeddings; the first dimension is treated as
                the number of images.
            text: Sequence whose first element is the prompt string.

        Returns:
            The concatenated embeddings, truncated to ``self.max_txt_len``
            positions along dimension 1.
        """
        batch_size = img_embeds.shape[0]
        im_len = img_embeds.shape[1]
        prompt = text[0].replace('<Img>', '').replace('</Img>', '')
        parts = prompt.split('<ImageHere>')
        assert batch_size + 1 == len(parts)

        embed_tokens = self.llm_model.internlm_model.model.embed_tokens
        warp_embeds = []
        temp_len = 0

        for idx, part in enumerate(parts):
            if len(part) > 0:
                part_tokens = self.llm_model.internlm_tokenizer(
                    part, return_tensors="pt",
                    add_special_tokens=False).to(img_embeds.device)
                part_embeds = embed_tokens(part_tokens.input_ids)
                warp_embeds.append(part_embeds)
                temp_len += part_embeds.shape[1]
            if idx < batch_size:
                warp_embeds.append(img_embeds[idx].unsqueeze(0))
                temp_len += im_len

            # Stop early once the length budget is exceeded; the result is
            # truncated below anyway.
            if temp_len > self.max_txt_len:
                break

        warp_embeds = torch.cat(warp_embeds, dim=1)
        return warp_embeds[:, :self.max_txt_len].to(img_embeds.device)

    def align_text(self, samples):
        """Normalise role markers in ``samples["text_input"]``.

        ``###Human`` / ``###Assistant`` style markers are rewritten to
        ``<|User|>`` / ``<|Bot|>`` and the separator strings are inserted.

        Returns:
            List of rewritten prompt strings.
        """
        text_new = []
        for raw in samples["text_input"]:
            temp = raw + self.eoa + ' </s>'
            # Order matters: later replacements rely on earlier ones.
            temp = temp.replace('###Human', '<|User|>')
            temp = temp.replace('### Human', '<|User|>')
            temp = temp.replace('<|User|> :', '<|User|>:')
            temp = temp.replace('<|User|>: ', '<|User|>:')
            temp = temp.replace('<|User|>', ' <|User|>')

            temp = temp.replace('###Assistant', '<|Bot|>')
            temp = temp.replace('### Assistant', '<|Bot|>')
            temp = temp.replace('<|Bot|> :', '<|Bot|>:')
            temp = temp.replace('<|Bot|>: ', '<|Bot|>:')
            temp = temp.replace('<|Bot|>', self.eoh + ' <|Bot|>')
            if temp.find('<|User|>') > temp.find('<|Bot|>'):
                temp = temp.replace(' <|User|>', self.eoa + ' <|User|>')
            text_new.append(temp)
        return text_new

    def model_select_image(self, output_text, caps, root, progress):
        """Let the model choose one of four candidate images per caption.

        Args:
            output_text: Article text with sections separated by newlines.
            caps: Dict keyed by the section indices that receive an image.
            root: Directory holding the downloaded ``temp_*_*.png`` files.
            progress: Unused; kept for interface compatibility.

        Returns:
            Dict mapping section index to the chosen candidate index (0-3).
            When the answer cannot be parsed, candidate 0 is used.
        """
        print('model_select_image')
        pre_text = ''
        pre_img = []
        pre_text_list = []
        ans2idx = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
        selected = {k: 0 for k in caps.keys()}
        for i, text in enumerate(output_text.split('\n')):
            pre_text += text + '\n'
            if i not in caps:
                continue

            # Previously chosen images first, then the four new candidates.
            images = copy.deepcopy(pre_img)
            for j in range(4):
                image = Image.open(
                    os.path.join(
                        root, f'temp_{self.show_ids[i] * 1000 + i}_{j}.png'
                    )).convert("RGB")
                images.append(self.vis_processor(image))
            images = torch.stack(images, dim=0)

            pre_text_list.append(pre_text)
            pre_text = ''

            images = images.cuda()
            instruct = ' <|User|>:根据给定上下文和候选图像,选择合适的配图:'
            input_text = '<ImageHere>'.join(
                pre_text_list
            ) + '\n\n候选图像包括: A.<ImageHere>\nB.<ImageHere>\nC.<ImageHere>\nD.<ImageHere>\n\n<TOKENS_UNUSED_0> <|Bot|>:最合适的图是'
            input_text = instruct + input_text
            samples = {'text_input': [input_text]}
            self.llm_model.debug_flag = 0
            with torch.no_grad():
                with torch.cuda.amp.autocast():
                    img_embeds = self.llm_model.encode_img(images)
                    input_text = self.align_text(samples)
                    img_embeds = self.interleav_wrap(img_embeds, input_text)
                    bos = torch.ones(
                        [1, 1]) * self.llm_model.internlm_tokenizer.bos_token_id
                    bos = bos.long().to(images.device)
                    meta_embeds = self.llm_model.internlm_model.model.embed_tokens(
                        bos)
                    inputs_embeds = torch.cat([meta_embeds, img_embeds],
                                              dim=1)

                    outputs = self.llm_model.internlm_model.generate(
                        inputs_embeds=inputs_embeds[:, :-2],
                        do_sample=False,
                        num_beams=5,
                        max_length=10,
                        repetition_penalty=1.,
                    )
            out_text = self.llm_model.internlm_tokenizer.decode(
                outputs[0][1:], add_special_tokens=False)

            try:
                answer = out_text[1] if out_text[0] == ' ' else out_text[0]
                choice = ans2idx[answer]
            except (IndexError, KeyError):
                # Empty output or a letter outside A-D.
                print('Select fail, use first image')
                choice = ans2idx['A']
            pre_img.append(images[len(pre_img) + choice].cpu())
            selected[i] = choice
        return selected

    def show_md(self, text_sections, title, caps, selected, show_cap=False):
        """Build the Gradio updates that render the article.

        Args:
            text_sections: Article sections in order.
            title: Article title; used as the image sub-folder name.
            caps: Dict of section index -> caption for sections with images.
            selected: Dict of section index -> chosen candidate index.
            show_cap: When true, the caption is rendered next to the image.

        Returns:
            ``(components, md_shows)``: the flat list of updates for all
            ``max_section`` slots (markdown, gallery, button, caption textbox,
            caption search button, editor) and the visible markdown updates.
        """
        md_shows = []
        ga_shows = []
        btn_shows = []
        cap_textboxs, cap_searchs = [], []
        editers = []
        for i, section in enumerate(text_sections):
            if i in caps:
                img_id = self.show_ids[i] * 1000 + i
                # Both variants use the "file=" route so that replace_image,
                # which rewrites "file=...png", also works in caption mode.
                if show_cap:
                    md = section + '\n' + '<div align="center"> <img src="file=articles/{}/temp_{}_{}.png" width = 500/> {} </div>'.format(
                        title, img_id, selected[i], caps[i])
                else:
                    md = section + '\n' + '<div align="center"> <img src="file=articles/{}/temp_{}_{}.png" width = 500/> </div>'.format(
                        title, img_id, selected[i])
                img_list = []
                for j in range(4):
                    path = 'articles/{}/temp_{}_{}.png'.format(
                        title, img_id, j)
                    # (image, caption) pair; the caption carries the path.
                    img_list.append((path, path))

                ga_shows.append(
                    gr.Gallery.update(visible=True, value=img_list))
                btn_show = gr.Button.update(visible=True,
                                            value='\U0001f5d1\uFE0F')
                cap_textboxs.append(
                    gr.Textbox.update(visible=True, value=caps[i]))
                cap_searchs.append(gr.Button.update(visible=True))
            else:
                md = section
                ga_shows.append(gr.Gallery.update(visible=False, value=[]))
                btn_show = gr.Button.update(visible=True, value='\u2795')
                cap_textboxs.append(gr.Textbox.update(visible=False))
                cap_searchs.append(gr.Button.update(visible=False))

            md_shows.append(gr.Markdown.update(visible=True, value=md))
            btn_shows.append(btn_show)
            editers.append(gr.update(visible=True))
            print(i, md)

        # Hide the unused slots.
        md_hides = []
        ga_hides = []
        btn_hides = []
        for _ in range(max_section - len(text_sections)):
            md_hides.append(gr.Markdown.update(visible=False, value=''))
            btn_hides.append(gr.Button.update(visible=False))
            editers.append(gr.update(visible=False))

        for _ in range(max_section - len(ga_shows)):
            ga_hides.append(gr.Gallery.update(visible=False, value=[]))
            cap_textboxs.append(gr.Textbox.update(visible=False))
            cap_searchs.append(gr.Button.update(visible=False))

        return md_shows + md_hides + ga_shows + ga_hides + btn_shows + btn_hides + cap_textboxs + cap_searchs + editers, md_shows

    def _streaming_frame(self, text):
        """Return the output tuple shown while the article text streams.

        Only the first markdown slot shows *text*; every other slot is hidden
        and the two trailing buttons are disabled.
        """
        return (text,) + (gr.Markdown.update(visible=False),) * (max_section - 1) + (gr.Gallery.update(visible=False),) * max_section + \
            (gr.Button.update(visible=False),) * max_section + (gr.Textbox.update(visible=False),) * max_section + (gr.Button.update(visible=False),) * max_section + \
            (gr.update(visible=False),) * max_section + (disable_btn,) * 2

    def generate_article(self,
                         title,
                         beam,
                         repetition,
                         text_num,
                         msi,
                         random,
                         progress=gr.Progress()):
        """Generate an illustrated article for *title* (generator callback).

        Args:
            title: Article title.
            beam: Beam size for text generation.
            repetition: Repetition penalty.
            text_num: Maximum generation length.
            msi: When truthy, the model selects each image; otherwise the
                first candidate is used.
            random: Passed as ``do_sample``.
            progress: Gradio progress tracker.

        Yields:
            Output tuples/lists for the UI: intermediate frames while text
            streams (if ``article_stream_output``), then the final layout.
        """
        self.reset()
        self.title = title
        if article_stream_output:
            text = self._article_prompt(title)
            input_tokens = self.llm_model.internlm_tokenizer(
                text, return_tensors="pt",
                add_special_tokens=True).to(self.llm_model.device)
            img_embeds = self.llm_model.internlm_model.model.embed_tokens(
                input_tokens.input_ids)
            generate_params = dict(
                inputs_embeds=img_embeds,
                num_beams=beam,
                do_sample=random,
                stopping_criteria=self.stopping_criteria,
                repetition_penalty=float(repetition),
                max_length=text_num,
                bos_token_id=self.llm_model.internlm_tokenizer.bos_token_id,
                eos_token_id=self.llm_model.internlm_tokenizer.eos_token_id,
                pad_token_id=self.llm_model.internlm_tokenizer.pad_token_id,
            )
            output_text = "▌"
            with self.generate_with_streaming(**generate_params) as generator:
                for output in generator:
                    decoded_output = self.llm_model.internlm_tokenizer.decode(
                        output[1:])
                    if output[-1] in [
                            self.llm_model.internlm_tokenizer.eos_token_id
                    ]:
                        break
                    output_text = decoded_output.replace('\n', '\n\n') + "▌"
                    yield self._streaming_frame(output_text)
                    time.sleep(0.03)
            # Drop the trailing cursor character.
            output_text = output_text[:-1]
            yield self._streaming_frame(output_text)
        else:
            output_text = self.generate_text(title, beam, repetition, text_num,
                                             random)

        print(output_text)
        output_text = re.sub(r'(\n[ \t]*)+', '\n', output_text)
        # endswith() is safe on an empty string, unlike output_text[-1].
        if output_text.endswith('\n'):
            output_text = output_text[:-1]
        print(output_text)
        output_text = '\n'.join(output_text.split('\n')[:max_section])

        text_sections = output_text.split('\n')
        idx_text_sections = [
            f'<Seg{i}>' + ' ' + it + '\n' for i, it in enumerate(text_sections)
        ]
        caps = self.generate_loc_cap(idx_text_sections, '', progress)
        self.show_ids = {k: 1 for k in caps.keys()}

        print(caps)
        self.ex_idxs = []
        for loc, cap in progress.tqdm(caps.items(), desc="download image"):
            idxs = self.get_images_xlab(cap, loc, self.ex_idxs)
            self.ex_idxs.extend(idxs)

        if msi:
            self.selected = self.model_select_image(output_text, caps,
                                                    'articles/' + title,
                                                    progress)
        else:
            self.selected = {k: 0 for k in caps.keys()}
        components, md_shows = self.show_md(text_sections, title, caps,
                                            self.selected)
        self.show_caps = False

        self.output_text = output_text
        self.caps = caps
        # This function is always a generator, so the final layout must be
        # yielded; a ``return value`` here would never reach the UI.
        yield components + [enable_btn] * 2

    def adjust_img(self, img_num, progress=gr.Progress()):
        """Re-illustrate the current article with about *img_num* images.

        Returns:
            The flat component update list produced by ``show_md``.
        """
        text_sections = self.output_text.split('\n')
        idx_text_sections = [
            f'<Seg{i}>' + ' ' + it + '\n' for i, it in enumerate(text_sections)
        ]
        img_num = min(img_num, len(text_sections))
        caps = self.generate_loc_cap(idx_text_sections, int(img_num), progress)

        print(caps)
        sidxs = []
        for loc, cap in caps.items():
            # A new download round gets a new id so file names do not clash.
            self.show_ids[loc] = self.show_ids.get(loc, 0) + 1
            idxs = self.get_images_xlab(cap, loc, sidxs)
            sidxs.extend(idxs)
        self.sidxs = sidxs

        self.selected = {k: 0 for k in caps.keys()}
        components, md_shows = self.show_md(text_sections, self.title, caps,
                                            self.selected)

        self.caps = caps
        return components

    def add_delete_image(self, text, status, index):
        """Toggle the image slot of section *index*.

        Args:
            text: Current markdown of the section.
            status: Current button label; the trash icon means "delete".
            index: Section index (converted with ``int``).

        Returns:
            Updates for the markdown, gallery, toggle button, caption textbox
            and caption search button.
        """
        index = int(index)
        if status == '\U0001f5d1\uFE0F':
            if index in self.caps:
                self.caps.pop(index)
                # A caption added via search may have no selection yet.
                self.selected.pop(index, None)
            md_show = gr.Markdown.update(value=text.split('\n')[0])
            gallery = gr.Gallery.update(visible=False, value=[])
            btn_show = gr.Button.update(value='\u2795')
            cap_textbox = gr.Textbox.update(visible=False)
            cap_search = gr.Button.update(visible=False)
        else:
            md_show = gr.Markdown.update()
            gallery = gr.Gallery.update(visible=True, value=[])
            btn_show = gr.Button.update(value='\U0001f5d1\uFE0F')
            cap_textbox = gr.Textbox.update(visible=True)
            cap_search = gr.Button.update(visible=True)

        return md_show, gallery, btn_show, cap_textbox, cap_search

    def search_image(self, text, index):
        """Search images for caption *text* and show them for section *index*.

        Returns:
            A gallery update; unchanged when *text* is empty.
        """
        index = int(index)
        if text == '':
            return gr.Gallery.update()

        self.show_ids[index] = self.show_ids.get(index, 0) + 1
        self.caps[index] = text
        # Default to the first candidate so save()/show_md() can always look
        # the section up, even if the user never clicks an image.
        self.selected.setdefault(index, 0)
        idxs = self.get_images_xlab(text, index, self.ex_idxs)
        self.ex_idxs.extend(idxs)

        img_list = []
        for j in range(4):
            path = 'articles/{}/temp_{}_{}.png'.format(
                self.title, self.show_ids[index] * 1000 + index, j)
            img_list.append((path, path))
        return gr.Gallery.update(visible=True, value=img_list)

    def replace_image(self, article, index, evt: gr.SelectData):
        """Swap the image shown in section *index* for the clicked one.

        Returns:
            The section markdown pointing at ``evt.value``.
        """
        index = int(index)
        self.selected[index] = evt.index
        new_src = 'file={}'.format(evt.value)
        if '<div align="center">' in article:
            # A callable replacement keeps backslashes or "\1"-like text in
            # the path from being read as regex escapes; the pattern stops at
            # the closing quote of the src attribute.
            return re.sub(r'file=[^"]*\.png', lambda _m: new_src, article)
        return article + '\n' + '<div align="center"> <img src="{}" width = 500/> </div>'.format(
            new_src)

    def add_delete_caption(self):
        """Toggle caption display and re-render the article."""
        self.show_caps = not self.show_caps
        text_sections = self.output_text.split('\n')
        components, _ = self.show_md(text_sections,
                                     self.title,
                                     self.caps,
                                     selected=self.selected,
                                     show_cap=self.show_caps)
        return components

    def save(self):
        """Write the article and its selected images to a zip archive.

        The markdown goes to ``save_articles/<title>/io.MD`` next to copies
        of the selected images; existing files in that folder are removed
        first.

        Returns:
            Path of the created archive, as returned by
            ``shutil.make_archive``.
        """
        folder = 'save_articles/' + self.title
        if os.path.exists(folder):
            for item in os.listdir(folder):
                os.remove(os.path.join(folder, item))
        os.makedirs(folder, exist_ok=True)

        blocks = []
        if len(self.output_text) > 0:
            for i, section in enumerate(self.output_text.split('\n')):
                if i in self.caps:
                    img_id = self.show_ids[i] * 1000 + i
                    if self.show_caps:
                        md = section + '\n' + '<div align="center"> <img src="temp_{}_{}.png" width = 500/> {} </div>'.format(
                            img_id, self.selected[i], self.caps[i])
                    else:
                        md = section + '\n' + '<div align="center"> <img src="temp_{}_{}.png" width = 500/> </div>'.format(
                            img_id, self.selected[i])
                else:
                    md = section
                blocks.append(md)
        save_text = '\n\n'.join(blocks)

        # Explicit UTF-8: the article is typically non-ASCII text.
        with open(os.path.join(folder, 'io.MD'), 'w', encoding='utf-8') as f:
            f.write(save_text)

        for k in self.caps:
            shutil.copy(
                os.path.join(
                    'articles', self.title,
                    f'temp_{self.show_ids[k] * 1000 + k}_{self.selected[k]}.png'
                ), folder)
        return shutil.make_archive(folder, 'zip', folder)

    def get_context_emb(self, state, img_list):
        """Embed the chat prompt, splicing image embeddings into it.

        Args:
            state: Conversation object providing ``get_prompt()``.
            img_list: Image embeddings, one per image placeholder.

        Returns:
            Text and image embeddings concatenated along dimension 1.
        """
        prompt = state.get_prompt()
        print(prompt)
        prompt_segs = prompt.split('<Img><ImageHere></Img>')

        assert len(prompt_segs) == len(
            img_list
        ) + 1, "Unmatched numbers of image placeholders and images."
        # Special tokens are added to the first segment only.
        seg_tokens = [
            self.llm_model.internlm_tokenizer(seg,
                                              return_tensors="pt",
                                              add_special_tokens=i == 0).to(
                                                  self.device).input_ids
            for i, seg in enumerate(prompt_segs)
        ]
        seg_embs = [
            self.llm_model.internlm_model.model.embed_tokens(seg_t)
            for seg_t in seg_tokens
        ]
        mixed_embs = [
            emb for pair in zip(seg_embs[:-1], img_list) for emb in pair
        ] + [seg_embs[-1]]
        return torch.cat(mixed_embs, dim=1)

    def chat_ask(self, state, img_list, text, image):
        """Append the user's turn (text and optional image) to the chat.

        Returns:
            ``(state, img_list, chatbot, "", None, btn, btn)``; the buttons
            are left unchanged when there is nothing to send and disabled
            otherwise.
        """
        state.skip_next = False
        if not text and image is None:
            state.skip_next = True
            return (state, img_list, state.to_gradio_chatbot(), "",
                    None) + (no_change_btn, ) * 2

        if image is not None:
            image_pt = self.vis_processor(image).unsqueeze(0).to(self.device)
            image_emb = self.llm_model.encode_img(image_pt)
            img_list.append(image_emb)

            state.append_message(state.roles[0],
                                 ["<Img><ImageHere></Img>", image])

        last_is_image_turn = (len(state.messages) > 0
                              and state.messages[-1][0] == state.roles[0]
                              and isinstance(state.messages[-1][1], list))
        if last_is_image_turn:
            # Merge the text into the image message just appended.
            state.messages[-1][1][0] = ' '.join(
                [state.messages[-1][1][0], text])
        else:
            state.append_message(state.roles[0], text)

        print(state.messages)

        # Placeholder for the assistant's reply.
        state.append_message(state.roles[1], None)

        return (state, img_list, state.to_gradio_chatbot(), "",
                None) + (disable_btn, ) * 2

    def generate_with_callback(self, callback=None, **kwargs):
        """Run ``generate`` with a ``Stream`` criterion reporting to *callback*.

        The caller's stopping criteria are copied first, so the shared
        ``self.stopping_criteria`` list does not grow by one ``Stream`` (with
        a stale callback) on every call.
        """
        criteria = transformers.StoppingCriteriaList(
            kwargs.get("stopping_criteria") or [])
        criteria.append(Stream(callback_func=callback))
        kwargs["stopping_criteria"] = criteria
        with torch.no_grad():
            with self.llm_model.maybe_autocast():
                self.llm_model.internlm_model.generate(**kwargs)

    def generate_with_streaming(self, **kwargs):
        """Wrap ``generate_with_callback`` in an ``Iteratorize`` object."""
        return Iteratorize(self.generate_with_callback, kwargs, callback=None)

    def chat_answer(self, state, img_list, max_output_tokens,
                    repetition_penalty, num_beams, do_sample):
        """Generate the assistant reply for the chat (generator callback).

        Yields:
            ``(state, chatbot, btn, btn)`` tuples: partial replies with
            disabled buttons while streaming, then the final reply with
            enabled buttons.
        """
        # This function is a generator, so every result must be yielded; a
        # ``return value`` would be dropped.
        if state.skip_next:
            yield (state, state.to_gradio_chatbot()) + (no_change_btn, ) * 2
            return

        embs = self.get_context_emb(state, img_list)
        if chat_stream_output:
            generate_params = dict(
                inputs_embeds=embs,
                num_beams=num_beams,
                do_sample=do_sample,
                stopping_criteria=self.stopping_criteria,
                repetition_penalty=float(repetition_penalty),
                max_length=max_output_tokens,
                bos_token_id=self.llm_model.internlm_tokenizer.bos_token_id,
                eos_token_id=self.llm_model.internlm_tokenizer.eos_token_id,
                pad_token_id=self.llm_model.internlm_tokenizer.pad_token_id,
            )
            state.messages[-1][-1] = "▌"
            with self.generate_with_streaming(**generate_params) as generator:
                for output in generator:
                    decoded_output = self.llm_model.internlm_tokenizer.decode(
                        output[1:])
                    if output[-1] in [
                            self.llm_model.internlm_tokenizer.eos_token_id, 333, 497
                    ]:
                        break
                    state.messages[-1][-1] = decoded_output + "▌"
                    yield (state,
                           state.to_gradio_chatbot()) + (disable_btn, ) * 2
                    time.sleep(0.03)
            # Drop the trailing cursor character.
            state.messages[-1][-1] = state.messages[-1][-1][:-1]
            yield (state, state.to_gradio_chatbot()) + (enable_btn, ) * 2
            return

        outputs = self.llm_model.internlm_model.generate(
            inputs_embeds=embs,
            max_new_tokens=max_output_tokens,
            stopping_criteria=self.stopping_criteria,
            num_beams=num_beams,
            do_sample=do_sample,
            repetition_penalty=float(repetition_penalty),
            bos_token_id=self.llm_model.internlm_tokenizer.bos_token_id,
            eos_token_id=self.llm_model.internlm_tokenizer.eos_token_id,
            pad_token_id=self.llm_model.internlm_tokenizer.pad_token_id,
        )

        output_token = outputs[0]
        if output_token[0] == 0:
            output_token = output_token[1:]
        output_text = self.llm_model.internlm_tokenizer.decode(
            output_token, add_special_tokens=False)
        print(output_text)
        # Cut at the stop marker, then strip role prefix and BOS text.
        output_text = output_text.split('<TOKENS_UNUSED_1>')[0]
        output_text = output_text.split('Assistant:')[-1].strip()
        output_text = output_text.replace("<s>", "")
        state.messages[-1][1] = output_text

        yield (state, state.to_gradio_chatbot()) + (enable_btn, ) * 2

    def clear_answer(self, state):
        """Blank the last message so it can be regenerated."""
        state.messages[-1][-1] = None
        return (state, state.to_gradio_chatbot())

    def chat_clear_history(self):
        """Start a fresh conversation and clear the chat inputs."""
        state = CONV_VISION_7132_v2.copy()
        return (state, [], state.to_gradio_chatbot(), "",
                None) + (disable_btn, ) * 2
|
737 |
-
|
738 |
-
|
739 |
-
def load_demo():
    """Return the initial values for a new session.

    Returns:
        A tuple of a fresh copy of the conversation template, an empty image
        list, and updates making the chatbot, textbox, button, row and
        accordion visible.
    """
    fresh_state = CONV_VISION_7132_v2.copy()
    reveal = (
        gr.Chatbot.update(visible=True),
        gr.Textbox.update(visible=True),
        gr.Button.update(visible=True),
        gr.Row.update(visible=True),
        gr.Accordion.update(visible=True),
    )
    return (fresh_state, []) + reveal
|
745 |
-
|
746 |
-
|
747 |
-
def change_language(lang):
|
748 |
-
if lang == '中文':
|
749 |
-
lang_btn = gr.update(value='English')
|
750 |
-
title = gr.update(label='根据给定标题写一个图文并茂的文章:')
|
751 |
-
btn = gr.update(value='生成')
|
752 |
-
parameter_article = gr.update(label='高级设置')
|
753 |
-
|
754 |
-
beam = gr.update(label='集束大小')
|
755 |
-
repetition = gr.update(label='重复惩罚')
|
756 |
-
text_num = gr.update(label='最多输出字数')
|
757 |
-
msi = gr.update(label='模型选图')
|
758 |
-
random = gr.update(label='采样')
|
759 |
-
img_num = gr.update(label='生成文章后,可选择全文配图数量')
|
760 |
-
adjust_btn = gr.update(value='固定数量配图')
|
761 |
-
cap_searchs, editers = [], []
|
762 |
-
for _ in range(max_section):
|
763 |
-
cap_searchs.append(gr.update(value='搜索'))
|
764 |
-
editers.append(gr.update(label='编辑'))
|
765 |
-
|
766 |
-
save_btn = gr.update(value='文章下载')
|
767 |
-
save_file = gr.update(label='文章下载')
|
768 |
-
|
769 |
-
parameter_chat = gr.update(label='参数')
|
770 |
-
chat_text_num = gr.update(label='最多输出字数')
|
771 |
-
chat_beam = gr.update(label='集束大小')
|
772 |
-
chat_repetition = gr.update(label='重复惩罚')
|
773 |
-
chat_random = gr.update(label='采样')
|
774 |
-
|
775 |
-
chat_textbox = gr.update(placeholder='输入聊天内容并回车')
|
776 |
-
submit_btn = gr.update(value='提交')
|
777 |
-
regenerate_btn = gr.update(value='🔄 重新生成')
|
778 |
-
clear_btn = gr.update(value='🗑️ 清空聊天框')
|
779 |
-
elif lang == 'English':
|
780 |
-
lang_btn = gr.update(value='中文')
|
781 |
-
title = gr.update(
|
782 |
-
label='Write an illustrated article based on the given title:')
|
783 |
-
btn = gr.update(value='Submit')
|
784 |
-
parameter_article = gr.update(label='Advanced Settings')
|
785 |
-
|
786 |
-
beam = gr.update(label='Beam Size')
|
787 |
-
repetition = gr.update(label='Repetition_penalty')
|
788 |
-
text_num = gr.update(label='Max output tokens')
|
789 |
-
msi = gr.update(label='Model selects images')
|
790 |
-
random = gr.update(label='Do_sample')
|
791 |
-
img_num = gr.update(
|
792 |
-
label=
|
793 |
-
'Select the number of the inserted image after article generation.'
|
794 |
-
)
|
795 |
-
adjust_btn = gr.update(value='Insert a fixed number of images')
|
796 |
-
cap_searchs, editers = [], []
|
797 |
-
for _ in range(max_section):
|
798 |
-
cap_searchs.append(gr.update(value='Search'))
|
799 |
-
editers.append(gr.update(label='edit'))
|
800 |
-
|
801 |
-
save_btn = gr.update(value='Save article')
|
802 |
-
save_file = gr.update(label='Save article')
|
803 |
-
|
804 |
-
parameter_chat = gr.update(label='Parameters')
|
805 |
-
chat_text_num = gr.update(label='Max output tokens')
|
806 |
-
chat_beam = gr.update(label='Beam Size')
|
807 |
-
chat_repetition = gr.update(label='Repetition_penalty')
|
808 |
-
chat_random = gr.update(label='Do_sample')
|
809 |
-
|
810 |
-
chat_textbox = gr.update(placeholder='Enter text and press ENTER')
|
811 |
-
submit_btn = gr.update(value='Submit')
|
812 |
-
regenerate_btn = gr.update(value='🔄 Regenerate')
|
813 |
-
clear_btn = gr.update(value='🗑️ Clear history')
|
814 |
-
|
815 |
-
return [lang_btn, title, btn, parameter_article, beam, repetition, text_num, msi, random, img_num, adjust_btn] +\
|
816 |
-
cap_searchs + editers + [save_btn, save_file] +[parameter_chat, chat_text_num, chat_beam, chat_repetition, chat_random] + \
|
817 |
-
[chat_textbox, submit_btn, regenerate_btn, clear_btn]
|
818 |
-
|
819 |
-
|
820 |
-
parser = argparse.ArgumentParser()
|
821 |
-
parser.add_argument("--folder", default='internlm/internlm-xcomposer-7b')
|
822 |
-
parser.add_argument("--private", default=False, action='store_true')
|
823 |
-
args = parser.parse_args()
|
824 |
-
demo_ui = Demo_UI(args.folder)
|
825 |
-
|
826 |
-
with gr.Blocks(css=custom_css, title='浦语·灵笔 (InternLM-XComposer)') as demo:
|
827 |
-
with gr.Row():
|
828 |
-
with gr.Column(scale=20):
|
829 |
-
#gr.HTML("""<h1 align="center" id="space-title" style="font-size:35px;">🤗 浦语·灵笔 (InternLM-XComposer)</h1>""")
|
830 |
-
gr.HTML(
|
831 |
-
"""<h1 align="center"><img src="https://raw.githubusercontent.com/panzhang0212/interleaved_io/main/logo.png", alt="InternLM-XComposer" border="0" style="margin: 0 auto; height: 200px;" /></a> </h1>"""
|
832 |
-
)
|
833 |
-
with gr.Column(scale=1, min_width=100):
|
834 |
-
lang_btn = gr.Button("中文")
|
835 |
-
|
836 |
-
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
837 |
-
with gr.TabItem("📝 创作图文并茂文章 (Write Interleaved-text-image Article)"):
|
838 |
-
with gr.Row():
|
839 |
-
title = gr.Textbox(
|
840 |
-
label=
|
841 |
-
'Write an illustrated article based on the given title:',
|
842 |
-
scale=2)
|
843 |
-
btn = gr.Button("Submit", scale=1)
|
844 |
-
|
845 |
-
with gr.Row():
|
846 |
-
img_num = gr.Slider(
|
847 |
-
minimum=1.0,
|
848 |
-
maximum=30.0,
|
849 |
-
value=5.0,
|
850 |
-
step=1.0,
|
851 |
-
scale=2,
|
852 |
-
label=
|
853 |
-
'Select the number of the inserted image after article generation.'
|
854 |
-
)
|
855 |
-
adjust_btn = gr.Button('Insert a fixed number of images',
|
856 |
-
interactive=False,
|
857 |
-
scale=1)
|
858 |
-
|
859 |
-
with gr.Row():
|
860 |
-
with gr.Column(scale=1):
|
861 |
-
with gr.Accordion("Advanced Settings",
|
862 |
-
open=False,
|
863 |
-
visible=True) as parameter_article:
|
864 |
-
beam = gr.Slider(minimum=1.0,
|
865 |
-
maximum=6.0,
|
866 |
-
value=5.0,
|
867 |
-
step=1.0,
|
868 |
-
label='Beam Size')
|
869 |
-
repetition = gr.Slider(minimum=0.0,
|
870 |
-
maximum=10.0,
|
871 |
-
value=5.0,
|
872 |
-
step=0.1,
|
873 |
-
label='Repetition_penalty')
|
874 |
-
text_num = gr.Slider(minimum=100.0,
|
875 |
-
maximum=2000.0,
|
876 |
-
value=1000.0,
|
877 |
-
step=1.0,
|
878 |
-
label='Max output tokens')
|
879 |
-
msi = gr.Checkbox(value=True,
|
880 |
-
label='Model selects images')
|
881 |
-
random = gr.Checkbox(label='Do_sample')
|
882 |
-
|
883 |
-
with gr.Column(scale=1):
|
884 |
-
gr.Examples(
|
885 |
-
examples=[["又见��煌"], ["星链新闻稿"], ["如何养好一只宠物"],
|
886 |
-
["Shanghai Travel Guide in English"], ["Travel guidance of London in English"], ["Advertising for Genshin Impact in English"]],
|
887 |
-
inputs=[title],
|
888 |
-
)
|
889 |
-
|
890 |
-
articles = []
|
891 |
-
gallerys = []
|
892 |
-
add_delete_btns = []
|
893 |
-
cap_textboxs = []
|
894 |
-
cap_searchs = []
|
895 |
-
editers = []
|
896 |
-
with gr.Column():
|
897 |
-
for i in range(max_section):
|
898 |
-
with gr.Row():
|
899 |
-
visible = True if i == 0 else False
|
900 |
-
with gr.Column(scale=2):
|
901 |
-
article = gr.Markdown(visible=visible,
|
902 |
-
elem_classes='feedback')
|
903 |
-
articles.append(article)
|
904 |
-
|
905 |
-
with gr.Column(scale=1):
|
906 |
-
with gr.Accordion('edit',
|
907 |
-
open=False,
|
908 |
-
visible=False) as editer:
|
909 |
-
with gr.Row():
|
910 |
-
cap_textbox = gr.Textbox(show_label=False,
|
911 |
-
interactive=True,
|
912 |
-
scale=6,
|
913 |
-
visible=False)
|
914 |
-
cap_search = gr.Button(value="Search",
|
915 |
-
visible=False,
|
916 |
-
scale=1)
|
917 |
-
with gr.Row():
|
918 |
-
gallery = gr.Gallery(visible=False,
|
919 |
-
columns=2,
|
920 |
-
height='auto')
|
921 |
-
|
922 |
-
add_delete_btn = gr.Button(visible=False)
|
923 |
-
|
924 |
-
gallery.select(demo_ui.replace_image, [
|
925 |
-
articles[i],
|
926 |
-
gr.Number(value=i, visible=False)
|
927 |
-
], articles[i])
|
928 |
-
gallerys.append(gallery)
|
929 |
-
add_delete_btns.append(add_delete_btn)
|
930 |
-
|
931 |
-
cap_textboxs.append(cap_textbox)
|
932 |
-
cap_searchs.append(cap_search)
|
933 |
-
editers.append(editer)
|
934 |
-
|
935 |
-
save_btn = gr.Button("Save article")
|
936 |
-
save_file = gr.File(label="Save article")
|
937 |
-
|
938 |
-
for i in range(max_section):
|
939 |
-
add_delete_btns[i].click(demo_ui.add_delete_image,
|
940 |
-
inputs=[
|
941 |
-
articles[i],
|
942 |
-
add_delete_btns[i],
|
943 |
-
gr.Number(value=i,
|
944 |
-
visible=False)
|
945 |
-
],
|
946 |
-
outputs=[
|
947 |
-
articles[i], gallerys[i],
|
948 |
-
add_delete_btns[i],
|
949 |
-
cap_textboxs[i],
|
950 |
-
cap_searchs[i]
|
951 |
-
])
|
952 |
-
cap_searchs[i].click(demo_ui.search_image,
|
953 |
-
inputs=[
|
954 |
-
cap_textboxs[i],
|
955 |
-
gr.Number(value=i, visible=False)
|
956 |
-
],
|
957 |
-
outputs=gallerys[i])
|
958 |
-
|
959 |
-
btn.click(
|
960 |
-
demo_ui.generate_article,
|
961 |
-
inputs=[title, beam, repetition, text_num, msi, random],
|
962 |
-
outputs=articles + gallerys + add_delete_btns +
|
963 |
-
cap_textboxs + cap_searchs + editers + [btn, adjust_btn])
|
964 |
-
# cap_btn.click(demo_ui.add_delete_caption, inputs=None, outputs=articles)
|
965 |
-
save_btn.click(demo_ui.save, inputs=None, outputs=save_file)
|
966 |
-
adjust_btn.click(demo_ui.adjust_img,
|
967 |
-
inputs=img_num,
|
968 |
-
outputs=articles + gallerys +
|
969 |
-
add_delete_btns + cap_textboxs + cap_searchs +
|
970 |
-
editers)
|
971 |
-
|
972 |
-
with gr.TabItem("💬 多模态对话 (Multimodal Chat)", elem_id="chat", id=0):
|
973 |
-
chat_state = gr.State()
|
974 |
-
img_list = gr.State()
|
975 |
-
with gr.Row():
|
976 |
-
with gr.Column(scale=3):
|
977 |
-
imagebox = gr.Image(type="pil")
|
978 |
-
|
979 |
-
with gr.Accordion("Parameters", open=True,
|
980 |
-
visible=False) as parameter_row:
|
981 |
-
chat_max_output_tokens = gr.Slider(
|
982 |
-
minimum=0,
|
983 |
-
maximum=1024,
|
984 |
-
value=512,
|
985 |
-
step=64,
|
986 |
-
interactive=True,
|
987 |
-
label="Max output tokens",
|
988 |
-
)
|
989 |
-
chat_num_beams = gr.Slider(
|
990 |
-
minimum=1,
|
991 |
-
maximum=5,
|
992 |
-
value=3,
|
993 |
-
step=1,
|
994 |
-
interactive=True,
|
995 |
-
label="Beam Size",
|
996 |
-
)
|
997 |
-
chat_repetition_penalty = gr.Slider(
|
998 |
-
minimum=1,
|
999 |
-
maximum=5,
|
1000 |
-
value=1,
|
1001 |
-
step=0.1,
|
1002 |
-
interactive=True,
|
1003 |
-
label="Repetition_penalty",
|
1004 |
-
)
|
1005 |
-
# chat_temperature = gr.Slider(minimum=0, maximum=1, value=1, step=0.1, interactive=True,
|
1006 |
-
# label="Temperature", )
|
1007 |
-
chat_do_sample = gr.Checkbox(interactive=True,
|
1008 |
-
value=True,
|
1009 |
-
label="Do_sample")
|
1010 |
-
|
1011 |
-
with gr.Column(scale=6):
|
1012 |
-
chatbot = grChatbot(elem_id="chatbot",
|
1013 |
-
visible=False,
|
1014 |
-
height=750)
|
1015 |
-
with gr.Row():
|
1016 |
-
with gr.Column(scale=8):
|
1017 |
-
chat_textbox = gr.Textbox(
|
1018 |
-
show_label=False,
|
1019 |
-
placeholder="Enter text and press ENTER",
|
1020 |
-
visible=False).style(container=False)
|
1021 |
-
with gr.Column(scale=1, min_width=60):
|
1022 |
-
submit_btn = gr.Button(value="Submit",
|
1023 |
-
visible=False)
|
1024 |
-
with gr.Row(visible=True) as button_row:
|
1025 |
-
regenerate_btn = gr.Button(value="🔄 Regenerate",
|
1026 |
-
interactive=False)
|
1027 |
-
clear_btn = gr.Button(value="🗑️ Clear history",
|
1028 |
-
interactive=False)
|
1029 |
-
|
1030 |
-
btn_list = [regenerate_btn, clear_btn]
|
1031 |
-
parameter_list = [
|
1032 |
-
chat_max_output_tokens, chat_repetition_penalty,
|
1033 |
-
chat_num_beams, chat_do_sample
|
1034 |
-
]
|
1035 |
-
|
1036 |
-
chat_textbox.submit(
|
1037 |
-
demo_ui.chat_ask,
|
1038 |
-
[chat_state, img_list, chat_textbox, imagebox],
|
1039 |
-
[chat_state, img_list, chatbot, chat_textbox, imagebox] +
|
1040 |
-
btn_list).then(demo_ui.chat_answer,
|
1041 |
-
[chat_state, img_list] + parameter_list,
|
1042 |
-
[chat_state, chatbot] + btn_list)
|
1043 |
-
submit_btn.click(
|
1044 |
-
demo_ui.chat_ask,
|
1045 |
-
[chat_state, img_list, chat_textbox, imagebox],
|
1046 |
-
[chat_state, img_list, chatbot, chat_textbox, imagebox] +
|
1047 |
-
btn_list).then(demo_ui.chat_answer,
|
1048 |
-
[chat_state, img_list] + parameter_list,
|
1049 |
-
[chat_state, chatbot] + btn_list)
|
1050 |
-
|
1051 |
-
regenerate_btn.click(demo_ui.clear_answer, chat_state,
|
1052 |
-
[chat_state, chatbot]).then(
|
1053 |
-
demo_ui.chat_answer,
|
1054 |
-
[chat_state, img_list] + parameter_list,
|
1055 |
-
[chat_state, chatbot] + btn_list)
|
1056 |
-
clear_btn.click(
|
1057 |
-
demo_ui.chat_clear_history, None,
|
1058 |
-
[chat_state, img_list, chatbot, chat_textbox, imagebox] +
|
1059 |
-
btn_list)
|
1060 |
-
|
1061 |
-
demo.load(load_demo, None, [
|
1062 |
-
chat_state, img_list, chatbot, chat_textbox, submit_btn,
|
1063 |
-
parameter_row
|
1064 |
-
])
|
1065 |
-
|
1066 |
-
lang_btn.click(change_language, inputs=lang_btn, outputs=[lang_btn, title, btn, parameter_article] +\
|
1067 |
-
[beam, repetition, text_num, msi, random, img_num, adjust_btn] + cap_searchs + editers +\
|
1068 |
-
[save_btn, save_file] + [parameter_row, chat_max_output_tokens, chat_num_beams, chat_repetition_penalty, chat_do_sample] +\
|
1069 |
-
[chat_textbox, submit_btn, regenerate_btn, clear_btn])
|
1070 |
-
demo.queue(concurrency_count=8, status_update_rate=10, api_open=False)
|
1071 |
|
1072 |
if __name__ == "__main__":
|
1073 |
-
|
1074 |
-
|
1075 |
-
|
|
|
1 |
+
from flask import Flask, render_template
|
|
|
|
|
|
|
|
|
2 |
|
3 |
+
app = Flask(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
|
|
5 |
|
6 |
+
@app.route("/")
|
7 |
+
def index():
|
8 |
+
return render_template("index.html")
|
|
|
|
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
if __name__ == "__main__":
|
12 |
+
app.run(debug=False, port=7860, host="0.0.0.0")
|
|
|
|