File size: 32,673 Bytes
de7836d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95878c0
 
de7836d
 
 
 
95878c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de7836d
e4ab5ef
de7836d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4ab5ef
 
de7836d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ddfd056
de7836d
 
 
 
 
 
 
 
 
 
95878c0
e4ab5ef
de7836d
e4ab5ef
 
de7836d
e4ab5ef
 
 
de7836d
 
 
e4ab5ef
 
de7836d
 
e4ab5ef
 
de7836d
 
e4ab5ef
de7836d
 
 
 
 
e4ab5ef
 
de7836d
 
 
 
e4ab5ef
 
de7836d
e4ab5ef
 
de7836d
e4ab5ef
de7836d
 
 
e4ab5ef
 
de7836d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ddfd056
 
 
 
de7836d
ddfd056
de7836d
 
 
 
95878c0
de7836d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4ab5ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de7836d
 
 
 
e4ab5ef
de7836d
 
95878c0
de7836d
 
 
 
 
 
 
 
 
 
e4ab5ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de7836d
 
 
 
e4ab5ef
de7836d
1927c6e
e4ab5ef
de7836d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
'''
AnyText: Multilingual Visual Text Generation And Editing
Paper: https://arxiv.org/abs/2311.03054
Code: https://github.com/tyxsspa/AnyText
Copyright (c) Alibaba, Inc. and its affiliates.
'''
import os
from modelscope.pipelines import pipeline
import cv2
import gradio as gr
import numpy as np
import re
from gradio.components import Component
from util import check_channels, resize_image, save_images
import json
import argparse


BBOX_MAX_NUM = 8
img_save_folder = 'SaveImages'
load_model = True


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--use_fp32",
        action="store_true",
        default=False,
        help="Whether or not to use fp32 during inference."
    )
    parser.add_argument(
        "--no_translator",
        action="store_true",
        default=False,
        help="Whether or not to use the CH->EN translator, which enable input Chinese prompt and cause ~4GB VRAM."
    )
    parser.add_argument(
        "--font_path",
        type=str,
        default='font/Arial_Unicode.ttf',
        help="path of a font file"
    )
    args = parser.parse_args()
    return args


args = parse_args()
if load_model:
    inference = pipeline('my-anytext-task', model='damo/cv_anytext_text_generation_editing', model_revision='v1.1.2', use_fp16=not args.use_fp32, use_translator=not args.no_translator, font_path=args.font_path)


def count_lines(prompt):
    prompt = prompt.replace('“', '"')
    prompt = prompt.replace('”', '"')
    p = '"(.*?)"'
    strs = re.findall(p, prompt)
    if len(strs) == 0:
        strs = [' ']
    return len(strs)


def generate_rectangles(w, h, n, max_trys=200):
    img = np.zeros((h, w, 1), dtype=np.uint8)
    rectangles = []
    attempts = 0
    n_pass = 0
    low_edge = int(max(w, h)*0.3 if n <= 3 else max(w, h)*0.2)  # ~150, ~100
    while attempts < max_trys:
        rect_w = min(np.random.randint(max((w*0.5)//n, low_edge), w), int(w*0.8))
        ratio = np.random.uniform(4, 10)
        rect_h = max(low_edge, int(rect_w/ratio))
        rect_h = min(rect_h, int(h*0.8))
        # gen rotate angle
        rotation_angle = 0
        rand_value = np.random.rand()
        if rand_value < 0.7:
            pass
        elif rand_value < 0.8:
            rotation_angle = np.random.randint(0, 40)
        elif rand_value < 0.9:
            rotation_angle = np.random.randint(140, 180)
        else:
            rotation_angle = np.random.randint(85, 95)
        # rand position
        x = np.random.randint(0, w - rect_w)
        y = np.random.randint(0, h - rect_h)
        # get vertex
        rect_pts = cv2.boxPoints(((rect_w/2, rect_h/2), (rect_w, rect_h), rotation_angle))
        rect_pts = np.int32(rect_pts)
        # move
        rect_pts += (x, y)
        # check boarder
        if np.any(rect_pts < 0) or np.any(rect_pts[:, 0] >= w) or np.any(rect_pts[:, 1] >= h):
            attempts += 1
            continue
        # check overlap
        if any(check_overlap_polygon(rect_pts, rp) for rp in rectangles):
            attempts += 1
            continue
        n_pass += 1
        cv2.fillPoly(img, [rect_pts], 255)
        rectangles.append(rect_pts)
        if n_pass == n:
            break
    print("attempts:", attempts)
    if len(rectangles) != n:
        raise gr.Error(f'Failed in auto generate positions after {attempts} attempts, try again!')
    return img


def check_overlap_polygon(rect_pts1, rect_pts2):
    poly1 = cv2.convexHull(rect_pts1)
    poly2 = cv2.convexHull(rect_pts2)
    rect1 = cv2.boundingRect(poly1)
    rect2 = cv2.boundingRect(poly2)
    if rect1[0] + rect1[2] >= rect2[0] and rect2[0] + rect2[2] >= rect1[0] and rect1[1] + rect1[3] >= rect2[1] and rect2[1] + rect2[3] >= rect1[1]:
        return True
    return False


def draw_rects(width, height, rects):
    img = np.zeros((height, width, 1), dtype=np.uint8)
    for rect in rects:
        x1 = int(rect[0] * width)
        y1 = int(rect[1] * height)
        w = int(rect[2] * width)
        h = int(rect[3] * height)
        x2 = x1 + w
        y2 = y1 + h
        cv2.rectangle(img, (x1, y1), (x2, y2), 255, -1)
    return img


def process(mode, prompt, pos_radio, sort_radio, revise_pos, show_debug, draw_img, rect_img, ref_img, ori_img, img_count, ddim_steps, w, h, strength, cfg_scale, seed, eta, a_prompt, n_prompt, *rect_list):
    n_lines = count_lines(prompt)
    # Text Generation
    if mode == 'gen':
        # create pos_imgs
        if pos_radio == 'Manual-draw(手绘)':
            if draw_img is not None:
                pos_imgs = 255 - draw_img['image']
                if 'mask' in draw_img:
                    pos_imgs = pos_imgs.astype(np.float32) + draw_img['mask'][..., 0:3].astype(np.float32)
                    pos_imgs = pos_imgs.clip(0, 255).astype(np.uint8)
            else:
                pos_imgs = np.zeros((w, h, 1))
        elif pos_radio == 'Manual-rect(拖框)':
            rect_check = rect_list[:BBOX_MAX_NUM]
            rect_xywh = rect_list[BBOX_MAX_NUM:]
            checked_rects = []
            for idx, c in enumerate(rect_check):
                if c:
                    _xywh = rect_xywh[4*idx:4*(idx+1)]
                    checked_rects += [_xywh]
            pos_imgs = draw_rects(w, h, checked_rects)
        elif pos_radio == 'Auto-rand(随机)':
            pos_imgs = generate_rectangles(w, h, n_lines, max_trys=500)
    # Text Editing
    elif mode == 'edit':
        revise_pos = False  # disable pos revise in edit mode
        if ref_img is None or ori_img is None:
            raise gr.Error('No reference image, please upload one for edit!')
        edit_image = ori_img.clip(1, 255)  # for mask reason
        edit_image = check_channels(edit_image)
        edit_image = resize_image(edit_image, max_length=768)
        h, w = edit_image.shape[:2]
        if isinstance(ref_img, dict) and 'mask' in ref_img and ref_img['mask'].mean() > 0:
            pos_imgs = 255 - edit_image
            edit_mask = cv2.resize(ref_img['mask'][..., 0:3], (w, h))
            pos_imgs = pos_imgs.astype(np.float32) + edit_mask.astype(np.float32)
            pos_imgs = pos_imgs.clip(0, 255).astype(np.uint8)
        else:
            if isinstance(ref_img, dict) and 'image' in ref_img:
                ref_img = ref_img['image']
            pos_imgs = 255 - ref_img  # example input ref_img is used as pos
    cv2.imwrite('pos_imgs.png', 255-pos_imgs[..., ::-1])
    params = {
        "sort_priority": sort_radio,
        "show_debug": show_debug,
        "revise_pos": revise_pos,
        "image_count": img_count,
        "ddim_steps": ddim_steps,
        "image_width": w,
        "image_height": h,
        "strength": strength,
        "cfg_scale": cfg_scale,
        "eta": eta,
        "a_prompt": a_prompt,
        "n_prompt": n_prompt
    }
    input_data = {
        "prompt": prompt,
        "seed": seed,
        "draw_pos": pos_imgs,
        "ori_image": ori_img,
    }
    results, rtn_code, rtn_warning, debug_info = inference(input_data, mode=mode, **params)
    if rtn_code >= 0:
        save_images(results, img_save_folder)
        print(f'Done, result images are saved in: {img_save_folder}')
        if rtn_warning:
            gr.Warning(rtn_warning)
    else:
        raise gr.Error(rtn_warning)
    return results, gr.Markdown(debug_info, visible=show_debug)


def create_canvas(w=512, h=512, c=3, line=5):
    image = np.full((h, w, c), 200, dtype=np.uint8)
    for i in range(h):
        if i % (w//line) == 0:
            image[i, :, :] = 150
    for j in range(w):
        if j % (w//line) == 0:
            image[:, j, :] = 150
    image[h//2-8:h//2+8, w//2-8:w//2+8, :] = [200, 0, 0]
    return image


def resize_w(w, img1, img2):
    if isinstance(img2, dict):
        img2 = img2['image']
    return [cv2.resize(img1, (w, img1.shape[0])), cv2.resize(img2, (w, img2.shape[0]))]


def resize_h(h, img1, img2):
    if isinstance(img2, dict):
        img2 = img2['image']
    return [cv2.resize(img1, (img1.shape[1], h)), cv2.resize(img2, (img2.shape[1], h))]


is_t2i = 'true'
block = gr.Blocks(css='style.css', theme=gr.themes.Soft()).queue()

with open('javascript/bboxHint.js', 'r') as file:
    value = file.read()
escaped_value = json.dumps(value)

with block:
    block.load(fn=None,
               _js=f"""() => {{
               const script = document.createElement("script");
               const text =  document.createTextNode({escaped_value});
               script.appendChild(text);
               document.head.appendChild(script);
               }}""")
    gr.HTML('<div style="text-align: center; margin: 20px auto;"> \
            <img id="banner" src="https://modelscope.cn/api/v1/studio/damo/studio_anytext/repo?Revision=master&FilePath=example_images/banner.png&View=true" alt="anytext"> <br>  \
            [<a href="https://arxiv.org/abs/2311.03054" style="color:blue; font-size:18px;">arXiv</a>] \
            [<a href="https://github.com/tyxsspa/AnyText" style="color:blue; font-size:18px;">Code</a>] \
            [<a href="https://modelscope.cn/models/damo/cv_anytext_text_generation_editing/summary" style="color:blue; font-size:18px;">ModelScope</a>]\
            [<a href="https://huggingface.co/spaces/modelscope/AnyText" style="color:blue; font-size:18px;">HuggingFace</a>]\
            version: 1.1.2 </div>')
    with gr.Row(variant='compact'):
        with gr.Column() as left_part:
            pass
        with gr.Column():
            result_gallery = gr.Gallery(label='Result(结果)', show_label=True, preview=True, columns=2, allow_preview=True, height=600)
            result_info = gr.Markdown('', visible=False)
        with left_part:
            with gr.Accordion('🕹Instructions(说明)', open=False,):
                with gr.Tabs():
                    with gr.Tab("English"):
                        gr.Markdown('<span style="color:#3B5998;font-size:20px">Run Examples</span>')
                        gr.Markdown('<span style="color:#575757;font-size:16px">AnyText has two modes: Text Generation and Text Editing, and we provides a variety of examples. Select one, click on [Run!] button to run.</span>')
                        gr.Markdown('<span style="color:gray;font-size:12px">Please note, before running examples, ensure the manual draw area is empty, otherwise may get wrong results. Additionally, different examples use \
                                     different parameters (such as resolution, seed, etc.). When generate your own, please pay attention to the parameter changes, or refresh the page to restore the default parameters.</span>')
                        gr.Markdown('<span style="color:#3B5998;font-size:20px">Text Generation</span>')
                        gr.Markdown('<span style="color:#575757;font-size:16px">Enter the textual description (in Chinese or English) of the image you want to generate in [Prompt]. Each text line that needs to be generated should be \
                                     enclosed in double quotes. Then, manually draw the specified position for each text line to generate the image.</span>\
                                     <span style="color:red;font-size:16px">The drawing of text positions is crucial to the quality of the resulting image</span>, \
                                     <span style="color:#575757;font-size:16px">please do not draw too casually or too small. The number of positions should match the number of text lines, and the size of each position should be matched \
                                     as closely as possible to the length or width of the corresponding text line. If [Manual-draw] is inconvenient, you can try dragging rectangles [Manual-rect] or random positions [Auto-rand].</span>')
                        gr.Markdown('<span style="color:gray;font-size:12px">When generating multiple lines, each position is matched with the text line according to a certain rule. The [Sort Position] option is used to \
                                     determine whether to prioritize sorting from top to bottom or from left to right. You can open the [Show Debug] option in the parameter settings to observe the text position and glyph image \
                                     in the result. You can also select the [Revise Position] which uses the bounding box of the rendered text as the revised position. However, it is occasionally found that the creativity of the \
                                     generated text is slightly lower using this method.</span>')
                        gr.Markdown('<span style="color:#3B5998;font-size:20px">Text Editing</span>')
                        gr.Markdown('<span style="color:#575757;font-size:16px">Please upload an image in [Ref] as a reference image, then adjust the brush size, and mark the area(s) to be edited. Input the textual description and \
                                     the new text to be modified in [Prompt], then generate the image.</span>')
                        gr.Markdown('<span style="color:gray;font-size:12px">The reference image can be of any resolution, but it will be internally processed with a limit that the longer side cannot exceed 768 pixels, and the \
                                     width and height will both be scaled to multiples of 64.</span>')
                    with gr.Tab("简体中文"):
                        gr.Markdown('<span style="color:#3B5998;font-size:20px">运行示例</span>')
                        gr.Markdown('<span style="color:#575757;font-size:16px">AnyText有两种运行模式:文字生成和文字编辑,每种模式下提供了丰富的示例,选择一个,点击[Run!]即可。</span>')
                        gr.Markdown('<span style="color:gray;font-size:12px">请注意,运行示例前确保手绘位置区域是空的,防止影响示例结果,另外不同示例使用不同的参数(如分辨率,种子数等),如果要自行生成时,请留意参数变化,或刷新页面恢复到默认参数。</span>')
                        gr.Markdown('<span style="color:#3B5998;font-size:20px">文字生成</span>')
                        gr.Markdown('<span style="color:#575757;font-size:16px">在Prompt中输入描述提示词(支持中英文),需要生成的每一行文字用双引号包裹,然后依次手绘指定每行文字的位置,生成图片。</span>\
                                     <span style="color:red;font-size:16px">文字位置的绘制对成图质量很关键</span>, \
                                     <span style="color:#575757;font-size:16px">请不要画的太随意或太小,位置的数量要与文字行数量一致,每个位置的尺寸要与对应的文字行的长短或宽高尽量匹配。如果手绘(Manual-draw)不方便,\
                                     可以尝试拖框矩形(Manual-rect)或随机生成(Auto-rand)。</span>')
                        gr.Markdown('<span style="color:gray;font-size:12px">多行生成时,每个位置按照一定规则排序后与文字行做对应,Sort Position选项用于确定排序时优先从上到下还是从左到右。\
                                     可以在参数设置中打开Show Debug选项,在结果图像中观察文字位置和字形图。也可以勾选Revise Position选项,这样会用渲染文字的外接矩形作为修正后的位置,不过偶尔发现这样生成的文字创造性略低。</span>')
                        gr.Markdown('<span style="color:#3B5998;font-size:20px">文字编辑</span>')
                        gr.Markdown('<span style="color:#575757;font-size:16px">请上传一张待编辑的图片作为参考图(Ref),然后调整笔触大小后,在参考图上涂抹要编辑的位置,在Prompt中输入描述提示词和要修改的文字内容,生成图片。</span>')
                        gr.Markdown('<span style="color:gray;font-size:12px">参考图可以为任意分辨率,但内部处理时会限制长边不能超过768,并且宽高都被缩放为64的整数倍。</span>')
            with gr.Accordion('🛠Parameters(参数)', open=False):
                with gr.Row(variant='compact'):
                    img_count = gr.Slider(label="Image Count(图片数)", minimum=1, maximum=12, value=4, step=1)
                    ddim_steps = gr.Slider(label="Steps(步数)", minimum=1, maximum=100, value=20, step=1)
                with gr.Row(variant='compact'):
                    image_width = gr.Slider(label="Image Width(宽度)", minimum=256, maximum=768, value=512, step=64)
                    image_height = gr.Slider(label="Image Height(高度)", minimum=256, maximum=768, value=512, step=64)
                with gr.Row(variant='compact'):
                    strength = gr.Slider(label="Strength(控制力度)", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
                    cfg_scale = gr.Slider(label="CFG-Scale(CFG强度)", minimum=0.1, maximum=30.0, value=9.0, step=0.1)
                with gr.Row(variant='compact'):
                    seed = gr.Slider(label="Seed(种子数)", minimum=-1, maximum=99999999, step=1, randomize=False, value=-1)
                    eta = gr.Number(label="eta (DDIM)", value=0.0)
                with gr.Row(variant='compact'):
                    show_debug = gr.Checkbox(label='Show Debug(调试信息)', value=False)
                    gr.Markdown('<span style="color:silver;font-size:12px">whether show glyph image and debug information in the result(是否在结果中显示glyph图以及调试信息)</span>')
                a_prompt = gr.Textbox(label="Added Prompt(附加提示词)", value='best quality, extremely detailed,4k, HD, supper legible text,  clear text edges,  clear strokes, neat writing, no watermarks')
                n_prompt = gr.Textbox(label="Negative Prompt(负向提示词)", value='low-res, bad anatomy, extra digit, fewer digits, cropped, worst quality, low quality, watermark, unreadable text, messy words, distorted text, disorganized writing, advertising picture')
            prompt = gr.Textbox(label="Prompt(提示词)")
            with gr.Tabs() as tab_modes:
                with gr.Tab("🖼Text Generation(文字生成)", elem_id='MD-tab-t2i') as mode_gen:
                    pos_radio = gr.Radio(["Manual-draw(手绘)", "Manual-rect(拖框)", "Auto-rand(随机)"], value='Manual-draw(手绘)', label="Pos-Method(位置方式)", info="choose a method to specify text positions(选择方法用于指定文字位置).")
                    with gr.Row():
                        sort_radio = gr.Radio(["↕", "↔"], value='↕', label="Sort Position(位置排序)", info="position sorting priority(位置排序时的优先级)")
                        revise_pos = gr.Checkbox(label='Revise Position(修正位置)', value=False)
                        # gr.Markdown('<span style="color:silver;font-size:12px">try to revise according to text\'s bounding rectangle(尝试通过渲染后的文字行的外接矩形框修正位置)</span>')
                    with gr.Row(variant='compact'):
                        rect_cb_list: list[Component] = []
                        rect_xywh_list: list[Component] = []
                        for i in range(BBOX_MAX_NUM):
                            e = gr.Checkbox(label=f'{i}', value=False, visible=False, min_width='10')
                            x = gr.Slider(label='x', value=0.4, minimum=0.0, maximum=1.0, step=0.0001, elem_id=f'MD-t2i-{i}-x', visible=False)
                            y = gr.Slider(label='y', value=0.4, minimum=0.0, maximum=1.0, step=0.0001, elem_id=f'MD-t2i-{i}-y',  visible=False)
                            w = gr.Slider(label='w', value=0.2, minimum=0.0, maximum=1.0, step=0.0001, elem_id=f'MD-t2i-{i}-w',  visible=False)
                            h = gr.Slider(label='h', value=0.2, minimum=0.0, maximum=1.0, step=0.0001, elem_id=f'MD-t2i-{i}-h',  visible=False)
                            x.change(fn=None, inputs=x, outputs=x, _js=f'v => onBoxChange({is_t2i}, {i}, "x", v)', show_progress=False, queue=False)
                            y.change(fn=None, inputs=y, outputs=y, _js=f'v => onBoxChange({is_t2i}, {i}, "y", v)', show_progress=False, queue=False)
                            w.change(fn=None, inputs=w, outputs=w, _js=f'v => onBoxChange({is_t2i}, {i}, "w", v)', show_progress=False, queue=False)
                            h.change(fn=None, inputs=h, outputs=h, _js=f'v => onBoxChange({is_t2i}, {i}, "h", v)', show_progress=False, queue=False)

                            e.change(fn=None, inputs=e, outputs=e, _js=f'e => onBoxEnableClick({is_t2i}, {i}, e)', queue=False)
                            rect_cb_list.extend([e])
                            rect_xywh_list.extend([x, y, w, h])

                    rect_img = gr.Image(value=create_canvas(), label="Rext Position(方框位置)", elem_id="MD-bbox-rect-t2i", show_label=False, visible=False)
                    draw_img = gr.Image(value=create_canvas(), label="Draw Position(绘制位置)", visible=True, tool='sketch', show_label=False, brush_radius=100)

                    def re_draw():
                        return [gr.Image(value=create_canvas(), tool='sketch'), gr.Slider(value=512), gr.Slider(value=512)]
                    draw_img.clear(re_draw, None, [draw_img, image_width, image_height])
                    image_width.release(resize_w, [image_width, rect_img, draw_img], [rect_img, draw_img])
                    image_height.release(resize_h, [image_height, rect_img, draw_img], [rect_img, draw_img])

                    def change_options(selected_option):
                        return [gr.Checkbox(visible=selected_option == 'Manual-rect(拖框)')] * BBOX_MAX_NUM + \
                                [gr.Image(visible=selected_option == 'Manual-rect(拖框)'),
                                 gr.Image(visible=selected_option == 'Manual-draw(手绘)'),
                                 gr.Radio(visible=selected_option != 'Auto-rand(随机)'),
                                 gr.Checkbox(value=selected_option == 'Auto-rand(随机)')]
                    pos_radio.change(change_options, pos_radio, rect_cb_list + [rect_img, draw_img, sort_radio, revise_pos], show_progress=False, queue=False)
                    with gr.Row():
                        gr.Markdown("")
                        run_gen = gr.Button(value="Run(运行)!", scale=0.3, elem_classes='run')
                        gr.Markdown("")

                    def exp_gen_click():
                        return [gr.Slider(value=512), gr.Slider(value=512)]  # all examples are 512x512, refresh draw_img
                    with gr.Tab("English Examples"):
                        exp_gen_en = gr.Examples(
                            [
                                ['A raccoon stands in front of the blackboard with the words "Deep Learning" written on it', "example_images/gen17.png", "Manual-draw(手绘)", "↕", False, 4, 33789703],
                                ['A crayon drawing by child,  a snowman with a Santa hat, pine trees, outdoors in heavy snowfall, titled "Snowman"', "example_images/gen18.png", "Manual-draw(手绘)", "↕", False, 4, 35621187],
                                ['A meticulously designed logo, a minimalist brain, stick drawing style, simplistic style,  refined with minimal strokes, black and white color, white background,  futuristic sense, exceptional design, logo name is "NextAI"', "example_images/gen19.png", "Manual-draw(手绘)", "↕", False, 4, 2563689],
                                ['A photograph of the colorful graffiti art on the wall with the words "Hi~" "Get Ready" "to" "Party"', "example_images/gen21.png", "Manual-draw(手绘)", "↕", False, 4, 88952132],
                                ['photo of caramel macchiato coffee on the table, top-down perspective, with "Any" "Text" written on it using cream', "example_images/gen9.png", "Manual-draw(手绘)", "↕", False, 4, 66273235],
                                ['A fine sweater with knitted text: "Have" "A" "Good Day"', "example_images/gen20.png", "Manual-draw(手绘)", "↕", False, 4, 35107824],
                                ['Sign on the clean building that reads "科学" and "과학"  and "ステップ" and "SCIENCE"', "example_images/gen6.png", "Manual-draw(手绘)", "↕", True, 4, 13246309],
                                ['A delicate square cake, cream and fruit, with "CHEERS" "to the" and "GRADUATE" written in chocolate', "example_images/gen8.png", "Manual-draw(手绘)", "↕", False, 4, 93424638],
                                ['A nice drawing in pencil of Michael Jackson,  with the words "Micheal" and "Jackson" written on it', "example_images/gen7.png", "Manual-draw(手绘)", "↕", False, 4, 83866922],
                                ['a well crafted ice sculpture that made with "Happy" and "Holidays". Dslr photo, perfect illumination', "example_images/gen11.png", "Manual-draw(手绘)", "↕", True, 4, 64901362],
                            ],
                            [prompt, draw_img, pos_radio, sort_radio, revise_pos, img_count, seed],
                            examples_per_page=5,
                            label=''
                        )
                        exp_gen_en.dataset.click(exp_gen_click, None, [image_width, image_height])
                    with gr.Tab("中文示例"):
                        exp_gen_ch = gr.Examples(
                            [
                                ['一只浣熊站在黑板前,上面写着"深度学习"', "example_images/gen1.png", "Manual-draw(手绘)", "↕", False, 4, 81808278],
                                ['一个儿童蜡笔画,森林里有一个可爱的蘑菇形状的房子,标题是"森林小屋"', "example_images/gen16.png", "Manual-draw(手绘)", "↕", False, 4, 40173333],
                                ['一个精美设计的logo,画的是一个黑白风格的厨师,带着厨师帽,logo下方写着“深夜食堂”', "example_images/gen14.png", "Manual-draw(手绘)", "↕", False, 4, 6970544],
                                ['一张户外雪地靴的电商广告,上面写着 “双12大促!”,“立减50”,“加绒加厚”,“穿脱方便”,“温暖24小时送达”, “包邮”,高级设计感,精美构图', "example_images/gen15.png", "Manual-draw(手绘)", "↕", False, 4, 66980376],
                                ['一个精致的马克杯,上面雕刻着一首中国古诗,内容是 "花落知多少" "夜来风雨声" "处处闻啼鸟" "春眠不觉晓"', "example_images/gen3.png", "Manual-draw(手绘)", "↔", False, 4, 60358279],
                                ['一件精美的毛衣,上面有针织的文字:"通义丹青"', "example_images/gen4.png", "Manual-draw(手绘)", "↕", False, 4, 48769450],
                                ['一个双肩包的特写照,上面用针织文字写着”为了无法“ ”计算的价值“', "example_images/gen12.png", "Manual-draw(手绘)", "↕", False, 4, 35552323],
                                ['一个漂亮的蜡笔画,有行星,宇航员,还有宇宙飞船,上面写的是"去火星旅行", "王小明", "11月1日"', "example_images/gen5.png", "Manual-draw(手绘)", "↕", False, 4, 42328250],
                                ['一个装饰华丽的蛋糕,上面用奶油写着“阿里云”和"APSARA"', "example_images/gen13.png", "Manual-draw(手绘)", "↕", False, 4, 62357019],
                                ['一张关于墙上的彩色涂鸦艺术的摄影作品,上面写着“人工智能" 和 "神经网络"', "example_images/gen10.png", "Manual-draw(手绘)", "↕", False, 4, 64722007],
                                ['一枚中国古代铜钱,  上面的文字是 "康"  "寶" "通" "熙"', "example_images/gen2.png", "Manual-draw(手绘)", "↕", False, 4, 24375031],
                            ],
                            [prompt, draw_img, pos_radio, sort_radio, revise_pos, img_count, seed],
                            examples_per_page=5,
                            label=''
                        )
                        exp_gen_ch.dataset.click(exp_gen_click, None, [image_width, image_height])

                with gr.Tab("🎨Text Editing(文字编辑)") as mode_edit:
                    with gr.Row(variant='compact'):
                        ref_img = gr.Image(label='Ref(参考图)', source='upload')
                        ori_img = gr.Image(label='Ori(原图)', scale=0.4)

                    def upload_ref(x):
                        return [gr.Image(type="numpy", brush_radius=100, tool='sketch'),
                                gr.Image(value=x)]

                    def clear_ref(x):
                        return gr.Image(source='upload', tool=None)
                    ref_img.upload(upload_ref, ref_img, [ref_img, ori_img])
                    ref_img.clear(clear_ref, ref_img, ref_img)
                    with gr.Row():
                        gr.Markdown("")
                        run_edit = gr.Button(value="Run(运行)!", scale=0.3, elem_classes='run')
                        gr.Markdown("")
                    with gr.Tab("English Examples"):
                        gr.Examples(
                            [
                                ['A Minion meme that says "wrong"', "example_images/ref15.jpeg", "example_images/edit15.png", 4, 39934684],
                                ['A pile of fruit with "UIT" written in the middle', "example_images/ref13.jpg", "example_images/edit13.png", 4, 54263567],
                                ['Characters written in chalk on the blackboard that says "DADDY"', "example_images/ref8.jpg", "example_images/edit8.png", 4, 73556391],
                                ['The blackboard says "Here"', "example_images/ref11.jpg", "example_images/edit11.png", 2, 15353513],
                                ['A letter picture that says "THER"', "example_images/ref6.jpg", "example_images/edit6.png", 4, 72321415],
                                ['A cake with colorful characters that reads "EVERYDAY"', "example_images/ref7.jpg", "example_images/edit7.png", 4, 8943410],
                                ['photo of clean sandy beach," " " "', "example_images/ref16.jpeg", "example_images/edit16.png", 4, 85664100],
                            ],
                            [prompt, ori_img, ref_img, img_count, seed],
                            examples_per_page=5,
                            label=''
                        )
                    with gr.Tab("中文示例"):
                        gr.Examples(
                            [
                                ['精美的书法作品,上面写着“志” “存” “高” ”远“', "example_images/ref10.jpg", "example_images/edit10.png", 4, 98053044],
                                ['一个表情包,小猪说 "下班"', "example_images/ref2.jpg", "example_images/edit2.png", 2, 43304008],
                                ['一个中国古代铜钱,上面写着"乾" "隆"', "example_images/ref12.png", "example_images/edit12.png", 4, 89159482],
                                ['一个漫画,上面写着" "', "example_images/ref14.png", "example_images/edit14.png", 4, 94081527],
                                ['一个黄色标志牌,上边写着"不要" 和 "大意"', "example_images/ref3.jpg", "example_images/edit3.png", 2, 64010349],
                                ['一个青铜鼎,上面写着"  "和"  "', "example_images/ref4.jpg", "example_images/edit4.png", 4, 71139289],
                                ['一个建筑物前面的字母标牌, 上面写着 " "', "example_images/ref5.jpg", "example_images/edit5.png", 4, 50416289],
                            ],
                            [prompt, ori_img, ref_img, img_count, seed],
                            examples_per_page=5,
                            label=''
                        )
    ips = [prompt, pos_radio, sort_radio, revise_pos, show_debug, draw_img, rect_img, ref_img, ori_img, img_count, ddim_steps, image_width, image_height, strength, cfg_scale, seed, eta, a_prompt, n_prompt, *(rect_cb_list+rect_xywh_list)]
    run_gen.click(fn=process, inputs=[gr.State('gen')] + ips, outputs=[result_gallery, result_info])
    run_edit.click(fn=process, inputs=[gr.State('edit')] + ips, outputs=[result_gallery, result_info])


block.launch(
    server_name='0.0.0.0', #if os.getenv('GRADIO_LISTEN', '') != '' else "127.0.0.1",
    share=False,
    root_path=f"/{os.getenv('GRADIO_PROXY_PATH')}" if os.getenv('GRADIO_PROXY_PATH') else ""
)