File size: 21,235 Bytes
dbac7c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523e312
dbac7c5
 
 
 
 
523e312
 
 
 
 
 
 
dbac7c5
 
 
 
47bf3ee
 
 
 
 
 
dbac7c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47bf3ee
dbac7c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f0ce711
dbac7c5
 
 
 
 
 
 
 
 
 
f0ce711
dbac7c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c85903e
dbac7c5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time    : 2024-07-31
# @Author  : Junjie He
import gradio as gr

from src.process import (
    text_to_single_id_generation_process,
    text_to_multi_id_generation_process,
    image_to_single_id_generation_process,
)


def text_to_single_id_generation_block():
    """Lay out the "Text-to-Single-ID" tab and hook up its Run button.

    Intended to be called inside an open ``gr.Blocks`` context, as the
    ``__main__`` section of this file does.  The description and tips HTML
    are read from the module-level names ``text_to_single_id_description``
    and ``text_to_single_id_tips``, which this file only assigns in its
    ``__main__`` section.

    Clicking Run calls ``text_to_single_id_generation_process`` with the
    component values in the order of the ``inputs`` list passed to
    ``click`` below, and routes its result to the output gallery.
    """
    # Keyword bundles reused below so repeated settings live in one place.
    unit_slider = dict(minimum=0.0, maximum=1.0, step=0.01)
    narrow_col = dict(scale=1, min_width=100)

    gr.Markdown("## Text-to-Single-ID Generation")
    gr.HTML(text_to_single_id_description)
    gr.HTML(text_to_single_id_tips)

    with gr.Row():
        # Left column: prompts, resolution, Run button and tuning controls.
        with gr.Column(**narrow_col):
            prompt_box = gr.Textbox(value="", label="Prompt", lines=2)
            negative_box = gr.Textbox(value="nsfw", label="Negative Prompt")
            resolution = gr.Dropdown(
                choices=["768x512", "512x512", "512x768"],
                value="512x512",
                label="Image Resolution (HxW)",
            )
            run = gr.Button(value="Run")

            with gr.Accordion("Advanced Options", open=True):
                seed_slider = gr.Slider(
                    label="Seed (-1 indicates random)",
                    minimum=-1,
                    maximum=2147483647,
                    step=1,
                    value=-1,
                )
                id_scale = gr.Slider(label="Face ID Scale", value=0.7, **unit_slider)
                structure_scale = gr.Slider(label="Face Structure Scale", value=0.1, **unit_slider)
                style_strength = gr.Slider(label="style_scale", value=0.7, **unit_slider)
                upscale = gr.Checkbox(label="RealESRGAN 2x", value=True)

        # Right column: identity/style images on top, results underneath.
        with gr.Column(scale=3, min_width=100):
            with gr.Row(equal_height=False):
                face_image = gr.Image(type="pil", label="ID Image")
                with gr.Accordion("ID Supplements", open=True):
                    with gr.Row():
                        extra_faces = gr.File(
                            file_count="multiple",
                            file_types=["image"],
                            type="filepath",
                            label="Additional ID Images",
                        )
                    with gr.Row():
                        with gr.Column(**narrow_col):
                            mix_face_a = gr.Image(type="pil", label="Mix ID 1")
                            mix_weight_a = gr.Slider(label="Mix Scale 1", value=0.0, **unit_slider)
                        with gr.Column(**narrow_col):
                            mix_face_b = gr.Image(type="pil", label="Mix ID 2")
                            mix_weight_b = gr.Slider(label="Mix Scale 2", value=0.0, **unit_slider)
                style_image = gr.Image(type="pil", label="Style")

            with gr.Row():
                # Hidden image: only used as a target for the example rows,
                # it is not part of the inputs sent to the process function.
                example_preview = gr.Image(type="pil", label="(Example Output)", visible=False)
                gallery = gr.Gallery(
                    label="Output",
                    show_label=True,
                    elem_id="gallery",
                    columns=4,
                    preview=True,
                    format="png",
                )

    # Example rows: prompt, ID image, style image, pre-rendered output.
    newton = "assets/examples/1-newton.jpg"
    no_style = "assets/No-Image-Placeholder.png"
    man_prompt = "A man with short black hair, was paired with a blue denim jacket with yellow details."
    example_rows = [
        [man_prompt, newton, no_style, "assets/examples/1-output-1.png"],
        [
            "A little boy with short black hair, was paired with a blue denim jacket with yellow details.",
            newton,
            no_style,
            "assets/examples/1-output-4.png",
        ],
        [man_prompt, newton, "assets/examples/1-style-1.jpg", "assets/examples/1-output-2.png"],
        [man_prompt, newton, "assets/examples/1-style-2.jpg", "assets/examples/1-output-5.png"],
    ]
    with gr.Row():
        gr.Examples(
            label="Examples",
            examples=example_rows,
            inputs=[prompt_box, face_image, style_image, example_preview],
        )

    # The order here is the positional argument order of the process function.
    run.click(
        fn=text_to_single_id_generation_process,
        inputs=[
            face_image, extra_faces,
            mix_face_a, mix_weight_a,
            mix_face_b, mix_weight_b,
            id_scale, structure_scale,
            prompt_box, negative_box,
            style_image, style_strength,
            seed_slider, resolution, upscale,
        ],
        outputs=[gallery],
    )


def text_to_multi_id_generation_block():
    """Lay out the "Text-to-Multi-ID" tab and hook up its Run button.

    Intended to be called inside an open ``gr.Blocks`` context, as the
    ``__main__`` section of this file does.  The description and tips HTML
    are read from the module-level names ``text_to_multi_id_description``
    and ``text_to_multi_id_tips``, which this file only assigns in its
    ``__main__`` section.

    The tab has two identity columns ("First ID" and "Second ID"), each with
    its own supplements accordion, plus a style image.  Clicking Run calls
    ``text_to_multi_id_generation_process`` with the component values in the
    order of the ``inputs`` list passed to ``click`` below, and routes its
    result to the output gallery.
    """
    # Keyword bundles reused below so repeated settings live in one place.
    unit_slider = dict(minimum=0.0, maximum=1.0, step=0.01)
    narrow_col = dict(scale=1, min_width=100)

    def _identity_column(id_label, supplements_label):
        """Create one ID column and return its six inputs in submit order."""
        with gr.Column(**narrow_col):
            face = gr.Image(type="pil", label=id_label)
            with gr.Accordion(supplements_label, open=False):
                with gr.Row():
                    extra_faces = gr.File(
                        file_count="multiple",
                        file_types=["image"],
                        type="filepath",
                        label="Additional ID Images",
                    )
                with gr.Row():
                    with gr.Column(**narrow_col):
                        mix_face_a = gr.Image(type="pil", label="Mix ID 1")
                        mix_weight_a = gr.Slider(label="Mix Scale 1", value=0.0, **unit_slider)
                    with gr.Column(**narrow_col):
                        mix_face_b = gr.Image(type="pil", label="Mix ID 2")
                        mix_weight_b = gr.Slider(label="Mix Scale 2", value=0.0, **unit_slider)
        return [face, extra_faces, mix_face_a, mix_weight_a, mix_face_b, mix_weight_b]

    gr.Markdown("## Text-to-Multi-ID Generation")
    gr.HTML(text_to_multi_id_description)
    gr.HTML(text_to_multi_id_tips)

    with gr.Row():
        # Left column: prompts, resolution, Run button and tuning controls.
        with gr.Column(**narrow_col):
            prompt_box = gr.Textbox(value="", label="Prompt", lines=2)
            negative_box = gr.Textbox(value="nsfw", label="Negative Prompt")
            resolution = gr.Dropdown(
                choices=["768x512", "512x512", "512x768"],
                value="512x512",
                label="Image Resolution (HxW)",
            )
            run = gr.Button(value="Run")

            with gr.Accordion("Advanced Options", open=True):
                seed_slider = gr.Slider(
                    label="Seed (-1 indicates random)",
                    minimum=-1,
                    maximum=2147483647,
                    step=1,
                    value=-1,
                )
                id_scale = gr.Slider(label="Face ID Scale", value=0.7, **unit_slider)
                structure_scale = gr.Slider(label="Face Structure Scale", value=0.3, **unit_slider)
                style_strength = gr.Slider(label="style_scale", value=0.7, **unit_slider)
                upscale = gr.Checkbox(label="RealESRGAN 2x", value=True)

        # Right column: two identity columns and a style column, then results.
        with gr.Column(scale=3, min_width=100):
            with gr.Row(equal_height=False):
                first_id = _identity_column("First ID", "First ID Supplements")
                second_id = _identity_column("Second ID", "Second ID Supplements")
                with gr.Column(**narrow_col):
                    style_image = gr.Image(type="pil", label="Style")

            with gr.Row():
                # Hidden image: only used as a target for the example rows,
                # it is not part of the inputs sent to the process function.
                example_preview = gr.Image(type="pil", label="(Example Output)", visible=False)
                gallery = gr.Gallery(
                    label="Output",
                    show_label=True,
                    elem_id="gallery",
                    columns=4,
                    preview=True,
                    format="png",
                )

    first_face, first_extras = first_id[0], first_id[1]
    second_face, second_extras = second_id[0], second_id[1]

    # Examples with one reference per identity:
    # prompt, first ID, second ID, style image, pre-rendered output.
    woman_a = "assets/examples/2-stylegan2-ffhq-0100.png"
    woman_b = "assets/examples/2-stylegan2-ffhq-0293.png"
    single_ref_rows = [
        [
            "The two women are captured in laughter and joy, their faces brimming with sincere happiness against the backdrop of a peaceful beach at sunset. The painting, depicted in soft style, captures the warmth and tranquility of the moment.",
            woman_a,
            woman_b,
            "assets/No-Image-Placeholder.png",
            "assets/examples/2-output-1.png",
        ],
        [
            "The two female models are drinking coffee. The background was off-white.",
            woman_a,
            woman_b,
            "assets/examples/2-style-1.jpg",
            "assets/examples/2-output-2.png",
        ],
    ]
    with gr.Row():
        gr.Examples(
            label="Examples",
            examples=single_ref_rows,
            inputs=[prompt_box, first_face, second_face, style_image, example_preview],
        )

    # Examples with additional references per identity:
    # prompt, first ID, first extras, second ID, second extras, output.
    trump_main = "assets/examples/Trump-1.jpg"
    trump_extras = ["assets/examples/Trump-2.jpg", "assets/examples/Trump-3.jpg", "assets/examples/Trump-4.jpg"]
    biden_main = "assets/examples/Biden-1.jpg"
    biden_extras = ["assets/examples/Biden-2.jpg", "assets/examples/Biden-3.jpg", "assets/examples/Biden-4.jpg"]
    multi_ref_rows = [
        [
            "Two men in an American poster.",
            trump_main,
            list(trump_extras),  # fresh list per row, as in a literal
            biden_main,
            list(biden_extras),
            "assets/examples/2-output-4.png",
        ],
        [
            "Two men engaged in a vigorous handshake, both wearing expressions of enthusiasm and determination, set against a backdrop of a bustling business district. The image is crafted in a sleek and modern digital art style, conveying the dynamic and competitive nature of their interaction.",
            trump_main,
            list(trump_extras),
            biden_main,
            list(biden_extras),
            "assets/examples/2-output-3.png",
        ],
    ]
    with gr.Row():
        gr.Examples(
            label="Examples (Multiple References)",
            examples=multi_ref_rows,
            inputs=[prompt_box, first_face, first_extras, second_face, second_extras, example_preview],
        )

    # The order here is the positional argument order of the process function.
    run.click(
        fn=text_to_multi_id_generation_process,
        inputs=[
            *first_id,
            *second_id,
            id_scale, structure_scale,
            prompt_box, negative_box,
            style_image, style_strength,
            seed_slider, resolution, upscale,
        ],
        outputs=[gallery],
    )


def image_to_single_id_generation_block():
    """Lay out the "Image-to-Single-ID" tab and hook up its Run button.

    Intended to be called inside an open ``gr.Blocks`` context, as the
    ``__main__`` section of this file does.  The description and tips HTML
    are read from the module-level names ``image_to_single_id_description``
    and ``image_to_single_id_tips``, which this file only assigns in its
    ``__main__`` section.

    Unlike the text tabs there are no prompt boxes: the "Portrait Reference"
    image and the "Reference Scale" slider are passed in the style-image and
    style-scale positions.  Clicking Run calls
    ``image_to_single_id_generation_process`` with the component values in
    the order of the ``inputs`` list passed to ``click`` below, and routes
    its result to the output gallery.
    """
    # Keyword bundles reused below so repeated settings live in one place.
    unit_slider = dict(minimum=0.0, maximum=1.0, step=0.01)
    narrow_col = dict(scale=1, min_width=100)
    wide_col = dict(scale=3, min_width=100)

    gr.Markdown("## Image-to-Single-ID Generation")
    gr.HTML(image_to_single_id_description)
    gr.HTML(image_to_single_id_tips)

    with gr.Row():
        # Left column: resolution, Run button and tuning controls.
        with gr.Column(**narrow_col):
            resolution = gr.Dropdown(
                choices=["768x512", "512x512", "512x768"],
                value="512x512",
                label="Image Resolution (HxW)",
            )
            run = gr.Button(value="Run")

            with gr.Accordion("Advanced Options", open=True):
                seed_slider = gr.Slider(
                    label="Seed (-1 indicates random)",
                    minimum=-1,
                    maximum=2147483647,
                    step=1,
                    value=-1,
                )
                reference_strength = gr.Slider(label="Reference Scale", value=0.7, **unit_slider)
                id_scale = gr.Slider(label="Face ID Scale", value=0.7, **unit_slider)
                structure_scale = gr.Slider(label="Face Structure Scale", value=0.3, **unit_slider)
                upscale = gr.Checkbox(label="RealESRGAN 2x", value=True)

        # Right column: reference/identity images on top, results underneath.
        with gr.Column(**wide_col):
            with gr.Row(equal_height=False):
                reference_image = gr.Image(type="pil", label="Portrait Reference")
                face_image = gr.Image(type="pil", label="ID Image")
                with gr.Accordion("ID Supplements", open=True):
                    with gr.Row():
                        extra_faces = gr.File(
                            file_count="multiple",
                            file_types=["image"],
                            type="filepath",
                            label="Additional ID Images",
                        )
                    with gr.Row():
                        with gr.Column(**narrow_col):
                            mix_face_a = gr.Image(type="pil", label="Mix ID 1")
                            mix_weight_a = gr.Slider(label="Mix Scale 1", value=0.0, **unit_slider)
                        with gr.Column(**narrow_col):
                            mix_face_b = gr.Image(type="pil", label="Mix ID 2")
                            mix_weight_b = gr.Slider(label="Mix Scale 2", value=0.0, **unit_slider)
            with gr.Row():
                with gr.Column(**wide_col):
                    # Hidden image: only used as a target for the example
                    # rows, it is not sent to the process function.
                    example_preview = gr.Image(type="pil", label="(Example Output)", visible=False)
                    gallery = gr.Gallery(
                        label="Output",
                        show_label=True,
                        elem_id="gallery",
                        columns=4,
                        preview=True,
                        format="png",
                    )

    # Example rows: portrait reference, ID image, Face ID Scale,
    # Face Structure Scale, pre-rendered output.
    face_0293 = "assets/examples/3-stylegan2-ffhq-0293.png"
    face_0381 = "assets/examples/3-stylegan2-ffhq-0381.png"
    example_rows = [
        ["assets/examples/3-style-1.png", face_0293, 0.7, 0.3, "assets/examples/3-output-1.png"],
        ["assets/examples/3-style-1.png", face_0293, 0.6, 0.0, "assets/examples/3-output-2.png"],
        ["assets/examples/3-style-2.jpg", face_0381, 0.7, 0.3, "assets/examples/3-output-3.png"],
        ["assets/examples/3-style-3.jpg", face_0381, 0.6, 0.0, "assets/examples/3-output-4.png"],
    ]
    with gr.Row():
        gr.Examples(
            label="Examples",
            examples=example_rows,
            inputs=[reference_image, face_image, id_scale, structure_scale, example_preview],
        )

    # The order here is the positional argument order of the process function.
    run.click(
        fn=image_to_single_id_generation_process,
        inputs=[
            face_image, extra_faces,
            mix_face_a, mix_weight_a,
            mix_face_b, mix_weight_b,
            id_scale, structure_scale,
            reference_image, reference_strength,
            seed_slider, resolution, upscale,
        ],
        outputs=[gallery],
    )


if __name__ == "__main__":
    # Script entry point: define the page text, assemble the three tabs and
    # launch the demo.
    #
    # The *_description and *_tips strings below become module-level globals
    # (this `if` body runs at module scope) and are read by the three
    # *_generation_block() functions above, so they must be assigned before
    # those functions are called.
    #
    # The emoji in these strings were previously stored as mis-decoded bytes
    # (e.g. "πŸ€—"); they are written here as the intended characters.

    # Page header with links to the paper, project page and code.
    title = r"""
            <div style="text-align: center;">
                <h1> UniPortrait: A Unified Framework for Identity-Preserving Single- and Multi-Human Image Personalization </h1>
                <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
                    <a href="https://arxiv.org/pdf/2408.05939"><img src="https://img.shields.io/badge/arXiv-2408.05939-red"></a>
                    &nbsp;
                    <a href='https://aigcdesigngroup.github.io/UniPortrait-Page/'><img src='https://img.shields.io/badge/Project_Page-UniPortrait-green' alt='Project Page'></a>
                    &nbsp;
                    <a href="https://github.com/junjiehe96/UniPortrait"><img src="https://img.shields.io/badge/Github-Code-blue"></a>
                </div>
                </br>
            </div>
        """

    title_description = r"""
        This is the <b>official 🤗 Gradio demo</b> for <a href='https://arxiv.org/pdf/2408.05939' target='_blank'><b>UniPortrait: A Unified Framework for Identity-Preserving Single- and Multi-Human Image Personalization</b></a>.<br>
        The demo provides three capabilities: text-to-single-ID personalization, text-to-multi-ID personalization, and image-to-single-ID personalization. All of these are based on the Stable Diffusion v1-5 model. Feel free to give them a try! 😊
        """

    # --- Text shown in the "Text-to-Single-ID" tab ---
    text_to_single_id_description = r"""🚀🚀🚀Quick start:<br>
        1. Enter a text prompt (Chinese or English), Upload an image with a face, and Click the <b>Run</b> button.<br>
        2. (Optional) You can also upload an image as the style reference for the results. 🤗<br>
        """

    text_to_single_id_tips = r"""💡💡💡Tips:<br>
        1. Try to avoid creating too small faces, as this may lead to some artifacts. (Currently, the short side length of the generated image is limited to 512)<br>
        2. It's a good idea to upload multiple reference photos of your face to improve the prompt and ID consistency. Additional references can be uploaded in the "ID supplements".<br>
        3. The appropriate values of "Face ID Scale" and "Face Structure Scale" are important for balancing the ID and text alignment. We recommend using "Face ID Scale" (0.5~0.7) and "Face Structure Scale" (0.0~0.4).<br>
        """

    # --- Text shown in the "Text-to-Multi-ID" tab ---
    text_to_multi_id_description = r"""🚀🚀🚀Quick start:<br>
        1. Enter a text prompt (Chinese or English), Upload an image with a face in "First ID" and "Second ID" blocks respectively, and Click the <b>Run</b> button.<br>
        2. (Optional) You can also upload an image as the style reference for the results. 🤗<br>
        """

    text_to_multi_id_tips = r"""💡💡💡Tips:<br>
        1. Try to avoid creating too small faces, as this may lead to some artifacts. (Currently, the short side length of the generated image is limited to 512)<br>
        2. It's a good idea to upload multiple reference photos of your face to improve the prompt and ID consistency. Additional references can be uploaded in the "ID supplements".<br>
        3. The appropriate values of "Face ID Scale" and "Face Structure Scale" are important for balancing the ID and text alignment. We recommend using "Face ID Scale" (0.3~0.7) and "Face Structure Scale" (0.0~0.4).<br>
        """

    # --- Text shown in the "Image-to-Single-ID" tab ---
    image_to_single_id_description = r"""🚀🚀🚀Quick start: Upload an image as the portrait reference (can be any style), Upload a face image, and Click the <b>Run</b> button. 🤗<br>"""

    image_to_single_id_tips = r"""💡💡💡Tips:<br>
        1. Try to avoid creating too small faces, as this may lead to some artifacts. (Currently, the short side length of the generated image is limited to 512)<br>
        2. It's a good idea to upload multiple reference photos of your face to improve ID consistency. Additional references can be uploaded in the "ID supplements".<br>
        3. The appropriate values of "Face ID Scale" and "Face Structure Scale" are important for balancing the portrait reference and ID alignment. We recommend using "Face ID Scale" (0.5~0.7) and "Face Structure Scale" (0.0~0.4).<br>
        """

    # NOTE(review): `citation` is assigned but never rendered anywhere in this
    # file, and its bibtex fence is empty — confirm whether it should be shown.
    citation = r"""
        ---
        📝 **Citation**
        <br>
        If our work is helpful for your research or applications, please cite us via:
        ```bibtex
        ```
        📧 **Contact**
        <br>
        If you have any questions, please feel free to open an issue or directly reach us out at <b>he_junjie@zju.edu.cn</b>.
        """

    # Build the page: header first, then one tab per generation mode.
    block = gr.Blocks(title="UniPortrait").queue()
    with block:
        gr.HTML(title)
        gr.HTML(title_description)

        with gr.TabItem("Text-to-Single-ID"):
            text_to_single_id_generation_block()

        with gr.TabItem("Text-to-Multi-ID"):
            text_to_multi_id_generation_block()

        with gr.TabItem("Image-to-Single-ID (Stylization)"):
            image_to_single_id_generation_block()

    # Default launch requests a public share link; the commented-out lines
    # below are alternative fixed-host/port configurations without sharing.
    block.launch(share=True, max_threads=2)
    # block.launch(server_name='0.0.0.0', share=False, server_port=9999, allowed_paths=["/"])
    # block.launch(server_name='127.0.0.1', share=False, server_port=9999, allowed_paths=["/"])