#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2024-07-31
# @Author : Junjie He
"""Gradio demo for UniPortrait.

Builds a three-tab UI (text-to-single-ID, text-to-multi-ID and
image-to-single-ID generation) and wires each tab's "Run" button to the
matching function imported from ``src.process``.
"""
import gradio as gr

from src.process import (
    text_to_single_id_generation_process,
    text_to_multi_id_generation_process,
    image_to_single_id_generation_process,
)

# ---------------------------------------------------------------------------
# Static page text.
#
# These strings live at module level (not under the ``__main__`` guard)
# because the tab-builder functions below read them as globals; defining them
# only when run as a script made the builders raise NameError when this module
# was imported and a builder was called from elsewhere.
# ---------------------------------------------------------------------------
title = r"""

UniPortrait: A Unified Framework for Identity-Preserving Single- and Multi-Human Image Personalization

  Project Page  

"""

title_description = r""" This is the official 🤗 Gradio demo for UniPortrait: A Unified Framework for Identity-Preserving Single- and Multi-Human Image Personalization.
The demo provides three capabilities: text-to-single-ID personalization, text-to-multi-ID personalization, and image-to-single-ID personalization. All of these are based on the Stable Diffusion v1-5 model. Feel free to give them a try! 😊 """

text_to_single_id_description = r"""🚀🚀🚀Quick start:
1. Enter a text prompt (Chinese or English), Upload an image with a face, and Click the Run button.
2. (Optional) You can also upload an image as the style reference for the results. 🤗
"""

text_to_single_id_tips = r"""💡💡💡Tips:
1. Try to avoid creating too small faces, as this may lead to some artifacts. (Currently, the short side length of the generated image is limited to 512)
2. It's a good idea to upload multiple reference photos of your face to improve the prompt and ID consistency. Additional references can be uploaded in the "ID supplements".
3. The appropriate values of "Face ID Scale" and "Face Structure Scale" are important for balancing the ID and text alignment. We recommend using "Face ID Scale" (0.5~0.7) and "Face Structure Scale" (0.0~0.4).
"""

text_to_multi_id_description = r"""🚀🚀🚀Quick start:
1. Enter a text prompt (Chinese or English), Upload an image with a face in "First ID" and "Second ID" blocks respectively, and Click the Run button.
2. (Optional) You can also upload an image as the style reference for the results. 🤗
"""

text_to_multi_id_tips = r"""💡💡💡Tips:
1. Try to avoid creating too small faces, as this may lead to some artifacts. (Currently, the short side length of the generated image is limited to 512)
2. It's a good idea to upload multiple reference photos of your face to improve the prompt and ID consistency. Additional references can be uploaded in the "ID supplements".
3. The appropriate values of "Face ID Scale" and "Face Structure Scale" are important for balancing the ID and text alignment. We recommend using "Face ID Scale" (0.3~0.7) and "Face Structure Scale" (0.0~0.4).
"""

image_to_single_id_description = r"""🚀🚀🚀Quick start: Upload an image as the portrait reference (can be any style), Upload a face image, and Click the Run button. 🤗
"""

image_to_single_id_tips = r"""💡💡💡Tips:
1. Try to avoid creating too small faces, as this may lead to some artifacts. (Currently, the short side length of the generated image is limited to 512)
2. It's a good idea to upload multiple reference photos of your face to improve ID consistency. Additional references can be uploaded in the "ID supplements".
3. The appropriate values of "Face ID Scale" and "Face Structure Scale" are important for balancing the portrait reference and ID alignment. We recommend using "Face ID Scale" (0.5~0.7) and "Face Structure Scale" (0.0~0.4).
"""

# NOTE(review): `citation` is not referenced by any code visible in this file,
# and its BibTeX entry is for InstantID rather than UniPortrait. It is kept
# unchanged; confirm the intended text before rendering it in the UI.
citation = r""" --- 📝 **Citation**
If our work is helpful for your research or applications, please cite us via: ```bibtex @article{wang2024instantid, title={InstantID: Zero-shot Identity-Preserving Generation in Seconds}, author={Wang, Qixun and Bai, Xu and Wang, Haofan and Qin, Zekui and Chen, Anthony}, journal={arXiv preprint arXiv:2401.07519}, year={2024} } ``` 📧 **Contact**
If you have any questions, please feel free to open an issue or directly reach us out at he_junjie@zju.edu.cn. """


# ---------------------------------------------------------------------------
# Private widget factories shared by the three tabs.
#
# Each helper creates its components in the layout context that is active at
# the call site, in the same order the original inline code did.
# ---------------------------------------------------------------------------
def _unit_slider(label, value):
    """Return a slider over [0.0, 1.0] with a 0.01 step and the given default."""
    return gr.Slider(label=label, minimum=0.0, maximum=1.0, step=0.01, value=value)


def _seed_slider():
    """Return the seed slider; its label documents -1 as "random"."""
    return gr.Slider(label="Seed (-1 indicates random)", minimum=-1, maximum=2147483647, step=1, value=-1)


def _resolution_dropdown():
    """Return the output-resolution dropdown (choices are labelled HxW)."""
    return gr.Dropdown(
        choices=["768x512", "512x512", "512x768"],
        value="512x512",
        label="Image Resolution (HxW)",
    )


def _output_widgets():
    """Create the hidden example-output image and the result gallery.

    Returns:
        ``(example_output, result_gallery)``. ``example_output`` is created
        with ``visible=False`` and is only used as a ``gr.Examples`` input.
    """
    example_output = gr.Image(type="pil", label="(Example Output)", visible=False)
    result_gallery = gr.Gallery(
        label='Output',
        show_label=True,
        elem_id="gallery",
        columns=4,
        preview=True,
        format="png",
    )
    return example_output, result_gallery


def _id_supplements(accordion_label, *, expanded):
    """Create an accordion holding extra ID images and two "mix" identities.

    Args:
        accordion_label: Title shown on the accordion.
        expanded: Passed to ``gr.Accordion(open=...)``.

    Returns:
        ``(supp_faceids, mix_faceid_1, mix_scale_1, mix_faceid_2, mix_scale_2)``
        in the order the process functions' input lists expect them.
    """
    with gr.Accordion(accordion_label, open=expanded):
        with gr.Row():
            supp_faceids = gr.File(
                file_count="multiple",
                file_types=["image"],
                type="filepath",
                label="Additional ID Images",
            )
        with gr.Row():
            with gr.Column(scale=1, min_width=100):
                mix_faceid_1 = gr.Image(type="pil", label="Mix ID 1")
                mix_scale_1 = _unit_slider("Mix Scale 1", 0.0)
            with gr.Column(scale=1, min_width=100):
                mix_faceid_2 = gr.Image(type="pil", label="Mix ID 2")
                mix_scale_2 = _unit_slider("Mix Scale 2", 0.0)
    return supp_faceids, mix_faceid_1, mix_scale_1, mix_faceid_2, mix_scale_2


def text_to_single_id_generation_block():
    """Build the "Text-to-Single-ID" tab and wire its Run button.

    Creates the prompt/option controls, the ID / style image inputs, the
    output gallery and an examples table, then connects the Run button to
    ``text_to_single_id_generation_process``. Intended to be called while a
    ``gr.Blocks`` context is active (see ``main``).
    """
    gr.Markdown("## Text-to-Single-ID Generation")
    gr.HTML(text_to_single_id_description)
    gr.HTML(text_to_single_id_tips)

    with gr.Row():
        # Left column: prompt and generation options.
        with gr.Column(scale=1, min_width=100):
            prompt = gr.Textbox(value="", label='Prompt', lines=2)
            negative_prompt = gr.Textbox(value="nsfw", label='Negative Prompt')
            image_resolution = _resolution_dropdown()
            run_button = gr.Button(value="Run")
            with gr.Accordion("Advanced Options", open=True):
                seed = _seed_slider()
                faceid_scale = _unit_slider("Face ID Scale", 0.7)
                face_structure_scale = _unit_slider("Face Structure Scale", 0.1)
                style_scale = _unit_slider("style_scale", 0.7)
                use_sr = gr.Checkbox(label="RealESRGAN 2x", value=True)

        # Right column: image inputs on top, results below.
        with gr.Column(scale=3, min_width=100):
            with gr.Row(equal_height=False):
                pil_faceid = gr.Image(type="pil", label="ID Image")
                (
                    pil_supp_faceids,
                    pil_mix_faceid_1,
                    mix_scale_1,
                    pil_mix_faceid_2,
                    mix_scale_2,
                ) = _id_supplements("ID Supplements", expanded=True)
                pil_style = gr.Image(type="pil", label="Style")
            with gr.Row():
                example_output, result_gallery = _output_widgets()

    with gr.Row():
        # Each row: prompt, ID image, style image, pre-rendered output.
        examples = [
            [
                "A man with short black hair, was paired with a blue denim jacket with yellow details.",
                "assets/examples/1-newton.jpg",
                "assets/No-Image-Placeholder.png",
                "assets/examples/1-output-1.png",
            ],
            [
                "A little boy with short black hair, was paired with a blue denim jacket with yellow details.",
                "assets/examples/1-newton.jpg",
                "assets/No-Image-Placeholder.png",
                "assets/examples/1-output-4.png",
            ],
            [
                "A man with short black hair, was paired with a blue denim jacket with yellow details.",
                "assets/examples/1-newton.jpg",
                "assets/examples/1-style-1.jpg",
                "assets/examples/1-output-2.png",
            ],
        ]
        gr.Examples(
            label="Examples",
            examples=examples,
            inputs=[prompt, pil_faceid, pil_style, example_output],
        )

    # Positional order must match the signature of the process function.
    ips = [
        pil_faceid, pil_supp_faceids, pil_mix_faceid_1, mix_scale_1, pil_mix_faceid_2, mix_scale_2,
        faceid_scale, face_structure_scale,
        prompt, negative_prompt,
        pil_style, style_scale,
        seed, image_resolution, use_sr,
    ]
    run_button.click(fn=text_to_single_id_generation_process, inputs=ips, outputs=[result_gallery])


def text_to_multi_id_generation_block():
    """Build the "Text-to-Multi-ID" tab and wire its Run button.

    Same layout idea as the single-ID tab, but with two identity columns
    ("First ID" / "Second ID"), each with its own supplements accordion, and
    two example tables. The Run button is connected to
    ``text_to_multi_id_generation_process``. Intended to be called while a
    ``gr.Blocks`` context is active (see ``main``).
    """
    gr.Markdown("## Text-to-Multi-ID Generation")
    gr.HTML(text_to_multi_id_description)
    gr.HTML(text_to_multi_id_tips)

    with gr.Row():
        # Left column: prompt and generation options.
        with gr.Column(scale=1, min_width=100):
            prompt = gr.Textbox(value="", label='Prompt', lines=2)
            negative_prompt = gr.Textbox(value="nsfw", label='Negative Prompt')
            image_resolution = _resolution_dropdown()
            run_button = gr.Button(value="Run")
            with gr.Accordion("Advanced Options", open=True):
                seed = _seed_slider()
                faceid_scale = _unit_slider("Face ID Scale", 0.7)
                face_structure_scale = _unit_slider("Face Structure Scale", 0.3)
                style_scale = _unit_slider("style_scale", 0.7)
                use_sr = gr.Checkbox(label="RealESRGAN 2x", value=True)

        # Right column: two identities plus style on top, results below.
        with gr.Column(scale=3, min_width=100):
            with gr.Row(equal_height=False):
                with gr.Column(scale=1, min_width=100):
                    pil_faceid_1st = gr.Image(type="pil", label="First ID")
                    (
                        pil_supp_faceids_1st,
                        pil_mix_faceid_1_1st,
                        mix_scale_1_1st,
                        pil_mix_faceid_2_1st,
                        mix_scale_2_1st,
                    ) = _id_supplements("First ID Supplements", expanded=False)
                with gr.Column(scale=1, min_width=100):
                    pil_faceid_2nd = gr.Image(type="pil", label="Second ID")
                    (
                        pil_supp_faceids_2nd,
                        pil_mix_faceid_1_2nd,
                        mix_scale_1_2nd,
                        pil_mix_faceid_2_2nd,
                        mix_scale_2_2nd,
                    ) = _id_supplements("Second ID Supplements", expanded=False)
                with gr.Column(scale=1, min_width=100):
                    pil_style = gr.Image(type="pil", label="Style")
            with gr.Row():
                example_output, result_gallery = _output_widgets()

    with gr.Row():
        # Each row: prompt, first ID, second ID, style image, pre-rendered output.
        examples = [
            [
                "两个女人在欢笑和快乐中被捕捉到,他们的脸上洋溢着真挚的幸福,背景是日落时分的宁静海滩。这幅画以柔和的风格描绘,捕捉了这一刻的温暖和宁静。",
                "assets/examples/2-stylegan2-ffhq-0100.png",
                "assets/examples/2-stylegan2-ffhq-0293.png",
                "assets/No-Image-Placeholder.png",
                "assets/examples/2-output-1.png",
            ],
            [
                "The two female models are drinking coffee. The background was off-white.",
                "assets/examples/2-stylegan2-ffhq-0100.png",
                "assets/examples/2-stylegan2-ffhq-0293.png",
                "assets/examples/2-style-1.jpg",
                "assets/examples/2-output-2.png",
            ],
        ]
        gr.Examples(
            label="Examples",
            examples=examples,
            inputs=[prompt, pil_faceid_1st, pil_faceid_2nd, pil_style, example_output],
        )

    with gr.Row():
        # Each row: prompt, first ID, first-ID supplements, second ID,
        # second-ID supplements, pre-rendered output.
        examples = [
            [
                "Two men in an American poster.",
                "assets/examples/Trump-1.jpg",
                ["assets/examples/Trump-2.jpg", "assets/examples/Trump-3.jpg", "assets/examples/Trump-4.jpg"],
                "assets/examples/Biden-1.jpg",
                ["assets/examples/Biden-2.jpg", "assets/examples/Biden-3.jpg", "assets/examples/Biden-4.jpg"],
                "assets/examples/2-output-4.png",
            ],
            [
                "Two men engaged in a vigorous handshake, both wearing expressions of enthusiasm and determination, set against a backdrop of a bustling business district. The image is crafted in a sleek and modern digital art style, conveying the dynamic and competitive nature of their interaction.",
                "assets/examples/Trump-1.jpg",
                ["assets/examples/Trump-2.jpg", "assets/examples/Trump-3.jpg", "assets/examples/Trump-4.jpg"],
                "assets/examples/Biden-1.jpg",
                ["assets/examples/Biden-2.jpg", "assets/examples/Biden-3.jpg", "assets/examples/Biden-4.jpg"],
                "assets/examples/2-output-3.png",
            ],
        ]
        gr.Examples(
            label="Examples (Multiple References)",
            examples=examples,
            inputs=[prompt, pil_faceid_1st, pil_supp_faceids_1st, pil_faceid_2nd, pil_supp_faceids_2nd,
                    example_output],
        )

    # Positional order must match the signature of the process function.
    ips = [
        pil_faceid_1st, pil_supp_faceids_1st, pil_mix_faceid_1_1st, mix_scale_1_1st, pil_mix_faceid_2_1st,
        mix_scale_2_1st,
        pil_faceid_2nd, pil_supp_faceids_2nd, pil_mix_faceid_1_2nd, mix_scale_1_2nd, pil_mix_faceid_2_2nd,
        mix_scale_2_2nd,
        faceid_scale, face_structure_scale,
        prompt, negative_prompt,
        pil_style, style_scale,
        seed, image_resolution, use_sr,
    ]
    run_button.click(fn=text_to_multi_id_generation_process, inputs=ips, outputs=[result_gallery])


def image_to_single_id_generation_block():
    """Build the "Image-to-Single-ID" tab and wire its Run button.

    This tab has no text prompt: the user supplies a portrait reference and
    an ID image. The Run button is connected to
    ``image_to_single_id_generation_process``. Intended to be called while a
    ``gr.Blocks`` context is active (see ``main``).
    """
    gr.Markdown("## Image-to-Single-ID Generation")
    gr.HTML(image_to_single_id_description)
    gr.HTML(image_to_single_id_tips)

    with gr.Row():
        # Left column: generation options.
        with gr.Column(scale=1, min_width=100):
            image_resolution = _resolution_dropdown()
            run_button = gr.Button(value="Run")
            with gr.Accordion("Advanced Options", open=True):
                seed = _seed_slider()
                # The style scale is shown to the user as "Reference Scale" here.
                style_scale = _unit_slider("Reference Scale", 0.7)
                faceid_scale = _unit_slider("Face ID Scale", 0.7)
                face_structure_scale = _unit_slider("Face Structure Scale", 0.3)
                use_sr = gr.Checkbox(label="RealESRGAN 2x", value=True)

        # Right column: image inputs on top, results below.
        with gr.Column(scale=3, min_width=100):
            with gr.Row(equal_height=False):
                pil_style = gr.Image(type="pil", label="Portrait Reference")
                pil_faceid = gr.Image(type="pil", label="ID Image")
                (
                    pil_supp_faceids,
                    pil_mix_faceid_1,
                    mix_scale_1,
                    pil_mix_faceid_2,
                    mix_scale_2,
                ) = _id_supplements("ID Supplements", expanded=True)
            with gr.Row():
                with gr.Column(scale=3, min_width=100):
                    example_output, result_gallery = _output_widgets()

    with gr.Row():
        # Each row: portrait reference, ID image, Face ID Scale,
        # Face Structure Scale, pre-rendered output.
        examples = [
            [
                "assets/examples/3-style-1.png",
                "assets/examples/3-stylegan2-ffhq-0293.png",
                0.7,
                0.3,
                "assets/examples/3-output-1.png",
            ],
            [
                "assets/examples/3-style-1.png",
                "assets/examples/3-stylegan2-ffhq-0293.png",
                0.6,
                0.0,
                "assets/examples/3-output-2.png",
            ],
            [
                "assets/examples/3-style-2.jpg",
                "assets/examples/3-stylegan2-ffhq-0381.png",
                0.7,
                0.3,
                "assets/examples/3-output-3.png",
            ],
            [
                "assets/examples/3-style-3.jpg",
                "assets/examples/3-stylegan2-ffhq-0381.png",
                0.6,
                0.0,
                "assets/examples/3-output-4.png",
            ],
        ]
        gr.Examples(
            label="Examples",
            examples=examples,
            inputs=[pil_style, pil_faceid, faceid_scale, face_structure_scale, example_output],
        )

    # Positional order must match the signature of the process function.
    ips = [
        pil_faceid, pil_supp_faceids, pil_mix_faceid_1, mix_scale_1, pil_mix_faceid_2, mix_scale_2,
        faceid_scale, face_structure_scale,
        pil_style, style_scale,
        seed, image_resolution, use_sr,
    ]
    run_button.click(fn=image_to_single_id_generation_process, inputs=ips, outputs=[result_gallery])


def main():
    """Assemble the three-tab demo and launch it with a public share link."""
    block = gr.Blocks(title="UniPortrait").queue()
    with block:
        gr.HTML(title)
        gr.HTML(title_description)

        with gr.TabItem("Text-to-Single-ID"):
            text_to_single_id_generation_block()

        with gr.TabItem("Text-to-Multi-ID"):
            text_to_multi_id_generation_block()

        with gr.TabItem("Image-to-Single-ID (Stylization)"):
            image_to_single_id_generation_block()

    # NOTE: share=True asks Gradio for a publicly reachable link. The
    # alternatives below bind to a fixed host/port instead.
    block.launch(share=True)
    # block.launch(server_name='0.0.0.0', share=False, server_port=9999, allowed_paths=["/"])
    # block.launch(server_name='127.0.0.1', share=False, server_port=9999, allowed_paths=["/"])


if __name__ == "__main__":
    main()