Spaces:

SunderAli17
/

SAKFaceTransform

Running on Zero

App Files Files Community

SunderAli17 committed on Sep 4, 2024

Commit

34431b1

verified ·

1 Parent(s): ebfac98

Create app.py

Browse files

Files changed (1) hide show

app.py +209 -0

app.py ADDED Viewed

	@@ -0,0 +1,209 @@

+import spaces
+import random
+import torch
+import cv2
+import insightface
+import gradio as gr
+import numpy as np
+import os
+from huggingface_hub import snapshot_download
+from transformers import CLIPVisionModelWithProjection,CLIPImageProcessor
+from SAK.pipelines.pipeline_stable_diffusion_xl_chatglm_256_ipadapter_FaceID import StableDiffusionXLPipeline
+from SAK.models.modeling_chatglm import ChatGLMModel
+from SAK.models.tokenization_chatglm import ChatGLMTokenizer
+from diffusers import AutoencoderKL
+from SAK.models.unet_2d_condition import UNet2DConditionModel
+from diffusers import EulerDiscreteScheduler
+from PIL import Image
+from insightface.app import FaceAnalysis
+from insightface.data import get_image as ins_get_image
# Every model below is moved to this device.
device = "cuda"

# Resolve the local checkpoint directories. These two assignments must run:
# every `from_pretrained` call below (and `infer`) builds its path from
# `ckpt_dir` / `ckpt_dir_faceid`, so leaving them commented out makes the
# module fail with NameError at import time.
ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
ckpt_dir_faceid = snapshot_download(repo_id="Kwai-Kolors/Kolors-IP-Adapter-FaceID-Plus")

# Text encoder and tokenizer are both read from the `text_encoder` sub-folder.
text_encoder = ChatGLMModel.from_pretrained(f'{ckpt_dir}/text_encoder', torch_dtype=torch.float16).half().to(device)
tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')

# VAE and UNet are cast to half precision before being moved to the device.
vae = AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None).half().to(device)
scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
unet = UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None).half().to(device)

# CLIP vision encoder/processor used for the cropped face image. The processor
# size (336) matches the 336x336 crop produced in `infer`.
clip_image_encoder = CLIPVisionModelWithProjection.from_pretrained(f'{ckpt_dir_faceid}/clip-vit-large-patch14-336', ignore_mismatched_sizes=True)
clip_image_encoder.to(device)
clip_image_processor = CLIPImageProcessor(size = 336, crop_size = 336)

# Assemble the generation pipeline from the individually loaded components.
pipe = StableDiffusionXLPipeline(
    vae = vae,
    text_encoder = text_encoder,
    tokenizer = tokenizer,
    unet = unet,
    scheduler = scheduler,
    face_clip_encoder = clip_image_encoder,
    face_clip_processor = clip_image_processor,
    force_zeros_for_empty_prompt = False,
)
class FaceInfoGenerator():
    """Detect faces with insightface and return the most prominent one."""

    def __init__(self, root_dir = "./.insightface/"):
        """Create and prepare the underlying ``FaceAnalysis`` app.

        Args:
            root_dir: Directory passed to ``FaceAnalysis`` as ``root``
                (presumably where the ``antelopev2`` model files live or are
                downloaded to -- confirm against insightface).
        """
        self.app = FaceAnalysis(name = 'antelopev2', root = root_dir, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
        self.app.prepare(ctx_id = 0, det_size = (640, 640))

    @staticmethod
    def _bbox_area(face):
        """Return the area of ``face['bbox']`` given as (left, top, right, bottom)."""
        bbox = face['bbox']
        return (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])

    def get_faceinfo_one_img(self, face_image):
        """Return the detection with the largest bounding box, or ``None``.

        Args:
            face_image: RGB image convertible with ``np.array`` (the app passes
                a PIL image); it is converted to BGR before detection.

        Returns:
            The face entry with the largest bounding-box area, or ``None``
            when no face is detected.
        """
        bgr_image = cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR)
        faces = self.app.get(bgr_image)
        if not faces:
            return None
        # Only the largest face is used downstream.
        return max(faces, key=self._bbox_area)
+def face_bbox_to_square(bbox):
+    ## l, t, r, b to square l, t, r, b
+    l,t,r,b = bbox
+    cent_x = (l + r) / 2
+    cent_y = (t + b) / 2
+    w, h = r - l, b - t
+    r = max(w, h) / 2
+    l0 = cent_x - r
+    r0 = cent_x + r
+    t0 = cent_y - r
+    b0 = cent_y + r
+    return [l0, t0, r0, b0]
# Upper bound for seeds: the largest signed 32-bit integer. Used both for
# random seed generation in `infer` and as the seed slider maximum.
_INT32_LIMITS = np.iinfo(np.int32)
MAX_SEED = _INT32_LIMITS.max

# Not referenced elsewhere in the visible code.
MAX_IMAGE_SIZE = 1024

# Shared face detector, built once at import time and reused by `infer`.
face_info_generator = FaceInfoGenerator()
@spaces.GPU
def infer(prompt,
          image = None,
          negative_prompt = "nsfw，Face shadows，Low resolution，JPEG artifacts、Vague、bad，Neon lights",
          seed = 66,
          randomize_seed = False,
          guidance_scale = 5.0,
          num_inference_steps = 50
        ):
    """Generate a 1024x1024 image of the person in ``image`` following ``prompt``.

    Args:
        prompt: Text description of the desired picture.
        image: PIL image containing the reference face.
        negative_prompt: Text describing what to avoid in the result.
        seed: Seed for the random generator; ignored when ``randomize_seed``.
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_inference_steps: Number of denoising steps forwarded to the pipeline.

    Returns:
        A ``(generated_image, seed_used)`` tuple.

    Raises:
        gr.Error: If no image is supplied or no face is detected in it.
    """
    # Fail fast with a message the UI can show instead of an opaque
    # AttributeError/TypeError further down.
    if image is None:
        raise gr.Error("Please upload an image containing a face.")

    face_info = face_info_generator.get_faceinfo_one_img(image)
    if face_info is None:
        raise gr.Error("No face was detected in the uploaded image.")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # UI sliders may deliver floats; the generator needs an int seed.
    seed = int(seed)
    generator = torch.Generator().manual_seed(seed)

    global pipe
    pipe = pipe.to(device)
    # NOTE(review): the adapter weights are (re)loaded on every call, as in the
    # original code -- confirm whether this is required by the GPU decorator.
    pipe.load_ip_adapter_faceid_plus(f'{ckpt_dir_faceid}/ipa-faceid-plus.bin', device = device)
    scale = 0.8
    pipe.set_face_fidelity_scale(scale)

    # Square crop around the detected face, resized to the CLIP input size.
    face_bbox_square = face_bbox_to_square(face_info["bbox"])
    crop_image = image.crop(face_bbox_square)
    crop_image = crop_image.resize((336, 336))
    crop_image = [crop_image]

    # Identity embedding from insightface, with a leading batch dimension of 1.
    face_embeds = torch.from_numpy(np.array([face_info["embedding"]]))
    face_embeds = face_embeds.to(device, dtype = torch.float16)

    image = pipe(
        prompt = prompt,
        negative_prompt = negative_prompt,
        height = 1024,
        width = 1024,
        num_inference_steps = int(num_inference_steps),
        guidance_scale = guidance_scale,
        num_images_per_prompt = 1,
        generator = generator,
        face_crop_image = crop_image,
        face_insightface_embeds = face_embeds
    ).images[0]
    return image, seed
# Example inputs for the UI: each entry is [prompt, path to reference image].
examples = [
    [
        "wearing a full suit sitting in a restaurant with candle lights ",
        "image/image1.png",
    ],
    [
        "Cowboy, cowboy hat, Wild Cowboy, background is a western town, cactus, sunset, warm colors, shot with XT4 film, noise, vignette, Kodak film, vintage",
        "image/image2.png",
    ],
]

# Stylesheet handed to `gr.Blocks`; selectors match the `elem_id`s used in the layout.
css = """
#col-left {
    margin: 0 auto;
    max-width: 600px;
}
#col-right {
    margin: 0 auto;
    max-width: 750px;
}
#button {
    color: blue;
}
"""
def load_description(fp):
    """Return the full text of the UTF-8 encoded file at ``fp``."""
    with open(fp, 'r', encoding='utf-8') as handle:
        return handle.read()
# Build the Gradio UI: inputs on the left, generated image and seed on the right.
with gr.Blocks(css=css) as Kolors:
    gr.HTML(load_description("assets/title.md"))
    with gr.Row():
        with gr.Column(elem_id="col-left"):
            with gr.Row():
                prompt = gr.Textbox(
                    label="Prompt",
                    placeholder="Enter your prompt",
                    lines=2
                )
            with gr.Row():
                # `type="pil"` so `infer` receives a PIL image it can crop.
                image = gr.Image(label="Image", type="pil")
            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative prompt",
                    placeholder="Enter a negative prompt",
                    visible=True,
                )
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0,
                )
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                with gr.Row():
                    guidance_scale = gr.Slider(
                        label="Guidance scale",
                        minimum=0.0,
                        maximum=10.0,
                        step=0.1,
                        value=5.0,
                    )
                    num_inference_steps = gr.Slider(
                        label="Number of inference steps",
                        minimum=10,
                        maximum=50,
                        step=1,
                        value=25,
                    )
            with gr.Row():
                button = gr.Button("Run", elem_id="button")
        with gr.Column(elem_id="col-right"):
            result = gr.Image(label="Result", show_label=False)
            seed_used = gr.Number(label="Seed Used")
    with gr.Row():
        # Examples supply only prompt and image; `infer` uses its defaults
        # for the remaining parameters.
        gr.Examples(
                fn = infer,
                examples = examples,
                inputs = [prompt, image],
                outputs = [result, seed_used],
            )
    button.click(
        fn = infer,
        inputs = [prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps],
        outputs = [result, seed_used]
    )

# The Blocks object is bound to `Kolors` above; launching through the
# undefined name `SAK` raised NameError at startup.
Kolors.queue().launch(debug=True)