Spaces:

CyranoB
/

SDXL-Lightning

Running

App Files Files Community

Ubuntu committed on Feb 25, 2024

Commit

ae4c73e

1 Parent(s): 2b96898

Original version

Browse files

Files changed (5) hide show

README.md +5 -7
app.py +169 -0
requirements.txt +7 -0
safety_checker.py +137 -0
style.css +12 -0

README.md CHANGED Viewed

@@ -1,13 +1,11 @@
 ---
 title: SDXL Lightning
-emoji: 👁
-colorFrom: indigo
-colorTo: blue
 sdk: gradio
-sdk_version: 4.19.2
 app_file: app.py
 pinned: false
-license: openrail++
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: SDXL Lightning
+emoji: ⚡
+colorFrom: yellow
+colorTo: gray
 sdk: gradio
+sdk_version: 4.19.1
 app_file: app.py
 pinned: false
+license: openrail
 ---

app.py ADDED Viewed

	@@ -0,0 +1,169 @@

+import gradio as gr
+import torch
+from diffusers import StableDiffusionXLPipeline, EulerDiscreteScheduler
+from huggingface_hub import hf_hub_download
+from safetensors.torch import load_file
+import spaces
+import os
+from PIL import Image, ImageFilter
+from typing import List, Tuple
+# Feature flag: the NSFW safety checker is enabled only when the SAFETY_CHECKER
+# environment variable is exactly "1"; it is off by default.
+SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", "0") == "1"
+# Constants
+# Hub repo id of the base SDXL pipeline loaded at start-up.
+base = "stabilityai/stable-diffusion-xl-base-1.0"
+# Hub repo id from which the UNet checkpoint files listed below are downloaded.
+repo = "ByteDance/SDXL-Lightning"
+# UI label -> [checkpoint filename inside `repo`, number of inference steps].
+checkpoints = {
+    "1-Step" : ["sdxl_lightning_1step_unet_x0.safetensors", 1],
+    "2-Step" : ["sdxl_lightning_2step_unet.safetensors", 2],
+    "4-Step" : ["sdxl_lightning_4step_unet.safetensors", 4],
+    "8-Step" : ["sdxl_lightning_8step_unet.safetensors", 8],
+}
+# UI label -> (width multiplier, height multiplier), consumed by
+# calculate_resolution to derive the output size.
+aspect_ratios = {
+    "21:9": (21, 9),
+    "2:1": (2, 1),
+    "16:9": (16, 9),
+    "5:4": (5, 4),
+    "4:3": (4, 3),
+    "3:2": (3, 2),
+    "1:1": (1, 1),
+}
+# Function to calculate resolution
+def calculate_resolution(aspect_ratio, mode='landscape', total_pixels=1024*1024, divisibility=8):
+    if aspect_ratio not in aspect_ratios:
+        raise ValueError(f"Invalid aspect ratio: {aspect_ratio}")
+    width_multiplier, height_multiplier = aspect_ratios[aspect_ratio]
+    ratio = width_multiplier / height_multiplier
+    if mode == 'portrait':
+        # Swap the ratio for portrait mode
+        ratio = 1 / ratio
+    height = int((total_pixels / ratio) ** 0.5)
+    height -= height % divisibility
+    width = int(height * ratio)
+    width -= width % divisibility
+    while width * height > total_pixels:
+        height -= divisibility
+        width = int(height * ratio)
+        width -= width % divisibility
+    return width, height
+# Example prompts with ckpt, aspect, and mode
+# Each entry supplies the values set_example returns for the four inputs:
+# "prompt", "ckpt" (a key of `checkpoints`), "aspect" (a key of
+# `aspect_ratios`) and "mode" ('landscape' or 'portrait').
+examples = [
+    {"prompt": "A futuristic cityscape at sunset", "ckpt": "4-Step", "aspect": "16:9", "mode": "landscape"},
+    {"prompt": "pair of shoes made of dried fruit skins, 3d render, bright colours, clean composition, beautiful artwork, logo", "ckpt": "2-Step", "aspect": "1:1", "mode": "portrait"},
+    {"prompt": "A portrait of a robot in the style of Renaissance art", "ckpt": "2-Step", "aspect": "1:1", "mode": "portrait"},
+    {"prompt": "full body of alien shaped like woman, big golden eyes, mars planet, photo, digital art, fantasy", "ckpt": "4-Step", "aspect": "1:1", "mode": "portrait"},
+    {"prompt": "A serene landscape with mountains and a river", "ckpt": "8-Step", "aspect": "3:2", "mode": "landscape"},
+    {"prompt": "post-apocalyptic wasteland, the most delicate beautiful flower with green leaves growing from dust and rubble, vibrant colours, cinematic", "ckpt": "8-Step", "aspect": "16:9", "mode": "landscape"}
+]
+# Define a function to set the example inputs
+def set_example(selected_prompt):
+    # Find the example that matches the selected prompt
+    for example in examples:
+        if example["prompt"] == selected_prompt:
+            return example["prompt"], example["ckpt"], example["aspect"], example["mode"]
+    return None, None, None, None  # Default values if not found
+# Load the base SDXL pipeline once at import time, in fp16, on the GPU.
+# NOTE(review): when CUDA is unavailable `pipe` is never defined, so any later
+# call to generate_image would fail with NameError - confirm this is intended.
+if torch.cuda.is_available():
+    pipe = StableDiffusionXLPipeline.from_pretrained(base, torch_dtype=torch.float16, variant="fp16").to("cuda")
+# Optional NSFW filter, loaded only when the SAFETY_CHECKER flag is set.
+# NOTE(review): unlike the pipeline above, the checker is moved to "cuda"
+# without first checking torch.cuda.is_available().
+if SAFETY_CHECKER:
+    from safety_checker import StableDiffusionSafetyChecker
+    from transformers import CLIPFeatureExtractor
+    safety_checker = StableDiffusionSafetyChecker.from_pretrained(
+        "CompVis/stable-diffusion-safety-checker"
+    ).to("cuda")
+    # Produces the `pixel_values` input that check_nsfw_images passes to the checker.
+    feature_extractor = CLIPFeatureExtractor.from_pretrained(
+        "openai/clip-vit-base-patch32"
+    )
+def check_nsfw_images(
+    images: List[Image.Image]
+) -> Tuple[List[Image.Image], List[bool]]:
+    # Assuming feature_extractor and safety_checker are defined and initialized elsewhere
+    # Convert PIL Images to the format expected by the feature extractor
+    # This often involves converting them to tensors, but the exact method
+    # depends on the feature_extractor's requirements
+    safety_checker_inputs = [feature_extractor(image, return_tensors="pt").to("cuda") for image in images]
+    # Get NSFW concepts for each image
+    has_nsfw_concepts = [safety_checker(
+        images=[image],
+        clip_input=safety_checker_input.pixel_values.to("cuda")
+    ) for image, safety_checker_input in zip(images, safety_checker_inputs)]
+    # Flatten the has_nsfw_concepts list if it's nested
+    has_nsfw_concepts = [item for sublist in has_nsfw_concepts for item in sublist]
+    return images, has_nsfw_concepts
+# Function
+@spaces.GPU(enable_queue=True)
+def generate_image(prompt, ckpt, aspect_ratio, mode):
+    width, height = calculate_resolution(aspect_ratio, mode)  # Calculate resolution based on the aspect ratio
+    checkpoint = checkpoints[ckpt][0]
+    num_inference_steps = checkpoints[ckpt][1]
+    if num_inference_steps==1:
+        # Ensure sampler uses "trailing" timesteps and "sample" prediction type for 1-step inference.
+        pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", prediction_type="sample")
+    else:
+        # Ensure sampler uses "trailing" timesteps.
+        pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
+    pipe.unet.load_state_dict(load_file(hf_hub_download(repo, checkpoint), device="cuda"))
+    results = pipe(prompt, num_inference_steps=num_inference_steps, guidance_scale=0, width=width, height=height )
+    if SAFETY_CHECKER:
+        images, has_nsfw_concepts = check_nsfw_images(results.images)
+        if any(has_nsfw_concepts):
+            gr.Warning("NSFW content detected.")
+            # Apply a blur filter to the first image in the results
+            blurred_image = images[0].filter(ImageFilter.GaussianBlur(16))  # Adjust the radius as needed
+            return blurred_image
+        return images[0]
+    return results.images[0]
+# Gradio Interface
+description = """
+SDXL-Lightning ByteDance model demo. Link to model: https://huggingface.co/ByteDance/SDXL-Lightning
+"""
+with gr.Blocks(css="style.css") as demo:
+    gr.HTML("<h1><center>Text-to-Image with SDXL-Lightning ⚡</center></h1>")
+    gr.Markdown(description)
+    with gr.Group():
+        with gr.Row():
+            prompt = gr.Textbox(label='Enter you image prompt:', scale=8)
+        with gr.Row():
+            ckpt = gr.Dropdown(label='Select inference steps',choices=['1-Step', '2-Step', '4-Step', '8-Step'], value='4-Step', interactive=True)
+            aspect = gr.Dropdown(label='Aspect Ratio', choices=list(aspect_ratios.keys()), value='1:1', interactive=True)
+            mode = gr.Dropdown(label='Mode', choices=['landscape', 'portrait'], value='landscape')  # Mode as a dropdown
+            submit = gr.Button(scale=1, variant='primary')
+    img = gr.Image(label='SDXL-Lightning Generated Image')
+    prompt.submit(fn=generate_image,
+                 inputs=[prompt, ckpt, aspect, mode],
+                 outputs=img,
+                 )
+    submit.click(fn=generate_image,
+                 inputs=[prompt, ckpt, aspect, mode],
+                 outputs=img,
+                 )
+    # Dropdown for selecting examples
+    example_dropdown = gr.Dropdown(label='Select an Example', choices=[e["prompt"] for e in examples])
+    example_dropdown.change(fn=set_example, inputs=example_dropdown, outputs=[prompt, ckpt, aspect, mode])
+demo.queue().launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+transformers
+diffusers
+torch
+accelerate
+gradio
+pillow
+spaces

safety_checker.py ADDED Viewed

	@@ -0,0 +1,137 @@

+# Copyright 2023 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import torch
+import torch.nn as nn
+from transformers import CLIPConfig, CLIPVisionModel, PreTrainedModel
+def cosine_distance(image_embeds, text_embeds):
+    normalized_image_embeds = nn.functional.normalize(image_embeds)
+    normalized_text_embeds = nn.functional.normalize(text_embeds)
+    return torch.mm(normalized_image_embeds, normalized_text_embeds.t())
+class StableDiffusionSafetyChecker(PreTrainedModel):
+    """CLIP-vision-based classifier that flags images matching "bad concept" embeddings.
+
+    An image is projected into the embedding space, compared against two
+    banks of embeddings (3 "special care" and 17 "concept" rows) and flagged
+    when any concept score exceeds its per-concept threshold.
+    """
+    config_class = CLIPConfig
+    _no_split_modules = ["CLIPEncoderLayer"]
+    def __init__(self, config: CLIPConfig):
+        """Build the vision tower, the projection layer and the embedding banks.
+
+        The embedding and threshold parameters are created as all-ones,
+        non-trainable placeholders - presumably replaced by pretrained
+        weights when the model is loaded via from_pretrained (verify).
+        """
+        super().__init__(config)
+        self.vision_model = CLIPVisionModel(config.vision_config)
+        self.visual_projection = nn.Linear(
+            config.vision_config.hidden_size, config.projection_dim, bias=False
+        )
+        self.concept_embeds = nn.Parameter(
+            torch.ones(17, config.projection_dim), requires_grad=False
+        )
+        self.special_care_embeds = nn.Parameter(
+            torch.ones(3, config.projection_dim), requires_grad=False
+        )
+        # The "*_weights" parameters are used as per-concept thresholds in forward.
+        self.concept_embeds_weights = nn.Parameter(torch.ones(17), requires_grad=False)
+        self.special_care_embeds_weights = nn.Parameter(
+            torch.ones(3), requires_grad=False
+        )
+    @torch.no_grad()
+    def forward(self, clip_input, images):
+        """Return one bool per batch item: True when any concept score is positive.
+
+        `clip_input` is fed to the vision model; `images` is accepted but not
+        used by this method.
+        """
+        pooled_output = self.vision_model(clip_input)[1]  # pooled_output
+        image_embeds = self.visual_projection(pooled_output)
+        # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
+        special_cos_dist = (
+            cosine_distance(image_embeds, self.special_care_embeds)
+            .cpu()
+            .float()
+            .numpy()
+        )
+        cos_dist = (
+            cosine_distance(image_embeds, self.concept_embeds).cpu().float().numpy()
+        )
+        result = []
+        batch_size = image_embeds.shape[0]
+        for i in range(batch_size):
+            result_img = {
+                "special_scores": {},
+                "special_care": [],
+                "concept_scores": {},
+                "bad_concepts": [],
+            }
+            # increase this value to create a stronger `nsfw` filter
+            # at the cost of increasing the possibility of filtering benign images
+            adjustment = 0.0
+            for concept_idx in range(len(special_cos_dist[0])):
+                concept_cos = special_cos_dist[i][concept_idx]
+                concept_threshold = self.special_care_embeds_weights[concept_idx].item()
+                result_img["special_scores"][concept_idx] = round(
+                    concept_cos - concept_threshold + adjustment, 3
+                )
+                if result_img["special_scores"][concept_idx] > 0:
+                    # NOTE(review): the braces below build a set, not a tuple or dict.
+                    result_img["special_care"].append(
+                        {concept_idx, result_img["special_scores"][concept_idx]}
+                    )
+                    # Once a special-care concept fires, every later score for
+                    # this image is computed with a 0.01 bonus (stricter filter).
+                    adjustment = 0.01
+            for concept_idx in range(len(cos_dist[0])):
+                concept_cos = cos_dist[i][concept_idx]
+                concept_threshold = self.concept_embeds_weights[concept_idx].item()
+                result_img["concept_scores"][concept_idx] = round(
+                    concept_cos - concept_threshold + adjustment, 3
+                )
+                if result_img["concept_scores"][concept_idx] > 0:
+                    result_img["bad_concepts"].append(concept_idx)
+            result.append(result_img)
+        # Only the "bad_concepts" list decides the final flag.
+        has_nsfw_concepts = [len(res["bad_concepts"]) > 0 for res in result]
+        return has_nsfw_concepts
+    @torch.no_grad()
+    def forward_onnx(self, clip_input: torch.FloatTensor, images: torch.FloatTensor):
+        """Tensor-only variant of the check, without Python loops or rounding.
+
+        Sets the flagged entries of `images` to 0.0 in place and returns
+        `(images, has_nsfw_concepts)`, where the second item is a bool tensor
+        with one entry per batch item.
+        """
+        pooled_output = self.vision_model(clip_input)[1]  # pooled_output
+        image_embeds = self.visual_projection(pooled_output)
+        special_cos_dist = cosine_distance(image_embeds, self.special_care_embeds)
+        cos_dist = cosine_distance(image_embeds, self.concept_embeds)
+        # increase this value to create a stronger `nsfw` filter
+        # at the cost of increasing the possibility of filtering benign images
+        adjustment = 0.0
+        special_scores = (
+            special_cos_dist - self.special_care_embeds_weights + adjustment
+        )
+        # special_scores = special_scores.round(decimals=3)
+        special_care = torch.any(special_scores > 0, dim=1)
+        # Items with any special-care hit get a 0.01 bonus on every concept score.
+        special_adjustment = special_care * 0.01
+        special_adjustment = special_adjustment.unsqueeze(1).expand(
+            -1, cos_dist.shape[1]
+        )
+        concept_scores = (cos_dist - self.concept_embeds_weights) + special_adjustment
+        # concept_scores = concept_scores.round(decimals=3)
+        has_nsfw_concepts = torch.any(concept_scores > 0, dim=1)
+        images[has_nsfw_concepts] = 0.0  # black image
+        return images, has_nsfw_concepts

style.css ADDED Viewed

	@@ -0,0 +1,12 @@

+/* Cap the width of the whole Gradio app. */
+.gradio-container {
+  max-width: 690px! important;
+}
+/* Rules for a "share" button container and its states. */
+/* NOTE(review): no element with these ids is created in the app.py shown in this commit - confirm they are still needed. */
+#share-btn-container{padding-left: 0.5rem !important; padding-right: 0.5rem !important; background-color: #000000; justify-content: center; align-items: center; border-radius: 9999px !important; max-width: 13rem; margin-left: auto;margin-top: 0.35em;}
+div#share-btn-container > div {flex-direction: row;background: black;align-items: center}
+#share-btn-container:hover {background-color: #060606}
+#share-btn {all: initial; color: #ffffff;font-weight: 600; cursor:pointer; font-family: 'IBM Plex Sans', sans-serif; margin-left: 0.5rem !important; padding-top: 0.5rem !important; padding-bottom: 0.5rem !important;right:0;font-size: 15px;}
+#share-btn * {all: unset}
+#share-btn-container div:nth-child(-n+2){width: auto !important;min-height: 0px !important;}
+#share-btn-container .wrap {display: none !important}
+#share-btn-container.hidden {display: none!important}