import spaces  # must be imported early on ZeroGPU Spaces, before CUDA is initialized
import os
import random

import gradio as gr
import numpy as np
import torch
from huggingface_hub import snapshot_download
from diffusers import AutoencoderKL, EulerDiscreteScheduler, UNet2DConditionModel

from kolors.pipelines.pipeline_stable_diffusion_xl_chatglm_256 import StableDiffusionXLPipeline
from kolors.models.modeling_chatglm import ChatGLMModel
from kolors.models.tokenization_chatglm import ChatGLMTokenizer

# Download the model files
ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
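# snapshot_download resolves to the local Hugging Face cache (downloading on first run)
# and returns the directory holding the text_encoder/vae/unet/scheduler subfolders.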

# Load the pipeline components; everything runs in half precision (fp16) to save GPU memory
text_encoder = ChatGLMModel.from_pretrained(
    os.path.join(ckpt_dir, 'text_encoder'),
    torch_dtype=torch.float16)
tokenizer = ChatGLMTokenizer.from_pretrained(os.path.join(ckpt_dir, 'text_encoder'))
vae = AutoencoderKL.from_pretrained(os.path.join(ckpt_dir, "vae"), revision=None).half()
scheduler = EulerDiscreteScheduler.from_pretrained(os.path.join(ckpt_dir, "scheduler"))
unet = UNet2DConditionModel.from_pretrained(os.path.join(ckpt_dir, "unet"), revision=None).half()

pipe = StableDiffusionXLPipeline(
    vae=vae,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    unet=unet,
    scheduler=scheduler,
    force_zeros_for_empty_prompt=False)
pipe = pipe.to("cuda")
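
# Note: Kolors swaps SDXL's CLIP text encoders for ChatGLM, which is why the custom
# StableDiffusionXLPipeline from the kolors package is used here rather than the
# stock diffusers pipeline.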



device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16

# Optional alternative pipeline, kept for reference but not loaded:
# from diffusers import AutoPipelineForText2Image
# repo = "SG161222/RealVisXL_V4.0"
# pipeline_real = AutoPipelineForText2Image.from_pretrained(repo, torch_dtype=torch.float16).to('cuda')


def adjust_to_nearest_multiple(value, divisor=8):
    """
    Adjust the input value to the nearest multiple of the divisor.

    Args:
        value (int): The value to adjust.
        divisor (int): The divisor the result must be a multiple of. Defaults to 8.

    Returns:
        int: The multiple of the divisor nearest to value.
    """
    if value % divisor == 0:
        return value
    # Round to the nearest multiple of the divisor
    return round(value / divisor) * divisor

def adjust_dimensions(height, width):
    """
    Adjust height and width so both are divisible by 8, which latent-diffusion
    pipelines like SDXL require because the VAE downsamples by a factor of 8.

    Args:
        height (int): The height to adjust.
        width (int): The width to adjust.

    Returns:
        tuple: The adjusted (height, width).
    """
    new_height = adjust_to_nearest_multiple(height)
    new_width = adjust_to_nearest_multiple(width)

    return new_height, new_width
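
# For example, adjust_dimensions(1000, 1355) returns (1000, 1352): 1000 is already a
# multiple of 8, while 1355 / 8 rounds to 169, giving 169 * 8 = 1352.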


MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 4100  # each side is capped at MAX_IMAGE_SIZE // 2 in generate_image


@spaces.GPU(duration=60)
def generate_image(prompt, negative_prompt, height, width, num_inference_steps, guidance_scale, num_images_per_prompt, use_random_seed, seed, progress=gr.Progress(track_tqdm=True)):
    if use_random_seed:
        seed = random.randint(0, MAX_SEED)
    else:
        seed = int(seed)  # Ensure the seed is an integer

    # Cap each side, then snap both dimensions to the nearest multiple of 8
    width = min(width, MAX_IMAGE_SIZE // 2)
    height = min(height, MAX_IMAGE_SIZE // 2)
    height, width = adjust_dimensions(height, width)

    image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        num_images_per_prompt=num_images_per_prompt,
        generator=torch.Generator(pipe.device).manual_seed(seed)
    ).images
    return image, seed
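
# A minimal sketch of calling the generator outside the UI (illustrative values only,
# assuming a CUDA runtime since the pipeline was moved to "cuda" above):
# images, used_seed = generate_image(
#     prompt="a watercolor fox in a snowy forest",
#     negative_prompt="blurry, low quality",
#     height=1024, width=1024,
#     num_inference_steps=25, guidance_scale=5.0,
#     num_images_per_prompt=1, use_random_seed=True, seed=0,
# )
# images[0].save("fox.png")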
    
    

description = """
<p align="center">Effective Training of Diffusion Model for Photorealistic Text-to-Image Synthesis</p>
<p><center>
<a href="https://kolors.kuaishou.com/" target="_blank">[Official Website]</a>
<a href="https://github.com/Kwai-Kolors/Kolors/blob/master/imgs/Kolors_paper.pdf" target="_blank">[Tech Report]</a>
<a href="https://huggingface.co/Kwai-Kolors/Kolors" target="_blank">[Model Page]</a>
<a href="https://github.com/Kwai-Kolors/Kolors" target="_blank">[Github]</a>
</center></p>
"""

# Gradio interface
iface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Textbox(label="Negative Prompt")
    ],
    additional_inputs=[
        gr.Slider(512, 2048, 1024, step=64, label="Height"),
        gr.Slider(512, 2048, 1024, step=64, label="Width"),
        gr.Slider(20, 50, 20, step=1, label="Number of Inference Steps"),
        gr.Slider(1, 20, 5, step=0.5, label="Guidance Scale"),
        gr.Slider(1, 4, 1, step=1, label="Number of images per prompt"),
        gr.Checkbox(label="Use Random Seed", value=True),
        gr.Number(label="Seed", value=0, precision=0)
    ],
    additional_inputs_accordion=gr.Accordion(label="Advanced settings", open=False),
    outputs=[
        gr.Gallery(label="Result", elem_id="gallery", show_label=False),
        gr.Number(label="Seed Used")
    ],
    title="Kolors",
    description=description,
    theme='bethecloud/storj_theme',
)

iface.launch(debug=True)  # debug=True blocks the main thread and prints errors to the console