import spaces  # must be imported early on ZeroGPU Spaces, before CUDA is initialized
import os
import random

import gradio as gr
import numpy as np
import torch
from huggingface_hub import snapshot_download
from diffusers import AutoencoderKL, EulerDiscreteScheduler, UNet2DConditionModel

from kolors.pipelines.pipeline_stable_diffusion_xl_chatglm_256 import StableDiffusionXLPipeline
from kolors.models.modeling_chatglm import ChatGLMModel
from kolors.models.tokenization_chatglm import ChatGLMTokenizer

# Download the model files
ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
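# snapshot_download resolves to the local Hugging Face cache (downloading on first run)
# and returns the directory holding the text_encoder/vae/unet/scheduler subfolders.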

# Load the pipeline components; everything runs in half precision (fp16) to save GPU memory
text_encoder = ChatGLMModel.from_pretrained(
    os.path.join(ckpt_dir, 'text_encoder'),
    torch_dtype=torch.float16)
tokenizer = ChatGLMTokenizer.from_pretrained(os.path.join(ckpt_dir, 'text_encoder'))
vae = AutoencoderKL.from_pretrained(os.path.join(ckpt_dir, "vae"), revision=None).half()
scheduler = EulerDiscreteScheduler.from_pretrained(os.path.join(ckpt_dir, "scheduler"))
unet = UNet2DConditionModel.from_pretrained(os.path.join(ckpt_dir, "unet"), revision=None).half()

pipe = StableDiffusionXLPipeline(
    vae=vae,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    unet=unet,
    scheduler=scheduler,
    force_zeros_for_empty_prompt=False)
pipe = pipe.to("cuda")
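
# Note: Kolors swaps SDXL's CLIP text encoders for ChatGLM, which is why the custom
# StableDiffusionXLPipeline from the kolors package is used here rather than the
# stock diffusers pipeline.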



device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16

# Optional alternative pipeline, kept for reference but not loaded:
# from diffusers import AutoPipelineForText2Image
# repo = "SG161222/RealVisXL_V4.0"
# pipeline_real = AutoPipelineForText2Image.from_pretrained(repo, torch_dtype=torch.float16).to('cuda')


def adjust_to_nearest_multiple(value, divisor=8):
    """
    Adjust the input value to the nearest multiple of the divisor.

    Args:
        value (int): The value to adjust.
        divisor (int): The divisor the result must be a multiple of. Defaults to 8.

    Returns:
        int: The multiple of the divisor nearest to value.
    """
    if value % divisor == 0:
        return value
    # Round to the nearest multiple of the divisor
    return round(value / divisor) * divisor

def adjust_dimensions(height, width):
    """
    Adjust height and width so both are divisible by 8, which latent-diffusion
    pipelines like SDXL require because the VAE downsamples by a factor of 8.

    Args:
        height (int): The height to adjust.
        width (int): The width to adjust.

    Returns:
        tuple: The adjusted (height, width).
    """
    new_height = adjust_to_nearest_multiple(height)
    new_width = adjust_to_nearest_multiple(width)

    return new_height, new_width
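
# For example, adjust_dimensions(1000, 1355) returns (1000, 1352): 1000 is already a
# multiple of 8, while 1355 / 8 rounds to 169, giving 169 * 8 = 1352.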


MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 4100  # each side is capped at MAX_IMAGE_SIZE // 2 in generate_image


@spaces.GPU(duration=60)
def generate_image(prompt, negative_prompt, height, width, num_inference_steps, guidance_scale, num_images_per_prompt, use_random_seed, seed, progress=gr.Progress(track_tqdm=True)):
    if use_random_seed:
        seed = random.randint(0, MAX_SEED)
    else:
        seed = int(seed)  # Ensure the seed is an integer

    # Cap each side, then snap both dimensions to the nearest multiple of 8
    width = min(width, MAX_IMAGE_SIZE // 2)
    height = min(height, MAX_IMAGE_SIZE // 2)
    height, width = adjust_dimensions(height, width)

    image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        num_images_per_prompt=num_images_per_prompt,
        generator=torch.Generator(pipe.device).manual_seed(seed)
    ).images
    return image, seed
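
# A minimal sketch of calling the generator outside the UI (illustrative values only,
# assuming a CUDA runtime since the pipeline was moved to "cuda" above):
# images, used_seed = generate_image(
#     prompt="a watercolor fox in a snowy forest",
#     negative_prompt="blurry, low quality",
#     height=1024, width=1024,
#     num_inference_steps=25, guidance_scale=5.0,
#     num_images_per_prompt=1, use_random_seed=True, seed=0,
# )
# images[0].save("fox.png")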
    
    

description = """
<p align="center">Effective Training of Diffusion Model for Photorealistic Text-to-Image Synthesis</p>
<p><center>
<a href="https://kolors.kuaishou.com/" target="_blank">[Official Website]</a>
<a href="https://github.com/Kwai-Kolors/Kolors/blob/master/imgs/Kolors_paper.pdf" target="_blank">[Tech Report]</a>
<a href="https://huggingface.co/Kwai-Kolors/Kolors" target="_blank">[Model Page]</a>
<a href="https://github.com/Kwai-Kolors/Kolors" target="_blank">[Github]</a>
</center></p>
"""

# Gradio interface
iface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Textbox(label="Negative Prompt")
    ],
    additional_inputs=[
        gr.Slider(512, 2048, 1024, step=64, label="Height"),
        gr.Slider(512, 2048, 1024, step=64, label="Width"),
        gr.Slider(20, 50, 20, step=1, label="Number of Inference Steps"),
        gr.Slider(1, 20, 5, step=0.5, label="Guidance Scale"),
        gr.Slider(1, 4, 1, step=1, label="Number of images per prompt"),
        gr.Checkbox(label="Use Random Seed", value=True),
        gr.Number(label="Seed", value=0, precision=0)
    ],
    additional_inputs_accordion=gr.Accordion(label="Advanced settings", open=False),
    outputs=[
        gr.Gallery(label="Result", elem_id="gallery", show_label=False),
        gr.Number(label="Seed Used")
    ],
    title="Kolors",
    description=description,
    theme='bethecloud/storj_theme',
)

iface.launch(debug=True)  # debug=True blocks the main thread and prints errors to the console