import spaces
import os
import torch
import random
from huggingface_hub import snapshot_download
from kolors.pipelines.pipeline_stable_diffusion_xl_chatglm_256 import StableDiffusionXLPipeline
from kolors.models.modeling_chatglm import ChatGLMModel
from kolors.models.tokenization_chatglm import ChatGLMTokenizer
from diffusers import UNet2DConditionModel, AutoencoderKL
from diffusers import EulerDiscreteScheduler
import gradio as gr
# Download the Kolors checkpoint from the Hugging Face Hub (cached locally)
ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
# Load the individual pipeline components in fp16 to halve GPU memory use
text_encoder = ChatGLMModel.from_pretrained(
    os.path.join(ckpt_dir, 'text_encoder'),
    torch_dtype=torch.float16).half()
tokenizer = ChatGLMTokenizer.from_pretrained(os.path.join(ckpt_dir, 'text_encoder'))
vae = AutoencoderKL.from_pretrained(os.path.join(ckpt_dir, "vae"), revision=None).half()
scheduler = EulerDiscreteScheduler.from_pretrained(os.path.join(ckpt_dir, "scheduler"))
unet = UNet2DConditionModel.from_pretrained(os.path.join(ckpt_dir, "unet"), revision=None).half()
pipe = StableDiffusionXLPipeline(
    vae=vae,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    unet=unet,
    scheduler=scheduler,
    force_zeros_for_empty_prompt=False)
pipe = pipe.to("cuda")
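# Note: Kolors keeps the SDXL UNet/VAE layout but replaces the CLIP text encoder
# with ChatGLM (hence the custom chatglm pipeline imported above), which is why
# the pipeline is assembled from individually loaded components rather than via
# a single StableDiffusionXLPipeline.from_pretrained call.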
# numpy is needed below for the int32 seed bound; gradio, torch, random, and
# spaces are already imported above
import numpy as np

device = "cuda" if torch.cuda.is_available() else "cpu"

# Optional alternative photorealistic SDXL checkpoint (currently unused):
# from diffusers import AutoPipelineForText2Image
# pipeline_real = AutoPipelineForText2Image.from_pretrained(
#     "SG161222/RealVisXL_V4.0", torch_dtype=torch.float16).to(device)
def adjust_to_nearest_multiple(value, divisor=8):
    """
    Adjusts the input value to the nearest multiple of the divisor.

    Args:
        value (int): The value to adjust.
        divisor (int): The divisor the value is rounded to a multiple of. Defaults to 8.

    Returns:
        int: The value rounded to the nearest multiple of the divisor.
    """
    if value % divisor == 0:
        return value
    else:
        # Round to the nearest multiple of the divisor
        return round(value / divisor) * divisor
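# Illustrative behaviour (not executed at import time):
#   adjust_to_nearest_multiple(1024) -> 1024  (already a multiple of 8)
#   adjust_to_nearest_multiple(1021) -> 1024  (rounds up)
#   adjust_to_nearest_multiple(1018) -> 1016  (rounds down)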
def adjust_dimensions(height, width):
    """
    Adjusts the height and width to the nearest multiples of 8, as the
    SDXL-style VAE requires dimensions divisible by 8.

    Args:
        height (int): The height to adjust.
        width (int): The width to adjust.

    Returns:
        tuple: Adjusted (height, width).
    """
    new_height = adjust_to_nearest_multiple(height)
    new_width = adjust_to_nearest_multiple(width)
    return new_height, new_width
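# e.g. adjust_dimensions(1023, 1535) -> (1024, 1536)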
MAX_SEED = np.iinfo(np.int32).max  # upper bound for user-supplied seeds
MAX_IMAGE_SIZE = 4100  # each dimension is clamped to MAX_IMAGE_SIZE // 2 = 2050 below
@spaces.GPU(duration=60)
def generate_image(prompt, negative_prompt, height, width, num_inference_steps, guidance_scale, num_images_per_prompt, use_random_seed, seed, progress=gr.Progress(track_tqdm=True)):
    if use_random_seed:
        seed = random.randint(0, 2**32 - 1)
    else:
        seed = int(seed)  # Ensure the seed is an integer
    # Clamp each dimension, then snap both to multiples of 8
    width = min(width, MAX_IMAGE_SIZE // 2)
    height = min(height, MAX_IMAGE_SIZE // 2)
    height, width = adjust_dimensions(height, width)
    image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        num_images_per_prompt=num_images_per_prompt,
        generator=torch.Generator(device=pipe.device).manual_seed(seed)
    ).images
    return image, seed
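# A direct call outside Gradio, for reference (assumes a CUDA device is
# available; the gr.Progress default only tracks progress inside a running app):
#   images, used_seed = generate_image(
#       "a photo of a red fox in the snow", "", 1024, 1024,
#       num_inference_steps=20, guidance_scale=5.0,
#       num_images_per_prompt=1, use_random_seed=True, seed=0)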
description = """
<p align="center">Effective Training of Diffusion Model for Photorealistic Text-to-Image Synthesis</p>
<p><center>
<a href="https://kolors.kuaishou.com/" target="_blank">[Official Website]</a>
<a href="https://github.com/Kwai-Kolors/Kolors/blob/master/imgs/Kolors_paper.pdf" target="_blank">[Tech Report]</a>
<a href="https://huggingface.co/Kwai-Kolors/Kolors" target="_blank">[Model Page]</a>
<a href="https://github.com/Kwai-Kolors/Kolors" target="_blank">[Github]</a>
</center></p>
"""
# Gradio interface: the two prompts are primary inputs; everything else sits
# in a collapsed "Advanced settings" accordion
iface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Textbox(label="Negative Prompt")
    ],
    additional_inputs=[
        gr.Slider(512, 2048, 1024, step=64, label="Height"),
        gr.Slider(512, 2048, 1024, step=64, label="Width"),
        gr.Slider(20, 50, 20, step=1, label="Number of Inference Steps"),
        gr.Slider(1, 20, 5, step=0.5, label="Guidance Scale"),
        gr.Slider(1, 4, 1, step=1, label="Number of images per prompt"),
        gr.Checkbox(label="Use Random Seed", value=True),
        gr.Number(label="Seed", value=0, precision=0)
    ],
    additional_inputs_accordion=gr.Accordion(label="Advanced settings", open=False),
    outputs=[
        gr.Gallery(label="Result", elem_id="gallery", show_label=False),
        gr.Number(label="Seed Used")
    ],
    title="Kolors",
    description=description,
    theme='bethecloud/storj_theme',
)
iface.launch(debug=True)