EVA787797 committed on
Commit f95232a • 1 Parent(s): c14989c

Upload 5 files

Files changed (5)
  1. README (14).md +13 -0
  2. app (8).py +195 -0
  3. gitattributes (14) +35 -0
  4. kolors___init__.py +0 -0
  5. requirements (4).txt +8 -0
README (14).md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: Create_image
+ emoji: 🖼
+ colorFrom: purple
+ colorTo: red
+ sdk: gradio
+ sdk_version: 4.38.1
+ app_file: app.py
+ pinned: false
+ license: apache-2.0
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app (8).py ADDED
@@ -0,0 +1,195 @@
+ import spaces
+ import random
+ import torch
+ from huggingface_hub import snapshot_download
+ from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
+ from kolors.pipelines import pipeline_stable_diffusion_xl_chatglm_256_ipadapter, pipeline_stable_diffusion_xl_chatglm_256
+ from kolors.models.modeling_chatglm import ChatGLMModel
+ from kolors.models.tokenization_chatglm import ChatGLMTokenizer
+ from kolors.models import unet_2d_condition
+ from diffusers import AutoencoderKL, EulerDiscreteScheduler, UNet2DConditionModel
+ import gradio as gr
+ import numpy as np
+ from huggingface_hub import InferenceClient
+ import os
+
+ device = "cuda"
+ ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
+ ckpt_IPA_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors-IP-Adapter-Plus")
+
+ text_encoder = ChatGLMModel.from_pretrained(f'{ckpt_dir}/text_encoder', torch_dtype=torch.float16).half().to(device)
+ tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
+ vae = AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None).half().to(device)
+ scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
+ unet_t2i = UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None).half().to(device)
+ unet_i2i = unet_2d_condition.UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None).half().to(device)
+ image_encoder = CLIPVisionModelWithProjection.from_pretrained(f'{ckpt_IPA_dir}/image_encoder', ignore_mismatched_sizes=True).to(dtype=torch.float16, device=device)
+ ip_img_size = 336
+ clip_image_processor = CLIPImageProcessor(size=ip_img_size, crop_size=ip_img_size)
+
+ # Kolors text-to-image pipeline
+ pipe_t2i = pipeline_stable_diffusion_xl_chatglm_256.StableDiffusionXLPipeline(
+     vae=vae,
+     text_encoder=text_encoder,
+     tokenizer=tokenizer,
+     unet=unet_t2i,
+     scheduler=scheduler,
+     force_zeros_for_empty_prompt=False
+ ).to(device)
+
+ # Kolors IP-Adapter (image prompt) pipeline
+ pipe_i2i = pipeline_stable_diffusion_xl_chatglm_256_ipadapter.StableDiffusionXLPipeline(
+     vae=vae,
+     text_encoder=text_encoder,
+     tokenizer=tokenizer,
+     unet=unet_i2i,
+     scheduler=scheduler,
+     image_encoder=image_encoder,
+     feature_extractor=clip_image_processor,
+     force_zeros_for_empty_prompt=False
+ ).to(device)
+
+ # Alias the projection layer under the name the IP-Adapter pipeline expects
+ if hasattr(pipe_i2i.unet, 'encoder_hid_proj'):
+     pipe_i2i.unet.text_encoder_hid_proj = pipe_i2i.unet.encoder_hid_proj
+
+ pipe_i2i.load_ip_adapter(f'{ckpt_IPA_dir}', subfolder="", weight_name=["ip_adapter_plus_general.bin"])
+
+ MAX_SEED = np.iinfo(np.int32).max
+ MAX_IMAGE_SIZE = 1024
+
+ @spaces.GPU
+ def infer(prompt,
+           ip_adapter_image=None,
+           ip_adapter_scale=0.5,
+           negative_prompt="",
+           seed=0,
+           randomize_seed=False,
+           width=1024,
+           height=1024,
+           guidance_scale=5.0,
+           num_inference_steps=25
+           ):
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
+     generator = torch.Generator().manual_seed(seed)
+
+     if ip_adapter_image is None:
+         pipe_t2i.to(device)
+         image = pipe_t2i(
+             prompt=prompt,
+             negative_prompt=negative_prompt,
+             guidance_scale=guidance_scale,
+             num_inference_steps=num_inference_steps,
+             width=width,
+             height=height,
+             generator=generator
+         ).images[0]
+         image.save("generated_image.jpg")  # Save with a .jpg extension
+         return image, "generated_image.jpg"
+     else:
+         pipe_i2i.to(device)
+         image_encoder.to(device)
+         pipe_i2i.image_encoder = image_encoder
+         pipe_i2i.set_ip_adapter_scale([ip_adapter_scale])
+         image = pipe_i2i(
+             prompt=prompt,
+             ip_adapter_image=[ip_adapter_image],
+             negative_prompt=negative_prompt,
+             height=height,
+             width=width,
+             num_inference_steps=num_inference_steps,
+             guidance_scale=guidance_scale,
+             num_images_per_prompt=1,
+             generator=generator
+         ).images[0]
+         image.save("generated_image.jpg")  # Save with a .jpg extension
+         return image, "generated_image.jpg"
+
+ css = """
+ #col-left {
+     margin: 0 auto;
+     max-width: 600px;
+ }
+ #col-right {
+     margin: 0 auto;
+     max-width: 750px;
+ }
+ """
+
+ with gr.Blocks(css=css) as Kolors:
+     with gr.Row():
+         with gr.Column(elem_id="col-left"):
+             with gr.Row():
+                 generated_prompt = gr.Textbox(
+                     label="Prompt",
+                     placeholder="Enter the prompt to use for image generation",
+                     lines=2
+                 )
+             with gr.Row():
+                 ip_adapter_image = gr.Image(label="Image Prompt (optional)", type="pil")
+             with gr.Row(visible=False):  # Advanced settings hidden
+                 negative_prompt = gr.Textbox(
+                     label="Negative prompt",
+                     placeholder="Enter a negative prompt",
+                     visible=True,
+                 )
+                 seed = gr.Slider(
+                     label="Seed",
+                     minimum=0,
+                     maximum=MAX_SEED,
+                     step=1,
+                     value=0,
+                 )
+                 randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+             with gr.Row():
+                 width = gr.Slider(
+                     label="Width",
+                     minimum=256,
+                     maximum=MAX_IMAGE_SIZE,
+                     step=32,
+                     value=1024,
+                 )
+                 height = gr.Slider(
+                     label="Height",
+                     minimum=256,
+                     maximum=MAX_IMAGE_SIZE,
+                     step=32,
+                     value=1024,
+                 )
+             with gr.Row():
+                 guidance_scale = gr.Slider(
+                     label="Guidance scale",
+                     minimum=0.0,
+                     maximum=10.0,
+                     step=0.1,
+                     value=5.0,
+                 )
+                 num_inference_steps = gr.Slider(
+                     label="Number of inference steps",
+                     minimum=10,
+                     maximum=50,
+                     step=1,
+                     value=25,
+                 )
+             with gr.Row():
+                 ip_adapter_scale = gr.Slider(
+                     label="Image influence scale",
+                     info="Use 1 for creating variations",
+                     minimum=0.0,
+                     maximum=1.0,
+                     step=0.05,
+                     value=0.5,
+                 )
+             with gr.Row():
+                 run_button = gr.Button("Generate Image")
+
+         with gr.Column(elem_id="col-right"):
+             result = gr.Image(label="Result", show_label=False)
+             download_button = gr.File(label="Download Image")
+
+     # Generate the image and wire up the download file path
+     run_button.click(
+         fn=infer,
+         inputs=[generated_prompt, ip_adapter_image, ip_adapter_scale, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
+         outputs=[result, download_button]
+     )
+
+ Kolors.queue().launch(debug=True)
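
As a hedged illustration (not among the uploaded files): a minimal sketch of how the infer function wired to run_button.click above could be called remotely with the gradio_client package once the Space is running. The Space ID is a placeholder, and the default endpoint name /infer (derived from the function name) is an assumption.

from gradio_client import Client

# Placeholder Space ID -- substitute the real "username/space" once deployed.
client = Client("your-username/Create_image")

# Positional arguments follow the `inputs` order passed to run_button.click:
# prompt, ip_adapter_image, ip_adapter_scale, negative_prompt, seed,
# randomize_seed, width, height, guidance_scale, num_inference_steps.
image_path, download_path = client.predict(
    "a watercolor lighthouse at dawn",  # prompt
    None,   # ip_adapter_image: None selects the text-to-image branch
    0.5,    # ip_adapter_scale
    "",     # negative_prompt
    0,      # seed
    True,   # randomize_seed
    1024,   # width
    1024,   # height
    5.0,    # guidance_scale
    25,     # num_inference_steps
    api_name="/infer",  # assumed default endpoint name for infer
)
print(image_path, download_path)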
gitattributes (14) ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
kolors___init__.py ADDED
File without changes
requirements (4).txt ADDED
@@ -0,0 +1,8 @@
+ accelerate==0.27.2
+ diffusers==0.28.2
+ invisible_watermark==0.2.0
+ torch==2.2.0
+ transformers==4.42.4
+ sentencepiece==0.1.99
+ huggingface_hub
+ gradio