Spaces:
Runtime error
Runtime error
Contrebande Labs
committed on
Commit
·
e668dd5
1
Parent(s):
69cf43b
inference code
Browse files
app.py
CHANGED
@@ -1,11 +1,207 @@
|
|
1 |
import gradio as gr
|
2 |
import jax
|
3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
# Startup diagnostics: query the JAX backend once and report what it exposes.
_jax_devices = jax.devices()
print(f"JAX devices: {_jax_devices}")
print(f"JAX device type: {_jax_devices[0].device_kind}")
|
6 |
|
7 |
def infer_charred(prompt):
    """Placeholder inference hook for the CHARRED Stable Diffusion demo.

    No model is wired in yet: ``prompt`` is ignored and ``None`` is returned.
    """
    return None
|
10 |
|
11 |
|
@@ -15,7 +211,21 @@ with gr.Blocks(theme="gradio/soft") as demo:
|
|
15 |
|
16 |
with gr.Tab("Journal"):
|
17 |
gr.Markdown(
|
18 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
)
|
20 |
|
21 |
with gr.Tab("☢️ DEMO ☢️"):
|
|
|
1 |
import gradio as gr
|
2 |
import jax
|
3 |
|
4 |
+
import numpy as np
|
5 |
+
|
6 |
+
import jax
|
7 |
+
import jax.numpy as jnp
|
8 |
+
|
9 |
+
from PIL import Image
|
10 |
+
|
11 |
+
from diffusers import (
|
12 |
+
FlaxAutoencoderKL,
|
13 |
+
FlaxDPMSolverMultistepScheduler,
|
14 |
+
FlaxUNet2DConditionModel,
|
15 |
+
)
|
16 |
+
|
17 |
+
from transformers import ByT5Tokenizer, FlaxT5ForConditionalGeneration
|
18 |
+
|
19 |
+
def get_inference_lambda(seed):
    """Build a ``prompt -> list[PIL.Image.Image]`` inference callable.

    The ByT5 tokenizer and encoder, the CHARRED UNet, the Stable Diffusion 2.1
    VAE and the DPM-Solver scheduler are loaded once here and captured by the
    returned closure, so repeated calls only pay for the jitted denoising loop.

    Args:
        seed: Integer handed to ``jax.random.PRNGKey`` to draw the initial
            latents. The same key is reused on every call, so a given prompt
            always starts from the same noise.

    Returns:
        A callable that takes a prompt string and returns a list of PIL
        images, one per row of the decoded batch.
    """
    tokenizer = ByT5Tokenizer()

    language_model = FlaxT5ForConditionalGeneration.from_pretrained(
        "google/byt5-base",
        dtype=jnp.float32,
    )
    text_encoder = language_model.encode
    text_encoder_params = language_model.params

    # Single source of truth for the padded sequence length of both the
    # negative (empty) prompt and the user prompt.
    max_length = 1024

    # The unconditional branch of classifier-free guidance never changes, so
    # its hidden states are computed once, outside the jitted function.
    # NOTE(review): no attention mask is passed to the encoder for either
    # prompt — presumably matching how the UNet was trained; confirm.
    tokenized_negative_prompt = tokenizer(
        "", padding="max_length", max_length=max_length, return_tensors="np"
    ).input_ids
    negative_prompt_text_encoder_hidden_states = text_encoder(
        tokenized_negative_prompt,
        params=text_encoder_params,
        train=False,
    )[0]

    scheduler = FlaxDPMSolverMultistepScheduler.from_config(
        config={
            "_diffusers_version": "0.16.0",
            "beta_end": 0.012,
            "beta_schedule": "scaled_linear",
            "beta_start": 0.00085,
            "clip_sample": False,
            "num_train_timesteps": 1000,
            "prediction_type": "v_prediction",
            "set_alpha_to_one": False,
            "skip_prk_steps": True,
            "steps_offset": 1,
            "trained_betas": None,
        }
    )
    timesteps = 50
    guidance_scale = jnp.array([7.5], dtype=jnp.float32)

    unet, unet_params = FlaxUNet2DConditionModel.from_pretrained(
        "character-aware-diffusion/charred",
        dtype=jnp.float32,
    )

    vae, vae_params = FlaxAutoencoderKL.from_pretrained(
        "flax/stable-diffusion-2-1",
        subfolder="vae",
        dtype=jnp.float32,
    )
    vae_scale_factor = 2 ** (len(vae.config.block_out_channels) - 1)

    image_width = image_height = 256

    def _tokenize_prompt(prompt: str):
        """Tokenize ``prompt`` to padded/truncated integer token ids."""
        # Token ids are indices into the embedding table: keep them integer
        # instead of round-tripping through float32.
        return tokenizer(
            text=prompt,
            max_length=max_length,
            padding="max_length",
            truncation=True,
            return_tensors="jax",
        ).input_ids.astype(jnp.int32)

    def _convert_image(vae_output):
        """Turn a batch of [0, 1] float images into a list of PIL images."""
        pixels = (np.asarray(vae_output) * 255).round().astype(np.uint8)
        return [Image.fromarray(image) for image in pixels]

    def _predict_image(tokenized_prompt: jnp.ndarray):
        """Run text encoding, the guided denoising loop and VAE decoding."""
        # Get the text embedding
        text_encoder_hidden_states = text_encoder(
            tokenized_prompt,
            params=text_encoder_params,
            train=False,
        )[0]

        # Unconditional first, prompt second: this order must match the
        # jnp.split of the UNet prediction in the denoising step.
        context = jnp.concatenate(
            [negative_prompt_text_encoder_hidden_states, text_encoder_hidden_states]
        )

        # Height before width, the layout the diffusers Flax UNet documents
        # for its sample input. Both are 256 here, so values are unchanged.
        latent_shape = (
            tokenized_prompt.shape[0],
            unet.in_channels,
            image_height // vae_scale_factor,
            image_width // vae_scale_factor,
        )

        def _denoise_step(step, step_args):
            latents, scheduler_state = step_args

            t = jnp.array(scheduler_state.timesteps, dtype=jnp.int32)[step]

            # For classifier-free guidance, we need to do two forward passes.
            # Here we concatenate the unconditional and text embeddings into
            # a single batch to avoid doing two forward passes
            latent_input = jnp.concatenate([latents] * 2)

            timestep = jnp.broadcast_to(t, latent_input.shape[0])

            scaled_latent_input = scheduler.scale_model_input(
                scheduler_state, latent_input, t
            )

            # predict the noise residual
            unet_prediction_sample = unet.apply(
                {"params": unet_params},
                jnp.array(scaled_latent_input),
                jnp.array(timestep, dtype=jnp.int32),
                context,
            ).sample

            # perform guidance
            unet_prediction_sample_uncond, unet_prediction_text = jnp.split(
                unet_prediction_sample, 2, axis=0
            )
            guided_unet_prediction_sample = (
                unet_prediction_sample_uncond
                + guidance_scale
                * (unet_prediction_text - unet_prediction_sample_uncond)
            )

            # compute the previous noisy sample x_t -> x_t-1
            latents, scheduler_state = scheduler.step(
                scheduler_state, guided_unet_prediction_sample, t, latents
            ).to_tuple()

            jax.debug.print("did one step...")

            return latents, scheduler_state

        # initialize scheduler state
        initial_scheduler_state = scheduler.set_timesteps(
            scheduler.create_state(), num_inference_steps=timesteps, shape=latent_shape
        )

        # initialize latents
        initial_latents = (
            jax.random.normal(
                jax.random.PRNGKey(seed), shape=latent_shape, dtype=jnp.float32
            )
            * initial_scheduler_state.init_noise_sigma
        )

        final_latents, _ = jax.lax.fori_loop(
            0, timesteps, _denoise_step, (initial_latents, initial_scheduler_state)
        )

        jax.debug.print("got final latents...")

        # scale and decode the image latents with vae
        decoded = vae.apply(
            {"params": vae_params},
            1 / vae.config.scaling_factor * final_latents,
            method=vae.decode,
        ).sample

        # Map the decoder output from [-1, 1] to [0, 1], clamp it, and move
        # axis 1 to the end so each batch row can go to Image.fromarray.
        image = (decoded / 2 + 0.5).clip(0, 1).transpose(0, 2, 3, 1)

        jax.debug.print("got vae decoded image output...")

        # return reshaped vae outputs
        return image

    # Compiled with jax.jit (single device), not pmap.
    jitted_predict_image = jax.jit(_predict_image)

    return lambda prompt: _convert_image(
        jitted_predict_image(_tokenize_prompt(prompt))
    )
|
195 |
+
|
196 |
+
|
197 |
+
# Build the inference callable once at import time with a fixed seed (87);
# every request handled by this process reuses it.
generate_image_for_prompt = get_inference_lambda(87)

# Startup diagnostics: query the JAX backend once and report what it exposes.
_jax_devices = jax.devices()
print(f"JAX devices: {_jax_devices}")
print(f"JAX device type: {_jax_devices[0].device_kind}")
|
201 |
|
202 |
def infer_charred(prompt):
    """Generate an image for ``prompt`` with the CHARRED diffusion pipeline.

    Args:
        prompt: Text prompt forwarded to ``generate_image_for_prompt``.

    Returns:
        The first generated PIL image, or ``None`` if the pipeline returned
        no images.
    """
    images = generate_image_for_prompt(prompt)
    # The pipeline's result used to be discarded and ``None`` returned, so the
    # UI never received an image.
    # NOTE(review): assumes the Gradio output bound to this function is a
    # single-image component — confirm; return ``images`` for a gallery.
    return images[0] if images else None
|
206 |
|
207 |
|
|
|
211 |
|
212 |
with gr.Tab("Journal"):
|
213 |
gr.Markdown(
|
214 |
+
"""
|
215 |
+
## On How Four Crazy Fellows Embarked on Training a U-Net from Scratch in Five Days and Almost Died in the End
|
216 |
+
|
217 |
+
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris vitae varius libero. Nullam laoreet eget sapien quis tristique. Cras odio odio, consequat sed cursus quis, dignissim hendrerit ligula. Curabitur non lorem tellus. Nam bibendum malesuada mi sed faucibus. Sed euismod enim metus, sit amet venenatis elit elementum vel. Duis nec rhoncus tellus, rhoncus auctor justo. Proin id gravida dolor. Sed nulla lectus, finibus non fringilla ac, fermentum in sapien. Cras lobortis est augue, vel posuere justo pretium vitae. Aliquam lorem dolor, condimentum et finibus rutrum, rhoncus eget nunc.
|
218 |
+
|
219 |
+
In varius eu nulla non tempor. Maecenas laoreet scelerisque ipsum, eu placerat enim luctus sed. In malesuada, nibh finibus finibus sollicitudin, lacus massa pulvinar sem, vel venenatis nibh sem eget lorem. Cras at augue magna. Nullam elementum porta turpis, et tristique sapien placerat vel. Etiam eu lorem malesuada, ornare leo a, commodo erat. Mauris a velit vulputate, placerat lectus vel, varius lorem. Sed volutpat porttitor venenatisLorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris vitae varius libero. Nullam laoreet eget sapien quis tristique. Cras odio odio, consequat sed cursus quis, dignissim hendrerit ligula. Curabitur non lorem tellus. Nam bibendum malesuada mi sed faucibus. Sed euismod enim metus, sit amet venenatis elit elementum vel. Duis nec rhoncus tellus, rhoncus auctor justo. Proin id gravida dolor. Sed nulla lectus, finibus non fringilla ac, fermentum in sapien. Cras lobortis est augue, vel posuere justo pretium vitae. Aliquam lorem dolor, condimentum et finibus rutrum, rhoncus eget nunc.
|
220 |
+
|
221 |
+
In varius eu nulla non tempor. Maecenas laoreet scelerisque ipsum, eu placerat enim luctus sed. In malesuada, nibh finibus finibus sollicitudin, lacus massa pulvinar sem, vel venenatis nibh sem eget lorem. Cras at augue magna. Nullam elementum porta turpis, et tristique sapien placerat vel. Etiam eu lorem malesuada, ornare leo a, commodo erat. Mauris a velit vulputate, placerat lectus vel, varius lorem. Sed volutpat porttitor venenatis. Ut pellentesque at tellus ac placerat. Nulla condimentum augue euismod, tempus lorem sit amet, consequat nibh. Nunc sollicitudin nulla a neque tincidunt, id tempus dui auctor. Vivamus cursus dignissim felis.
|
222 |
+
|
223 |
+
Sed pellentesque gravida consectetur. Mauris molestie nunc quis lacinia egestas. Curabitur aliquam varius quam, nec venenatis leo efficitur a. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Ut fermentum gravida mauris, at blandit diam suscipit dapibus. Maecenas ac condimentum justo. Pellentesque aliquet risus vitae massa molestie iaculis. Quisque at libero tincidunt dui ornare vulputate. Sed tristique dolor lacinia pellentesque maximus. Donec bibendum tempus orci, eu gravida metus vehicula sit amet. Donec quis sodales neque, id consequat elit.
|
224 |
+
|
225 |
+
Sed molestie diam a massa sodales porta. Sed et ex vitae felis blandit consectetur porttitor in lectus. Interdum et malesuada fames ac ante ipsum primis in faucibus. Praesent est mi, lacinia ut egestas sed, dapibus sed augue. Sed scelerisque est a ex porta suscipit. Curabitur eleifend massa vitae suscipit finibus. Cras lobortis pellentesque est. Pellentesque semper justo nibh, vitae convallis lectus ultrices sed. Nunc auctor dignissim pretium. Praesent orci justo, posuere a diam at, tincidunt viverra leo. Quisque sit amet dignissim erat, id varius massa. Phasellus fringilla vestibulum elit, id eleifend erat hendrerit ut.
|
226 |
+
|
227 |
+
Duis scelerisque sit amet est at iaculis. Suspendisse sed ipsum vitae massa placerat semper. Pellentesque vitae sapien tristique, congue ligula sed, dapibus nunc. Suspendisse sed maximus neque, a lobortis risus. Nam lorem nisi, commodo a neque ut, volutpat porttitor ipsum. Quisque in tortor blandit, ultrices leo eget, venenatis nisl. Vestibulum ultricies sapien at sapien tincidunt vehicula vel in lacus. Sed ultricies mattis quam ac aliquet. Nulla a ullamcorper urna. Duis lacus ligula, auctor in orci sed, hendrerit maximus lectus. Nam a enim at nibh aliquam rhoncus. Pellentesque nulla justo, varius eget molestie sit amet, ultricies id tortor.
|
228 |
+
"""
|
229 |
)
|
230 |
|
231 |
with gr.Tab("☢️ DEMO ☢️"):
|