Contrebande Labs committed on
Commit
e668dd5
·
1 Parent(s): 69cf43b

inference code

Browse files
Files changed (1) hide show
  1. app.py +211 -1
app.py CHANGED
@@ -1,11 +1,207 @@
1
  import gradio as gr
2
  import jax
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  print(f"JAX devices: {jax.devices()}")
5
  print(f"JAX device type: {jax.devices()[0].device_kind}")
6
 
7
  def infer_charred(prompt):
8
  # your inference function for charr stable diffusion control
 
9
  return None
10
 
11
 
@@ -15,7 +211,21 @@ with gr.Blocks(theme="gradio/soft") as demo:
15
 
16
  with gr.Tab("Journal"):
17
  gr.Markdown(
18
- "## On How Four Crazy Fellows Embarked on Training a U-Net from Scratch in Five Days and Almost Died in the End."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  )
20
 
21
  with gr.Tab("☢️ DEMO ☢️"):
 
1
  import gradio as gr
2
  import jax
3
 
4
+ import numpy as np
5
+
6
+ import jax
7
+ import jax.numpy as jnp
8
+
9
+ from PIL import Image
10
+
11
+ from diffusers import (
12
+ FlaxAutoencoderKL,
13
+ FlaxDPMSolverMultistepScheduler,
14
+ FlaxUNet2DConditionModel,
15
+ )
16
+
17
+ from transformers import ByT5Tokenizer, FlaxT5ForConditionalGeneration
18
+
19
def get_inference_lambda(seed):
    """Load every model once and return a ``prompt -> images`` callable.

    The ByT5 tokenizer and text encoder, the DPM-Solver scheduler, the U-Net
    and the VAE are all created here and captured by the returned closure, so
    the loading work happens only when this factory is called.

    Args:
        seed: Value passed to ``jax.random.PRNGKey`` when drawing the initial
            latents. It is fixed for the lifetime of the returned callable.

    Returns:
        A callable that takes a prompt string and returns a list of
        ``PIL.Image.Image`` objects, one per entry along the first axis of the
        decoded VAE output.
    """

    tokenizer = ByT5Tokenizer()

    language_model = FlaxT5ForConditionalGeneration.from_pretrained(
        "google/byt5-base",
        dtype=jnp.float32,
    )
    text_encoder = language_model.encode
    text_encoder_params = language_model.params

    # Single source of truth for the padded sequence length, shared by the
    # negative prompt and every user prompt so their encodings line up.
    max_length = 1024

    # The empty (negative) prompt never changes, so encode it once up front.
    tokenized_negative_prompt = tokenizer(
        "", padding="max_length", max_length=max_length, return_tensors="np"
    ).input_ids
    negative_prompt_text_encoder_hidden_states = text_encoder(
        tokenized_negative_prompt,
        params=text_encoder_params,
        train=False,
    )[0]

    scheduler = FlaxDPMSolverMultistepScheduler.from_config(
        config={
            "_diffusers_version": "0.16.0",
            "beta_end": 0.012,
            "beta_schedule": "scaled_linear",
            "beta_start": 0.00085,
            "clip_sample": False,
            "num_train_timesteps": 1000,
            "prediction_type": "v_prediction",
            "set_alpha_to_one": False,
            "skip_prk_steps": True,
            "steps_offset": 1,
            "trained_betas": None,
        }
    )
    timesteps = 50
    guidance_scale = jnp.array([7.5], dtype=jnp.float32)

    unet, unet_params = FlaxUNet2DConditionModel.from_pretrained(
        "character-aware-diffusion/charred",
        dtype=jnp.float32,
    )

    vae, vae_params = FlaxAutoencoderKL.from_pretrained(
        "flax/stable-diffusion-2-1",
        subfolder="vae",
        dtype=jnp.float32,
    )
    vae_scale_factor = 2 ** (len(vae.config.block_out_channels) - 1)

    image_width = image_height = 256

    def _tokenize_prompt(prompt: str):
        """Tokenize ``prompt``, padded/truncated to ``max_length`` ids."""
        token_ids = tokenizer(
            text=prompt,
            max_length=max_length,
            padding="max_length",
            truncation=True,
            return_tensors="jax",
        ).input_ids
        # Token ids index the encoder's embedding table, so they must stay
        # integral; casting them to a float dtype breaks the lookup.
        return token_ids.astype(jnp.int32)

    def _convert_image(vae_output):
        """Turn a batch of [0, 1] float images into a list of PIL images."""
        as_uint8 = (np.asarray(vae_output) * 255).round().astype(np.uint8)
        return [Image.fromarray(image) for image in as_uint8]

    def _predict_image(tokenized_prompt: jnp.ndarray):
        """Run the denoising loop for ``tokenized_prompt`` and decode it.

        Returns the VAE output rescaled with ``/ 2 + 0.5``, clipped to
        [0, 1] and transposed with axes ``(0, 2, 3, 1)``.
        """

        # Get the text embedding
        text_encoder_hidden_states = text_encoder(
            tokenized_prompt,
            params=text_encoder_params,
            train=False,
        )[0]

        # Unconditional embedding first, text embedding second; the split
        # after the U-Net call relies on this order.
        context = jnp.concatenate(
            [negative_prompt_text_encoder_hidden_states, text_encoder_hidden_states]
        )

        latent_shape = (
            tokenized_prompt.shape[0],
            unet.in_channels,
            image_width // vae_scale_factor,
            image_height // vae_scale_factor,
        )

        def _timestep(step, step_args):
            """Body of the ``fori_loop``: one guided denoising step."""

            latents, scheduler_state = step_args

            t = jnp.array(scheduler_state.timesteps, dtype=jnp.int32)[step]

            # For classifier-free guidance, we need to do two forward passes.
            # Here we duplicate the latents so the unconditional and text
            # branches run as a single batch instead of two forward passes.
            latent_input = jnp.concatenate([latents] * 2)

            timestep = jnp.broadcast_to(t, latent_input.shape[0])

            scaled_latent_input = scheduler.scale_model_input(
                scheduler_state, latent_input, t
            )

            # predict the noise residual
            unet_prediction_sample = unet.apply(
                {"params": unet_params},
                jnp.array(scaled_latent_input),
                jnp.array(timestep, dtype=jnp.int32),
                context,
            ).sample

            # perform guidance
            unet_prediction_sample_uncond, unet_prediction_text = jnp.split(
                unet_prediction_sample, 2, axis=0
            )
            guided_unet_prediction_sample = (
                unet_prediction_sample_uncond
                + guidance_scale
                * (unet_prediction_text - unet_prediction_sample_uncond)
            )

            # compute the previous noisy sample x_t -> x_t-1
            latents, scheduler_state = scheduler.step(
                scheduler_state, guided_unet_prediction_sample, t, latents
            ).to_tuple()

            jax.debug.print("did one step...")

            return latents, scheduler_state

        # initialize scheduler state
        initial_scheduler_state = scheduler.set_timesteps(
            scheduler.create_state(), num_inference_steps=timesteps, shape=latent_shape
        )

        # initialize latents
        initial_latents = (
            jax.random.normal(
                jax.random.PRNGKey(seed), shape=latent_shape, dtype=jnp.float32
            )
            * initial_scheduler_state.init_noise_sigma
        )

        final_latents, _ = jax.lax.fori_loop(
            0, timesteps, _timestep, (initial_latents, initial_scheduler_state)
        )

        jax.debug.print("got final latents...")

        # scale and decode the image latents with vae
        image = (
            (
                vae.apply(
                    {"params": vae_params},
                    1 / vae.config.scaling_factor * final_latents,
                    method=vae.decode,
                ).sample
                / 2
                + 0.5
            )
            .clip(0, 1)
            .transpose(0, 2, 3, 1)
        )

        jax.debug.print("got vae decoded image output...")

        # return reshaped vae outputs
        return image

    # Compile the whole prediction once with jit; later calls reuse it.
    jitted_predict_image = jax.jit(_predict_image)

    return lambda prompt: _convert_image(
        jitted_predict_image(_tokenize_prompt(prompt))
    )
195
+
196
+
197
# Build the prompt -> images callable once, at import time, with seed 87.
generate_image_for_prompt = get_inference_lambda(87)

# Report which devices JAX sees, and the kind of the first one.
_devices = jax.devices()
print(f"JAX devices: {_devices}")
print(f"JAX device type: {_devices[0].device_kind}")
201
 
202
  def infer_charred(prompt):
203
  # your inference function for charr stable difusion control
204
+ generate_image_for_prompt(prompt)
205
  return None
206
 
207
 
 
211
 
212
  with gr.Tab("Journal"):
213
  gr.Markdown(
214
+ """
215
+ ## On How Four Crazy Fellows Embarked on Training a U-Net from Scratch in Five Days and Almost Died in the End
216
+
217
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris vitae varius libero. Nullam laoreet eget sapien quis tristique. Cras odio odio, consequat sed cursus quis, dignissim hendrerit ligula. Curabitur non lorem tellus. Nam bibendum malesuada mi sed faucibus. Sed euismod enim metus, sit amet venenatis elit elementum vel. Duis nec rhoncus tellus, rhoncus auctor justo. Proin id gravida dolor. Sed nulla lectus, finibus non fringilla ac, fermentum in sapien. Cras lobortis est augue, vel posuere justo pretium vitae. Aliquam lorem dolor, condimentum et finibus rutrum, rhoncus eget nunc.
218
+
219
+ In varius eu nulla non tempor. Maecenas laoreet scelerisque ipsum, eu placerat enim luctus sed. In malesuada, nibh finibus finibus sollicitudin, lacus massa pulvinar sem, vel venenatis nibh sem eget lorem. Cras at augue magna. Nullam elementum porta turpis, et tristique sapien placerat vel. Etiam eu lorem malesuada, ornare leo a, commodo erat. Mauris a velit vulputate, placerat lectus vel, varius lorem. Sed volutpat porttitor venenatisLorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris vitae varius libero. Nullam laoreet eget sapien quis tristique. Cras odio odio, consequat sed cursus quis, dignissim hendrerit ligula. Curabitur non lorem tellus. Nam bibendum malesuada mi sed faucibus. Sed euismod enim metus, sit amet venenatis elit elementum vel. Duis nec rhoncus tellus, rhoncus auctor justo. Proin id gravida dolor. Sed nulla lectus, finibus non fringilla ac, fermentum in sapien. Cras lobortis est augue, vel posuere justo pretium vitae. Aliquam lorem dolor, condimentum et finibus rutrum, rhoncus eget nunc.
220
+
221
+ In varius eu nulla non tempor. Maecenas laoreet scelerisque ipsum, eu placerat enim luctus sed. In malesuada, nibh finibus finibus sollicitudin, lacus massa pulvinar sem, vel venenatis nibh sem eget lorem. Cras at augue magna. Nullam elementum porta turpis, et tristique sapien placerat vel. Etiam eu lorem malesuada, ornare leo a, commodo erat. Mauris a velit vulputate, placerat lectus vel, varius lorem. Sed volutpat porttitor venenatis. Ut pellentesque at tellus ac placerat. Nulla condimentum augue euismod, tempus lorem sit amet, consequat nibh. Nunc sollicitudin nulla a neque tincidunt, id tempus dui auctor. Vivamus cursus dignissim felis.
222
+
223
+ Sed pellentesque gravida consectetur. Mauris molestie nunc quis lacinia egestas. Curabitur aliquam varius quam, nec venenatis leo efficitur a. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Ut fermentum gravida mauris, at blandit diam suscipit dapibus. Maecenas ac condimentum justo. Pellentesque aliquet risus vitae massa molestie iaculis. Quisque at libero tincidunt dui ornare vulputate. Sed tristique dolor lacinia pellentesque maximus. Donec bibendum tempus orci, eu gravida metus vehicula sit amet. Donec quis sodales neque, id consequat elit.
224
+
225
+ Sed molestie diam a massa sodales porta. Sed et ex vitae felis blandit consectetur porttitor in lectus. Interdum et malesuada fames ac ante ipsum primis in faucibus. Praesent est mi, lacinia ut egestas sed, dapibus sed augue. Sed scelerisque est a ex porta suscipit. Curabitur eleifend massa vitae suscipit finibus. Cras lobortis pellentesque est. Pellentesque semper justo nibh, vitae convallis lectus ultrices sed. Nunc auctor dignissim pretium. Praesent orci justo, posuere a diam at, tincidunt viverra leo. Quisque sit amet dignissim erat, id varius massa. Phasellus fringilla vestibulum elit, id eleifend erat hendrerit ut.
226
+
227
+ Duis scelerisque sit amet est at iaculis. Suspendisse sed ipsum vitae massa placerat semper. Pellentesque vitae sapien tristique, congue ligula sed, dapibus nunc. Suspendisse sed maximus neque, a lobortis risus. Nam lorem nisi, commodo a neque ut, volutpat porttitor ipsum. Quisque in tortor blandit, ultrices leo eget, venenatis nisl. Vestibulum ultricies sapien at sapien tincidunt vehicula vel in lacus. Sed ultricies mattis quam ac aliquet. Nulla a ullamcorper urna. Duis lacus ligula, auctor in orci sed, hendrerit maximus lectus. Nam a enim at nibh aliquam rhoncus. Pellentesque nulla justo, varius eget molestie sit amet, ultricies id tortor.
228
+ """
229
  )
230
 
231
  with gr.Tab("☢️ DEMO ☢️"):