Spaces:
Runtime error
Runtime error
Create new file
Browse files
app.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
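# Gradio demo: add noise to an input image in latent space, then run the diffusion pipeline on it with a text prompt.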
|
2 |
+
import gradio as gr
|
3 |
+
from gradio.components import *
|
4 |
+
import torch
|
5 |
+
from torch import autocast
|
6 |
+
from torchvision import transforms as T
|
7 |
+
|
8 |
+
from boomerang import *
|
9 |
+
|
10 |
+
def main(image, prompt, percent_noise):  # percent_noise = 0.5, 0.02, 0.999
    """Noise an image in latent space, then run the diffusion pipeline on it.

    The image is encoded with the pipeline's VAE, noised up to a timestep
    proportional to ``percent_noise``, and handed to ``pipe`` together with
    the text prompt.

    Args:
        image: PIL image supplied by the Gradio image input.
        prompt: Text prompt forwarded to the pipeline.
        percent_noise: Fraction of the scheduler's training timesteps to
            noise up to (the UI slider offers 0.02 through 0.999).

    Returns:
        The first image in the pipeline's output.

    Raises:
        ValueError: If ``image`` is ``None`` (nothing was uploaded).
    """
    if image is None:
        raise ValueError("An input image is required.")

    # Convert image to float and preprocess it.
    # From huggingface/diffusers/blob/main/examples/unconditional_image_generation/train_unconditional.py
    transform = T.Compose([
        T.PILToTensor(),
        T.ConvertImageDtype(torch.float),
        T.Normalize([0.5], [0.5]),
    ])

    # NOTE(review): `pipe` is not defined in this file; presumably it is
    # provided by `from boomerang import *` -- confirm.
    # Nothing below needs gradients, so skip autograd bookkeeping while
    # encoding and noising; the computed values are unchanged.
    with torch.no_grad():
        tensor = transform(image).half().to(pipe.device)
        tensor = torch.unsqueeze(tensor, 0)

        # Project image into the latent space
        # From huggingface/diffusers/blob/main/src/diffusers/models/vae.py
        clean_z = pipe.vae.encode(tensor).latent_dist.mode()
        clean_z = 0.18215 * clean_z

        # Add noise to the latent variable
        # (this is the forward diffusion process)
        noise = torch.randn(clean_z.shape).to(pipe.device)
        num_train_timesteps = pipe.scheduler.config.num_train_timesteps
        # Clamp to the last valid index so percent_noise == 1.0 cannot step
        # past the end of the schedule.
        step = min(int(num_train_timesteps * percent_noise),
                   num_train_timesteps - 1)
        timestep = torch.tensor([step], dtype=torch.long, device=pipe.device)
        z = pipe.scheduler.add_noise(clean_z, noise, timestep).half()

    # Run the diffusion model
    with autocast('cuda'):
        # The 'num_inference_steps=100' arguments means that, if you were running
        # the full diffusion model (i.e., percent_noise = 0.999), it would be sampling
        # at 2x frequency compared to the normal stable diffusion model (which uses 50 steps).
        # This way, running percent_noise = 0.5 yields 50 inference steps,
        # and running percent_noise = 0.2 yields 20 inference steps, etc.
        return pipe(prompt=prompt, latents=z, num_inference_steps=100, percent_noise=percent_noise).images[0]
|
39 |
+
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
|
45 |
+
# Assemble the web UI: an image, a text prompt and a noise-level slider are
# passed to `main`, and the returned picture is shown as an image.
input_widgets = [
    gr.Image(type="pil", shape=(512, 512)),
    "text",
    gr.Slider(0.02, 0.999, value=0.7, label='percent noise'),
]
output_widget = gr.Image(type="pil", shape=(512, 512))

# Each row is (image file, prompt, percent noise), matching the input order.
example_rows = [
    ["original.png", "person", 0.7],
    ['cat.png', 'cat', 0.9],
    ['bedroom.jpg', 'bathroom', 0.8],
    ['einstein.jpg', 'dog', 0.8],
    ['oprah.jpeg', 'pirate', 0.8],
]

demo = gr.Interface(
    fn=main,
    inputs=input_widgets,
    outputs=output_widget,
    examples=example_rows,
)
demo.launch()
|