# NOTE: page-scrape residue, not part of the program:
#   "Spaces: Runtime error / Runtime error"
import os
import math
from subprocess import call

# Set before the imports below, as in the original statement order --
# presumably read at import time by the model code; TODO confirm.
os.environ["USE_NATIVE"] = "1"

import torch
import torchvision
import gradio as gr
from PIL import Image

from test_ddgan import load_model, sample
from model_configs import get_model_config
def download(filename: str) -> str:
    """Return the local path of a model checkpoint file.

    Despite the name, nothing is fetched: the file name is simply prefixed
    with the ``models/`` directory.

    Args:
        filename: Checkpoint file name, e.g. ``"some_model.th"``.

    Returns:
        The relative path ``"models/<filename>"``.
    """
    return "models/" + filename
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Already-loaded models, keyed by model name (filled by load()).
cache = {}
def load(name: str):
    """Return the model registered under *name*, loading it on first use.

    The first call for a given name looks up its configuration name in
    ``models``, builds the checkpoint path ``download(name + ".th")``, loads
    the model onto ``device`` and stores it in ``cache``. Later calls return
    the cached object.

    Args:
        name: A key of the module-level ``models`` mapping.

    Returns:
        Whatever ``load_model`` returns for that configuration/checkpoint.

    Raises:
        KeyError: If *name* is not a key of ``models``.
    """
    if name in cache:
        return cache[name]

    cfg_name = models[name]
    model_config = get_model_config(cfg_name)
    model_path = download(name + ".th")
    model = load_model(model_config, model_path, device=device)
    cache[name] = model
    return model
# Maps each selectable model name (also used as the checkpoint file stem by
# load()) to the name of its model configuration.
models = {
    "diffusion_db_128ch_1timesteps_openclip_vith14": "ddgan_ddb_v2",
    "diffusion_db_192ch_2timesteps_openclip_vith14": "ddgan_ddb_v3",
}
# Model pre-selected in the UI.
default = "diffusion_db_128ch_1timesteps_openclip_vith14"
# Load the default model at start-up so it is already in the cache.
load(default)
def gen(md, model_name, md2, text, seed, nb_samples, width, height):
    """Generate a grid of images for a text caption.

    Args:
        md: Unused. Occupies the position of the first ``gr.Markdown``
            component in the interface's input list.
        model_name: Key of ``models`` selecting the model to use.
        md2: Unused. Occupies the position of the second ``gr.Markdown``
            component in the interface's input list.
        text: Caption to condition on. If it is the empty string, the
            conditioning tensors are replaced with Gaussian noise.
        seed: Converted to ``int`` and passed to ``torch.manual_seed``.
        nb_samples: Number of images to generate (converted to ``int``).
        width: Width of the initial noise tensor (converted to ``int``).
        height: Height of the initial noise tensor (converted to ``int``).

    Returns:
        A ``PIL.Image.Image`` holding the samples arranged by
        ``torchvision.utils.make_grid`` with four images per row.
    """
    print("load ", model_name)
    model = load(model_name)
    print(model)
    torch.manual_seed(int(seed))
    nb_samples = int(nb_samples)
    height = int(height)
    width = int(width)
    with torch.no_grad():
        # The same caption is repeated once per requested sample.
        cond = model.text_encoder([text]*nb_samples)
        if text == "":
            # No caption: overwrite both conditioning tensors in place with
            # Gaussian noise, then copy row 0 into every other row so all
            # samples share the same random conditioning.
            cond[0].normal_()
            cond[1].normal_()
            cond[0][1:] = cond[0][0:1]
            cond[1][1:] = cond[1][0:1]
        # Noise is drawn on the default device and then moved to `device`.
        x_init = torch.randn(nb_samples, 3, height, width).to(device)
        print(x_init.shape)
        fake_sample = sample(model, x_init=x_init, cond=cond)
        # Rescale from [-1, 1] to [0, 1]. Clamp as well: any value outside
        # that range would overflow/wrap in the uint8 cast below.
        fake_sample = ((fake_sample + 1) / 2).clamp(0, 1)
        grid = torchvision.utils.make_grid(fake_sample, nrow=4)
        # (C, H, W) tensor -> (H, W, C) array, as expected by PIL.
        grid = grid.permute(1, 2, 0).cpu().numpy()
        grid = (grid*255).astype("uint8")
    return Image.fromarray(grid)
# Project description shown at the top of the interface (passed to gr.Markdown).
text = """
Text-to-Image Denoising Diffusion GANs is a text-to-image model
based on Denoising Diffusion GANs <https://arxiv.org/abs/2112.07804>.
The code is based on their official code <https://nvlabs.github.io/denoising-diffusion-gan/>,
which is updated to support text conditioning. Many thanks to the authors of DDGAN for releasing
the code.
The provided models are trained on DiffusionDB <https://arxiv.org/abs/2210.14896>, which is a dataset that was synthetically
generated with Stable Diffusion, many thanks to the authors for releasing the dataset.
Models were trained on JURECA-DC supercomputer at Jülich Supercomputing Centre (JSC), many thanks for the compute provided to train the models.
"""
# Build the Gradio UI. There is one input component per parameter of gen(),
# listed in the same order as gen()'s signature.
iface = gr.Interface(
    fn=gen,
    inputs=[
        gr.Markdown(text),  # md (project description)
        gr.Dropdown(list(models), value=default),  # model_name
        gr.Markdown("If text caption is empty, random CLIP embeddings will be used as input"),  # md2
        # text caption
        gr.Textbox(
            lines=1,
            placeholder="Enter text caption here, or leave empty",
            value="Painting of a hamster king with a crown and a cape in a magical forest."
        ),
        gr.Number(value=0),  # seed
        gr.Number(value=4),  # nb_samples
        gr.Number(value=256),  # width
        gr.Number(value=256),  # height
    ],
    outputs="image"
)
# Start the web server; runs at module level, as in the original script.
iface.launch(debug=True)