|
import time |
|
from PIL import Image |
|
from tqdm import tqdm |
|
|
|
import spaces |
|
import torch |
|
import torch.nn as nn |
|
import torch.optim as optim |
|
import torchvision.transforms as transforms |
|
import torchvision.models as models |
|
import gradio as gr |
|
|
|
# Pick the compute backend in priority order: CUDA, then Apple MPS, then CPU.
# The MPS check is only evaluated when CUDA is unavailable.
device = (
    'cuda' if torch.cuda.is_available()
    else 'mps' if torch.backends.mps.is_available()
    else 'cpu'
)
print('DEVICE:', device)
|
|
|
class VGG_19(nn.Module):
    """VGG-19 feature extractor used to compute style-transfer losses.

    Keeps the first 30 layers of torchvision's VGG-19 ``features`` stack,
    replaces the layers at indices 4, 9, 18 and 27 with 2x2 average pooling,
    and exposes the intermediate activations produced at layers
    0, 5, 10, 19 and 28.
    """

    # Positions in ``features`` that are swapped for average pooling.
    _POOL_LAYERS = (4, 9, 18, 27)
    # Positions whose outputs are collected by ``forward``.
    _FEATURE_LAYERS = (0, 5, 10, 19, 28)

    def __init__(self):
        super().__init__()

        # ``pretrained=True`` is deprecated since torchvision 0.13 in favour
        # of the ``weights=`` API; IMAGENET1K_V1 is the checkpoint that
        # ``pretrained=True`` used to load. Fall back to the legacy keyword
        # when running against an older torchvision without the enum.
        weights_enum = getattr(models, 'VGG19_Weights', None)
        if weights_enum is not None:
            backbone = models.vgg19(weights=weights_enum.IMAGENET1K_V1)
        else:
            backbone = models.vgg19(pretrained=True)
        self.model = backbone.features[:30]

        for idx in self._POOL_LAYERS:
            self.model[idx] = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)

    def forward(self, x):
        """Run ``x`` through the truncated network.

        Returns:
            A list with the outputs of the layers listed in
            ``_FEATURE_LAYERS``, in network order.
        """
        # NOTE(review): torchvision's VGG appears to use in-place ReLU, so the
        # tensors captured here may end up holding post-activation values --
        # confirm if the exact tap point matters.
        features = []
        for idx, layer in enumerate(self.model):
            x = layer(x)
            if idx in self._FEATURE_LAYERS:
                features.append(x)
        return features
|
|
|
# Shared feature extractor, placed on the selected device. Its weights are
# never trained here, so gradient tracking is switched off for every parameter.
model = VGG_19().to(device)
model.requires_grad_(False)
|
|
|
def load_img(img: Image.Image, img_size):
    """Convert a PIL image into a batched tensor of a fixed square size.

    Args:
        img: Source PIL image.
        img_size: Edge length, in pixels, of the square the image is resized to.

    Returns:
        A ``(tensor, original_size)`` tuple: the image as a tensor with a
        leading batch dimension added, and the ``img.size`` value of the
        input before resizing.
    """
    original_size = img.size

    # Force three colour channels: RGBA, grayscale or CMYK inputs would
    # otherwise produce a tensor whose channel count the VGG network rejects.
    if img.mode != 'RGB':
        img = img.convert('RGB')

    transform = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
    ])
    return transform(img).unsqueeze(0), original_size
|
|
|
def load_img_from_path(path_to_image, img_size):
    """Load an image file and convert it into a batched, square tensor.

    Args:
        path_to_image: Path of the image file to open.
        img_size: Edge length, in pixels, of the square the image is resized to.

    Returns:
        The same ``(tensor, original_size)`` tuple that ``load_img`` returns.
    """
    # The context manager closes the file handle deterministically;
    # ``convert`` decodes the pixel data while the file is still open and
    # guarantees three channels even for grayscale or CMYK files.
    with Image.open(path_to_image) as opened:
        rgb_image = opened.convert('RGB')

    # Reuse the shared resize/to-tensor pipeline instead of duplicating it.
    return load_img(rgb_image, img_size)
|
|
|
def save_img(img, original_size):
    """Turn an image tensor back into a PIL image at a requested size.

    Args:
        img: Image tensor; presumably shaped (1, C, H, W) with values roughly
            in [0, 1] -- TODO confirm against callers.
        original_size: Size passed to ``PIL.Image.Image.resize``.

    Returns:
        The PIL image, resampled with LANCZOS to ``original_size``.
    """
    # Work on a CPU copy so the caller's tensor is left untouched.
    pixels = img.cpu().clone().squeeze(0)

    # Clip to the displayable range, then scale to 8-bit values.
    pixels = torch.clamp(pixels, 0, 1).mul(255).byte()

    to_pil = transforms.ToPILImage()
    picture = to_pil(pixels)

    return picture.resize(original_size, Image.Resampling.LANCZOS)
|
|
|
|
|
# Maps each style's display name to the path of its reference image under
# ./style_images/. Insertion order is preserved.
style_options = {
    display_name: f'./style_images/{file_name}'
    for display_name, file_name in (
        ('Starry Night', 'StarryNight.jpg'),
        ('Great Wave', 'GreatWave.jpg'),
        ('Scream', 'Scream.jpg'),
        ('Lego Bricks', 'LegoBricks.jpg'),
        ('Oil Painting', 'OilPainting.jpg'),
        ('Mosaic', 'Mosaic.jpg'),
    )
}
|
|
|
def _gram_matrix(feature):
    """Return the (unnormalised) Gram matrix of a 4-D feature tensor."""
    batch_size, n_feature_maps, height, width = feature.size()
    flat = feature.view(batch_size * n_feature_maps, height * width)
    return torch.mm(flat, flat.t())


@spaces.GPU(duration=30)
def inference(content_image, style_image, style_strength, output_quality, progress=gr.Progress(track_tqdm=True)):
    """Stylise ``content_image`` by optimising its pixels with Adam.

    This is a generator: it yields ``None`` immediately and then yields the
    final image once the optimisation loop has finished.

    Args:
        content_image: PIL image to stylise.
        style_image: Key into ``style_options`` selecting the style image.
        style_strength: Number of optimisation iterations to run.
        output_quality: When truthy the images are processed at 1024 px,
            otherwise at 512 px.
        progress: Gradio progress tracker; not used directly in the body, its
            default is configured with ``track_tqdm=True``.

    Yields:
        ``None`` first, then the stylised PIL image at the content image's
        original size.

    Raises:
        gr.Error: If no content image was supplied.
    """
    if content_image is None:
        # Surface a readable message instead of an AttributeError further down.
        raise gr.Error('Please upload a content image first.')

    yield None
    print('-'*15)
    print('STYLE:', style_image)
    img_size = 1024 if output_quality else 512
    content_img, original_size = load_img(content_image, img_size)
    content_img = content_img.to(device)
    style_img = load_img_from_path(style_options[style_image], img_size)[0].to(device)

    print('CONTENT IMG SIZE:', original_size)

    # The slider/API may deliver a float; ``range`` needs an int.
    iters = int(style_strength)
    lr = 1e-1
    alpha = 1
    beta = 1
    w_l = 1/5  # equal weight for each of the five feature layers

    st = time.perf_counter()
    generated_img = content_img.clone().requires_grad_(True)
    optimizer = optim.Adam([generated_img], lr=lr)

    # Only ``generated_img`` is handed to the optimizer, so the content
    # activations and style Gram matrices are identical on every iteration.
    # Compute them once, without autograd bookkeeping, rather than
    # re-running two extra forward passes per step.
    with torch.no_grad():
        content_features = model(content_img)
        style_grams = [_gram_matrix(feature) for feature in model(style_img)]

    for _ in tqdm(range(iters), desc='The magic is happening ✨'):
        generated_features = model(generated_img)

        content_loss = 0
        style_loss = 0

        for generated_feature, content_feature, style_gram in zip(generated_features, content_features, style_grams):
            content_loss += torch.mean((generated_feature - content_feature) ** 2)

            E_l = (_gram_matrix(generated_feature) - style_gram) ** 2
            style_loss += torch.mean(w_l * E_l)

        total_loss = alpha * content_loss + beta * style_loss
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

    et = time.perf_counter()
    print('TIME TAKEN:', et-st)
    # Detach so the post-processing does not extend the autograd graph.
    yield save_img(generated_img.detach(), original_size)
|
|
|
|
|
def set_slider(value):
    """Return a Gradio update object that sets a component's value to ``value``."""
    slider_update = gr.update(value=value)
    return slider_update
|
|
|
# Stylesheet for the page: centre the main column and cap its width.
css = """
#container {
    margin: 0 auto;
    max-width: 550px;
}
"""

# NOTE(review): the nesting of the layout below was reconstructed from the
# order of the context managers -- confirm it matches the intended UI.
with gr.Blocks(css=css) as demo:
    gr.HTML("<h1 style='text-align: center; padding: 10px'>🖼️ Neural Style Transfer</h1>")
    with gr.Column(elem_id='container'):
        # A single image component is both the upload target and the output.
        content_and_output = gr.Image(
            show_label=False,
            type='pil',
            sources=['upload'],
            format='jpg',
        )
        style_dropdown = gr.Radio(
            choices=list(style_options),
            label='Choose a style',
            value='Starry Night',
            type='value',
        )
        with gr.Accordion('Adjustments', open=False):
            with gr.Group():
                style_strength_slider = gr.Slider(
                    label='Style Strength',
                    minimum=0,
                    maximum=100,
                    step=5,
                    value=50,
                )
                # Preset buttons that jump the slider to a fixed strength.
                with gr.Row():
                    low_button = gr.Button('Low').click(
                        fn=lambda: set_slider(10),
                        outputs=[style_strength_slider],
                    )
                    medium_button = gr.Button('Medium').click(
                        fn=lambda: set_slider(50),
                        outputs=[style_strength_slider],
                    )
                    high_button = gr.Button('High').click(
                        fn=lambda: set_slider(100),
                        outputs=[style_strength_slider],
                    )
            with gr.Group():
                output_quality = gr.Checkbox(
                    label='High Quality',
                    info='Note: This takes longer, but improves output image quality',
                )
        submit_button = gr.Button('Submit')

        # Run the style transfer and write the result back into the image slot.
        submit_button.click(
            fn=inference,
            inputs=[content_and_output, style_dropdown, style_strength_slider, output_quality],
            outputs=[content_and_output],
        )

        examples = gr.Examples(
            examples=[
                ['./content_images/TajMahal.jpg', 'Starry Night', 75, False],
                ['./content_images/GoldenRetriever.jpg', 'Lego Bricks', 50, False],
                ['./content_images/SeaTurtle.jpg', 'Mosaic', 100, False],
            ],
            inputs=[content_and_output, style_dropdown, style_strength_slider, output_quality],
        )

demo.launch(show_api=True, allowed_paths=['/tmp/gradio/'])