import os
import time
import datetime

from tqdm import tqdm
import spaces
import torch
import torch.optim as optim
import gradio as gr

from utils import preprocess_img, preprocess_img_from_path, postprocess_img
from vgg19 import VGG_19

# Pick the best available device: CUDA GPU, Apple Silicon (MPS), or CPU.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
print('DEVICE:', device)
if device == 'cuda':
    print('CUDA DEVICE:', torch.cuda.get_device_name())

# Load the VGG-19 feature extractor and freeze its weights; only the
# generated image is optimized, never the network.
model = VGG_19().to(device)
for param in model.parameters():
    param.requires_grad = False

# Build the style choices from the files in ./style_images,
# e.g. 'Starry_Night.jpg' -> 'Starry Night'.
style_files = os.listdir('./style_images')
style_options = {
    ' '.join(style_file.split('.')[0].split('_')): f'./style_images/{style_file}'
    for style_file in style_files
}

# Per-style defaults: (style strength, high-quality output).
optimal_settings = {
    'Starry Night': (100, True),
    'Lego Bricks': (100, False),
    'Mosaic': (100, False),
    'Oil Painting': (100, False),
    'Scream': (75, True),
    'Great Wave': (75, False),
    'Watercolor': (10, False),
}

def compute_loss(generated_features, content_features, style_features, alpha, beta):
    content_loss = 0
    style_loss = 0

    for generated_feature, content_feature, style_feature in zip(generated_features, content_features, style_features):
        batch_size, n_feature_maps, height, width = generated_feature.size()

        # Content loss: mean squared error between feature maps.
        content_loss += torch.mean((generated_feature - content_feature) ** 2)

        # Style loss: mean squared error between Gram matrices.
        F = generated_feature.view(batch_size * n_feature_maps, height * width)
        S = style_feature.view(batch_size * n_feature_maps, height * width)
        G = torch.mm(F, F.t())  # Gram matrix of the generated features
        A = torch.mm(S, S.t())  # Gram matrix of the style features

        E_l = (G - A) ** 2
        w_l = 1 / 5  # equal weight for each of the five style layers
        style_loss += torch.mean(w_l * E_l)

    return alpha * content_loss + beta * style_loss

@spaces.GPU(duration=20)
def inference(content_image, style_image, style_strength, output_quality, progress=gr.Progress(track_tqdm=True)):
    yield None  # clear the previous output while the new image is generated
    print('-' * 15)
    print('DATETIME:', datetime.datetime.now())
    print('STYLE:', style_image)

    img_size = 1024 if output_quality else 512
    content_img, original_size = preprocess_img(content_image, img_size)
    content_img = content_img.to(device)
    style_img = preprocess_img_from_path(style_options[style_image], img_size)[0].to(device)

    print('CONTENT IMG SIZE:', original_size)
    print('STYLE STRENGTH:', style_strength)
    print('HIGH QUALITY:', output_quality)

    iters = 50
    # Map style_strength (1-100) linearly onto learning rates 0.001-0.1.
    lr = 0.001 + (0.099 / 99) * (style_strength - 1)
    alpha = 1
    beta = 1

    st = time.time()

    # Start from the content image and optimize its pixels directly.
    generated_img = content_img.clone().requires_grad_(True)
    optimizer = optim.Adam([generated_img], lr=lr)

    # The target features are fixed, so compute them once outside the loop.
    content_features = model(content_img)
    style_features = model(style_img)

    for _ in tqdm(range(iters), desc='The magic is happening ✨'):
        generated_features = model(generated_img)
        total_loss = compute_loss(generated_features, content_features, style_features, alpha, beta)

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

    et = time.time()
    print('TIME TAKEN:', et - st)

    yield postprocess_img(generated_img, original_size)

def set_slider(value):
    return gr.update(value=value)

def update_settings(style):
    return optimal_settings.get(style, (50, True))

css = """
#container {
    margin: 0 auto;
    max-width: 550px;
}
"""

with gr.Blocks(css=css) as demo:
    gr.HTML("