Upload folder using huggingface_hub

Files changed:
- app.py: +5 −3
- inference.py: +44 −6
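In short: this commit adds an 'Apply to background only' option to the style-transfer demo. app.py exposes a new checkbox and threads its value through the run callback into inference(); inference.py segments the content image with DeepLabV3, restricts the style (Gram-matrix) loss to blurred background masks, and pastes the original foreground back into the generated image after every optimizer step.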
app.py
CHANGED
@@ -35,7 +35,7 @@ for style_name, style_img_path in style_options.items():
     cached_style_features[style_name] = style_features
 
 @spaces.GPU(duration=10)
-def run(content_image, style_name, style_strength=5, progress=gr.Progress(track_tqdm=True)):
+def run(content_image, style_name, style_strength=5, apply_to_background=False, progress=gr.Progress(track_tqdm=True)):
     yield None
     content_img, original_size = preprocess_img(content_image, img_size)
     content_img = content_img.to(device)
@@ -53,7 +53,8 @@ def run(content_image, style_name, style_strength=5, progress=gr.Progress(track_tqdm=True)):
         model=model,
         content_image=content_img,
         style_features=style_features,
-        lr=lrs[style_strength-1]
+        lr=lrs[style_strength-1],
+        apply_to_background=apply_to_background
     )
     et = time.time()
     print('TIME TAKEN:', et-st)
@@ -78,6 +79,7 @@ with gr.Blocks(css=css) as demo:
             style_dropdown = gr.Radio(choices=list(style_options.keys()), label='Style', value='Starry Night', type='value')
             with gr.Group():
                 style_strength_slider = gr.Slider(label='Style Strength', minimum=1, maximum=10, step=1, value=5, info='Higher values add artistic flair, lower values add a realistic feel.')
+                apply_to_background = gr.Checkbox(label='Apply to background only')
             submit_button = gr.Button('Submit', variant='primary')
 
     examples = gr.Examples(
@@ -105,7 +107,7 @@ with gr.Blocks(css=css) as demo:
 
     submit_button.click(
         fn=run,
-        inputs=[content_image, style_dropdown, style_strength_slider],
+        inputs=[content_image, style_dropdown, style_strength_slider, apply_to_background],
         outputs=[output_image]
     ).then(
         fn=save_image,
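The app.py side of the change leans entirely on Gradio's positional mapping from the inputs list to the callback's parameters: appending the checkbox to inputs is all the wiring the new flag needs. A minimal, self-contained sketch of that pattern (the dummy run callback below just echoes its arguments; it is not the Space's real one):

import gradio as gr

def run(content_image, style_name, style_strength=5, apply_to_background=False):
    # Gradio passes the components in `inputs` to these parameters
    # positionally, so the checkbox's boolean lands in apply_to_background.
    return f'{style_name} @ {style_strength}, background only: {apply_to_background}'

with gr.Blocks() as demo:
    content_image = gr.Image()
    style_dropdown = gr.Radio(choices=['Starry Night'], value='Starry Night', label='Style')
    style_strength_slider = gr.Slider(minimum=1, maximum=10, step=1, value=5, label='Style Strength')
    apply_to_background = gr.Checkbox(label='Apply to background only')
    output = gr.Textbox()
    submit_button = gr.Button('Submit')
    # Appending the checkbox to `inputs` mirrors the diff's one-line change.
    submit_button.click(fn=run, inputs=[content_image, style_dropdown, style_strength_slider, apply_to_background], outputs=[output])

demo.launch()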
inference.py
CHANGED
@@ -3,21 +3,34 @@ from tqdm import tqdm
 import torch
 import torch.optim as optim
 import torch.nn.functional as F
+from torchvision.transforms.functional import gaussian_blur
+from torchvision import models
 
 def _gram_matrix(feature):
     batch_size, n_feature_maps, height, width = feature.size()
     new_feature = feature.view(batch_size * n_feature_maps, height * width)
     return torch.mm(new_feature, new_feature.t())
 
-def _compute_loss(generated_features, content_features, style_features, alpha, beta):
+def _compute_loss(generated_features, content_features, style_features, resized_bg_masks, alpha, beta):
     content_loss = 0
     style_loss = 0
     w_l = 1 / len(generated_features)
-    for gf, cf, sf in zip(generated_features, content_features, style_features):
+
+    for i, (gf, cf, sf) in enumerate(zip(generated_features, content_features, style_features)):
         content_loss += F.mse_loss(gf, cf)
-        G = _gram_matrix(gf)
-        A = _gram_matrix(sf)
+
+        if resized_bg_masks:
+            blurred_bg_mask = gaussian_blur(resized_bg_masks[i], kernel_size=5)
+            masked_gf = gf * blurred_bg_mask
+            masked_sf = sf * blurred_bg_mask
+            G = _gram_matrix(masked_gf)
+            A = _gram_matrix(masked_sf)
+        else:
+            G = _gram_matrix(gf)
+            A = _gram_matrix(sf)
+
         style_loss += w_l * F.mse_loss(G, A)
+
     return alpha * content_loss + beta * style_loss
 
 def inference(
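To see what the new masked branch computes: multiplying a feature map by a (blurred) background mask zeroes the foreground activations, so the Gram matrix captures feature correlations from background regions only, and the blur softens the seam between the two regions. A shape-level sketch with dummy tensors (the sizes are illustrative, not the Space's real feature shapes):

import torch
from torchvision.transforms.functional import gaussian_blur

def _gram_matrix(feature):
    batch_size, n_feature_maps, height, width = feature.size()
    new_feature = feature.view(batch_size * n_feature_maps, height * width)
    return torch.mm(new_feature, new_feature.t())

gf = torch.randn(1, 64, 32, 32)        # stand-in for one layer's generated features
bg_mask = torch.ones(1, 1, 32, 32)     # 1 = background, 0 = foreground
bg_mask[:, :, :16, :] = 0.0            # pretend the top half is foreground

blurred = gaussian_blur(bg_mask, kernel_size=5)  # soften the mask boundary
G = _gram_matrix(gf * blurred)         # correlations from background activations only
print(G.shape)                         # torch.Size([64, 64])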
@@ -25,11 +38,12 @@ def inference(
     model,
     content_image,
     style_features,
+    apply_to_background,
     lr,
     iterations=101,
     optim_caller=optim.AdamW,
     alpha=1,
-    beta=1
+    beta=1,
 ):
     generated_image = content_image.clone().requires_grad_(True)
     optimizer = optim_caller([generated_image], lr=lr)
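One thing to watch in this hunk: apply_to_background is inserted ahead of lr, so any positional call to inference() would silently shift its arguments. app.py is unaffected because it passes everything by keyword.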
@@ -37,17 +51,41 @@
 
     with torch.no_grad():
         content_features = model(content_image)
+
+    resized_bg_masks = []
+    if apply_to_background:
+        segmentation_model = models.segmentation.deeplabv3_resnet101(weights='DEFAULT').eval()
+        segmentation_model = segmentation_model.to(content_image.device)
+
+        segmentation_output = segmentation_model(content_image)['out']
+        segmentation_mask = segmentation_output.argmax(dim=1)
+
+        background_mask = (segmentation_mask == 0).float()
+        foreground_mask = (segmentation_mask != 0).float()
+
+        for cf in content_features:
+            _, _, h_i, w_i = cf.shape
+            bg_mask = F.interpolate(background_mask.unsqueeze(1), size=(h_i, w_i), mode='bilinear', align_corners=False)
+            resized_bg_masks.append(bg_mask)
 
     def closure(iter):
         optimizer.zero_grad()
         generated_features = model(generated_image)
-        total_loss = _compute_loss(generated_features, content_features, style_features, alpha, beta)
+        total_loss = _compute_loss(
+            generated_features, content_features, style_features, resized_bg_masks, alpha, beta
+        )
         total_loss.backward()
         min_losses[iter] = min(min_losses[iter], total_loss.item())
         return total_loss
 
     for iter in tqdm(range(iterations), desc='The magic is happening ✨'):
         optimizer.step(lambda: closure(iter))
+
+        if apply_to_background:
+            with torch.no_grad():
+                foreground_mask_resized = F.interpolate(foreground_mask.unsqueeze(1), size=generated_image.shape[2:], mode='nearest')
+                generated_image.data = generated_image.data * (1 - foreground_mask_resized) + content_image.data * foreground_mask_resized
+
         if iter % 10 == 0: print(f'Loss ({iter}):', min_losses[iter])
 
     return generated_image
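The segmentation branch relies on DeepLabV3's Pascal VOC label map, where class 0 is 'background': every pixel not assigned to a detected object counts as background. A standalone sketch of the mask extraction with a dummy input (unlike the diff, the forward pass here is wrapped in torch.no_grad(), which avoids building an unused autograd graph):

import torch
from torchvision import models

segmentation_model = models.segmentation.deeplabv3_resnet101(weights='DEFAULT').eval()

content_image = torch.randn(1, 3, 512, 512)            # stand-in for the preprocessed content image
with torch.no_grad():
    logits = segmentation_model(content_image)['out']  # (1, 21, 512, 512) Pascal VOC class scores
labels = logits.argmax(dim=1)                          # (1, 512, 512) per-pixel class ids

background_mask = (labels == 0).float()                # 1 where no object was detected
foreground_mask = (labels != 0).float()

Note that the diff constructs this model inside inference(), so every background-only run pays the model-construction cost again; hoisting it to module level, as app.py already does for the cached style features, would be a natural follow-up.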
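Finally, the per-iteration paste-back at the end of the loop works because writing through .data bypasses autograd: the background keeps its stylized pixels while foreground pixels are reset to the content image, and the optimizer's state on the leaf tensor is untouched. A minimal sketch of that blend with dummy tensors:

import torch
import torch.nn.functional as F

generated_image = torch.rand(1, 3, 8, 8, requires_grad=True)   # the leaf being optimized
content_image = torch.rand(1, 3, 8, 8)
foreground_mask = (torch.rand(1, 8, 8) > 0.5).float()          # stand-in segmentation mask

fg = F.interpolate(foreground_mask.unsqueeze(1), size=generated_image.shape[2:], mode='nearest')
with torch.no_grad():
    # Stylized pixels survive where fg == 0 (background); original content
    # is restored where fg == 1 (foreground). Assigning to .data keeps the
    # tensor's autograd identity and optimizer state intact.
    generated_image.data = generated_image.data * (1 - fg) + content_image * fg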