jamino30 committed
Commit 814e69a
Parent: 5464cad

Upload folder using huggingface_hub

app.py CHANGED
@@ -12,6 +12,7 @@ from gradio_imageslider import ImageSlider

from utils import preprocess_img, preprocess_img_from_path, postprocess_img
from vgg.vgg19 import VGG_19
+from u2net.model import U2Net
from inference import inference

if torch.cuda.is_available(): device = 'cuda'
@@ -20,10 +21,21 @@ else: device = 'cpu'
print('DEVICE:', device)
if device == 'cuda': print('CUDA DEVICE:', torch.cuda.get_device_name())

+def load_model_without_module(model, model_path):
+    state_dict = torch.load(model_path, map_location=device, weights_only=True)
+
+    new_state_dict = {}
+    for k, v in state_dict.items():
+        name = k[7:] if k.startswith('module.') else k
+        new_state_dict[name] = v
+    model.load_state_dict(new_state_dict)
+
model = VGG_19().to(device).eval()
for param in model.parameters():
    param.requires_grad = False
-segmentation_model = models.segmentation.deeplabv3_resnet101(
+# sod_model = U2Net().to(device).eval()
+# load_model_without_module(sod_model, 'u2net/saved_models/u2net-duts.pt')
+sod_model = models.segmentation.deeplabv3_resnet101(
    weights='DEFAULT'
).to(device).eval()

@@ -39,7 +51,7 @@ for style_name, style_img_path in style_options.items():
    style_features = model(style_img)
    cached_style_features[style_name] = style_features

-@spaces.GPU(duration=20)
+@spaces.GPU(duration=30)
def run(content_image, style_name, style_strength=10):
    yield [None] * 3
    content_img, original_size = preprocess_img(content_image, img_size)
@@ -66,7 +78,7 @@ def run(content_image, style_name, style_strength=10):
    def run_inference(apply_to_background):
        return inference(
            model=model,
-            segmentation_model=segmentation_model,
+            sod_model=sod_model,
            content_image=content_img,
            style_features=style_features,
            lr=lrs[style_strength-1],
@@ -81,7 +93,7 @@ def run(content_image, style_name, style_strength=10):
        future_all = executor.submit(run_inference, False)
        future_bg = executor.submit(run_inference, True)
        generated_img_all, _ = future_all.result()
-        generated_img_bg, salient_object_ratio = future_bg.result()
+        generated_img_bg, bg_ratio = future_bg.result()

    et = time.time()
    print('TIME TAKEN:', et-st)
@@ -89,7 +101,7 @@ def run(content_image, style_name, style_strength=10):
    yield (
        (content_image, postprocess_img(generated_img_all, original_size)),
        (content_image, postprocess_img(generated_img_bg, original_size)),
-        f'{salient_object_ratio:.2f}'
+        f'{bg_ratio:.2f}'
    )

def set_slider(value):
@@ -109,7 +121,7 @@ with gr.Blocks(css=css) as demo:
            content_image = gr.Image(label='Content', type='pil', sources=['upload', 'webcam', 'clipboard'], format='jpg', show_download_button=False)
            style_dropdown = gr.Radio(choices=list(style_options.keys()), label='Style', value='Starry Night', type='value')
            with gr.Group():
-                style_strength_slider = gr.Slider(label='Style Strength', minimum=1, maximum=10, step=1, value=10, info='Higher values add artistic flair, lower values add a realistic feel.')
+                style_strength_slider = gr.Slider(label='Style Strength', minimum=1, maximum=10, step=1, value=5, info='Higher values add artistic flair, lower values add a realistic feel.')
            submit_button = gr.Button('Submit', variant='primary')

    examples = gr.Examples(
@@ -125,7 +137,7 @@ with gr.Blocks(css=css) as demo:
            download_button_1 = gr.DownloadButton(label='Download Styled Image', visible=False)
            with gr.Group():
                output_image_background = ImageSlider(position=0.15, label='Styled Background', type='pil', interactive=False, show_download_button=False)
-                salient_object_ratio_label = gr.Label(label='Salient Object Ratio')
+                bg_ratio_label = gr.Label(label='Background Ratio')
            download_button_2 = gr.DownloadButton(label='Download Styled Background', visible=False)

    def save_image(img_tuple1, img_tuple2):
@@ -142,7 +154,7 @@ with gr.Blocks(css=css) as demo:
    submit_button.click(
        fn=run,
        inputs=[content_image, style_dropdown, style_strength_slider],
-        outputs=[output_image_all, output_image_background, salient_object_ratio_label]
+        outputs=[output_image_all, output_image_background, bg_ratio_label]
    ).then(
        fn=save_image,
        inputs=[output_image_all, output_image_background],
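
Note on the new load_model_without_module helper: checkpoints saved from a model wrapped in torch.nn.DataParallel prefix every state-dict key with 'module.', which a bare model refuses to load. A minimal, self-contained sketch of the failure mode and the fix (the toy nn.Sequential model below is hypothetical, for illustration only):

import torch
import torch.nn as nn

# Hypothetical stand-in for U2Net; any module reproduces the issue.
net = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU())
wrapped = nn.DataParallel(net)

# DataParallel nests the model under .module, so every key gains a prefix.
print(list(wrapped.state_dict())[:1])  # ['module.0.weight']

# Strip the prefix exactly as load_model_without_module does above.
clean = {k[7:] if k.startswith('module.') else k: v
         for k, v in wrapped.state_dict().items()}
net.load_state_dict(clean)  # loads without missing/unexpected-key errors
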
content_images/228.jpg ADDED
content_images/24.jpg ADDED
content_images/baseball.jpg ADDED
content_images/bleachers.jpg ADDED
content_images/dancers.jpg ADDED
content_images/glassesman.jpg ADDED
content_images/ladies.jpg ADDED
content_images/messi.jpg ADDED
content_images/motorcycle.jpg ADDED
inference.py CHANGED
@@ -6,9 +6,6 @@ import torch.optim as optim
import torch.nn.functional as F
from torchvision.transforms.functional import gaussian_blur

-DEV_MODE = os.environ.get('DEV_MODE', None)
-print('DEV MODE:', True if DEV_MODE else False)
-
def _gram_matrix(feature):
    batch_size, n_feature_maps, height, width = feature.size()
    new_feature = feature.view(batch_size * n_feature_maps, height * width)
@@ -39,7 +36,7 @@ def _compute_loss(generated_features, content_features, style_features, resized_
def inference(
    *,
    model,
-    segmentation_model,
+    sod_model,
    content_image,
    style_features,
    apply_to_background,
@@ -49,9 +46,6 @@
    alpha=1,
    beta=1,
):
-    if DEV_MODE:
-        from torch.utils.tensorboard import SummaryWriter
-        writer = SummaryWriter()
    generated_image = content_image.clone().requires_grad_(True)
    optimizer = optim_caller([generated_image], lr=lr)
    min_losses = [float('inf')] * iterations
@@ -61,12 +55,20 @@

    resized_bg_masks = []
    salient_object_ratio = None
-    if apply_to_background:
-        segmentation_output = segmentation_model(content_image)['out']
-        segmentation_mask = segmentation_output.argmax(dim=1)
+    if apply_to_background:
+        # original
+        segmentation_output = sod_model(content_image)['out']  # [1, 21, 512, 512]
+        segmentation_mask = segmentation_output.argmax(dim=1)  # [1, 512, 512]
        background_mask = (segmentation_mask == 0).float()
        foreground_mask = 1 - background_mask

+        # new
+        # segmentation_output = sod_model(content_image)[0]
+        # segmentation_output = torch.sigmoid(segmentation_output)
+        # segmentation_mask = (segmentation_output > 0.7).float()
+        # background_mask = (segmentation_mask == 0).float()
+        # foreground_mask = 1 - background_mask
+
        salient_object_pixel_count = foreground_mask.sum().item()
        total_pixel_count = segmentation_mask.numel()
        salient_object_ratio = salient_object_pixel_count / total_pixel_count
@@ -85,12 +87,6 @@
        total_loss.backward()

        # log loss
-        if DEV_MODE:
-            writer.add_scalars(f'style-{"background" if apply_to_background else "image"}', {
-                'Loss/content': content_loss.item(),
-                'Loss/style': style_loss.item(),
-                'Loss/total': total_loss.item()
-            }, iter)
        min_losses[iter] = min(min_losses[iter], total_loss.item())

        return total_loss
@@ -102,8 +98,5 @@
    with torch.no_grad():
        foreground_mask_resized = F.interpolate(foreground_mask.unsqueeze(1), size=generated_image.shape[2:], mode='nearest')
        generated_image.data = generated_image.data * (1 - foreground_mask_resized) + content_image.data * foreground_mask_resized
-
-    if DEV_MODE:
-        writer.flush()
-        writer.close()
-    return generated_image, salient_object_ratio
+
+    return generated_image, salient_object_ratio
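
For reference, a sketch (not code from this repo) contrasting the two masking strategies in the hunk above: the active DeepLabV3 path takes an argmax over 21 semantic-class logits and treats class 0 as background, while the commented-out U2Net path thresholds a sigmoid saliency map at 0.7. Shapes assume a 1x3x512x512 input, per the comments in the diff.

import torch

# DeepLabV3 path ('original'): 21 class logits, class index 0 = background.
seg_logits = torch.randn(1, 21, 512, 512)      # stand-in for sod_model(x)['out']
seg_mask = seg_logits.argmax(dim=1)            # [1, 512, 512] class indices
foreground_mask = 1 - (seg_mask == 0).float()  # 1 marks the salient object

# U2Net path ('new'): a single saliency map, squashed to [0, 1], thresholded.
saliency = torch.randn(1, 1, 512, 512)         # stand-in for sod_model(x)[0]
foreground_u2 = (torch.sigmoid(saliency) > 0.7).float()

# Either mask then gates the final blend at the end of inference():
# generated = generated * (1 - foreground) + content * foreground
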
 
 
 
requirements.txt CHANGED
@@ -1,9 +1,102 @@
---extra-index-url https://download.pytorch.org/whl/cu113
-torch
-torchvision
-pillow
-gradio
-gradio_imageslider
-spaces
-tqdm
-tensorboard
+absl-py==2.1.0
+aiofiles==23.2.1
+annotated-types==0.7.0
+anyio==4.6.0
+appnope==0.1.4
+asttokens==2.4.1
+certifi==2024.8.30
+charset-normalizer==3.3.2
+click==8.1.7
+comm==0.2.2
+contourpy==1.3.0
+cycler==0.12.1
+debugpy==1.8.7
+decorator==5.1.1
+executing==2.1.0
+fastapi==0.115.0
+ffmpy==0.4.0
+filelock==3.16.1
+fonttools==4.54.1
+fsspec==2024.9.0
+gradio==4.44.0
+gradio_client==1.3.0
+gradio_imageslider==0.0.20
+grpcio==1.66.1
+h11==0.14.0
+httpcore==1.0.5
+httpx==0.27.2
+huggingface-hub==0.25.1
+idna==3.10
+importlib_resources==6.4.5
+ipykernel==6.29.5
+ipython==8.28.0
+jedi==0.19.1
+Jinja2==3.1.4
+joblib==1.4.2
+jupyter_client==8.6.3
+jupyter_core==5.7.2
+kiwisolver==1.4.7
+Markdown==3.7
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.9.2
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+mpmath==1.3.0
+nest-asyncio==1.6.0
+networkx==3.3
+numpy==2.1.1
+opencv-python==4.10.0.84
+orjson==3.10.7
+packaging==24.1
+pandas==2.2.3
+parso==0.8.4
+pexpect==4.9.0
+pillow==10.4.0
+platformdirs==4.3.6
+prompt_toolkit==3.0.48
+protobuf==5.28.2
+psutil==5.9.8
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pydantic==2.9.2
+pydantic_core==2.23.4
+pydub==0.25.1
+Pygments==2.18.0
+pyparsing==3.1.4
+python-dateutil==2.9.0.post0
+python-multipart==0.0.10
+pytz==2024.2
+PyYAML==6.0.2
+pyzmq==26.2.0
+requests==2.32.3
+rich==13.8.1
+ruff==0.6.8
+scikit-learn==1.5.2
+scipy==1.14.1
+semantic-version==2.10.0
+setuptools==75.1.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+spaces==0.30.2
+stack-data==0.6.3
+starlette==0.38.6
+sympy==1.13.3
+tensorboard==2.18.0
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+tomlkit==0.12.0
+torch==2.4.1
+torchvision==0.19.1
+tornado==6.4.1
+tqdm==4.66.5
+traitlets==5.14.3
+typer==0.12.5
+typing_extensions==4.12.2
+tzdata==2024.2
+urllib3==2.2.3
+uvicorn==0.30.6
+wcwidth==0.2.13
+websockets==12.0
+Werkzeug==3.0.4
utils.py CHANGED
@@ -12,7 +12,8 @@ def preprocess_img(img: Image, img_size):

    transform = transforms.Compose([
        transforms.Resize((img_size, img_size)),
-        transforms.ToTensor()
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    img = transform(img).unsqueeze(0)
    return img, original_size
@@ -20,9 +21,11 @@ def preprocess_img(img: Image, img_size):
def postprocess_img(img, original_size):
    img = img.detach().cpu().squeeze(0)

-    # address tensor value scaling and quantization
+    # Denormalize the image
+    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
+    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
+    img = img * std + mean
    img = torch.clamp(img, 0, 1)
-    img = img.mul(255).byte()

    img = transforms.ToPILImage()(img)
    img = img.resize(original_size, Image.Resampling.LANCZOS)
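
A quick round-trip check (not part of the commit) that the new Normalize in preprocess_img and the denormalization in postprocess_img are inverses up to float rounding, given the same ImageNet statistics:

import torch
from torchvision import transforms

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

x = torch.rand(3, 64, 64)  # stand-in for a [0, 1] image tensor
normalized = transforms.Normalize(mean=mean, std=std)(x)

# postprocess_img's denormalization step
m = torch.tensor(mean).view(3, 1, 1)
s = torch.tensor(std).view(3, 1, 1)
restored = normalized * s + m

print(torch.allclose(restored, x, atol=1e-6))  # True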