priyam314 committed
Commit
cbcb207
1 Parent(s): bb603a9

first commit

app.py ADDED
@@ -0,0 +1,180 @@
import io
import streamlit as st
import numpy as np
from src.utils import utils
import PIL.Image as Image
from src.reconstruct_image_from_representation import reconstruct_image_from_representation
from src.neural_style_transfer import neural_style_transfer

st.set_page_config(
    page_title="Neural Style Transfer Video Generation of image reconstruction",
    page_icon="\u2712",
    layout="wide",
    initial_sidebar_state="expanded",
)

st.header("Neural Style Transfer Video Generation")

# Sidebar
st.sidebar.header("Neural Style Transfer Video Generation")
with st.sidebar.expander('About the app'):
    st.write("""
        Use this application to play with Neural Style Transfer
        by generating a video of the optimization process.
        """)

# Reconstruct or Transfer
with st.sidebar.container():
    st.sidebar.subheader("Reconstruct or Transfer")

Type = st.sidebar.selectbox("Do you want to reconstruct or transfer",
                            ["Reconstruct", "Transfer"])
utils.yamlSet('type', Type)

# Optimizer
with st.sidebar.container():
    st.sidebar.subheader("Optimizer")

optimizer = st.sidebar.selectbox("Choose Optimizer", ["Adam", "LBFGS"])
utils.yamlSet('optimizer', optimizer)

iterations = st.sidebar.slider("Iterations", 10, 3000)
utils.yamlSet('iterations', iterations)

if optimizer == "Adam":
    learning_rate = st.sidebar.slider("Learning Rate (100\u03BB)", 0.01,
                                      90.0)
    utils.yamlSet('learning_rate', learning_rate)
    st.sidebar.write("\u03BB = ", learning_rate / 100.0)

# Reconstruction
if Type == "Reconstruct":
    with st.sidebar.container():
        st.sidebar.subheader("Reconstruction")
        reconstruct = st.sidebar.selectbox("Reconstruct which image",
                                           ('Content', 'Style'))
        utils.yamlSet('reconstruct', reconstruct)

    # Visualization
    with st.sidebar.container():
        st.sidebar.subheader("Visualization")
        visualize = st.sidebar.selectbox(
            "Do you want to visualize feature maps of reconstructed images",
            ("Yes", "No"))
        utils.yamlSet('visualize', visualize)

# Model
with st.sidebar.container():
    st.sidebar.subheader("Model")
    model = st.sidebar.selectbox("Choose Model",
                                 ("VGG16", "VGG16-Experimental"))
    utils.yamlSet('model', model)

# # use layer
# if model == "VGG19":
#     with st.sidebar.container():
#         st.sidebar.subheader("Layer Type")
#         use = st.sidebar.selectbox("Which type of layer you want to use",
#                                    ("convolution", "relu"))

# Init Image
if Type == "Transfer":
    with st.sidebar.container():
        st.sidebar.subheader("Init Image")
        initImage = st.sidebar.selectbox(
            "Init Image",
            ('Gaussian Noise Image', 'White Noise Image', 'Content', 'Style'))
        utils.yamlSet('initImage', initImage)

# Content Layer
with st.sidebar.container():
    st.sidebar.subheader("Content Layer")
    if model == "VGG16-Experimental":
        contentLayer = st.sidebar.selectbox(
            "Content Layer", ('relu1_1', 'relu2_1', 'relu2_2', 'relu3_1',
                              'relu3_2', 'relu4_1', 'relu4_3', 'relu5_1'))
    elif model == "VGG16":
        contentLayer = st.sidebar.selectbox(
            "Content Layer", ('relu1_2', 'relu2_2', 'relu3_3', 'relu4_3'))
    utils.yamlSet('contentLayer', contentLayer)
    # elif model == "VGG19" and use == "relu":
    #     st.sidebar.selectbox("Content Layer",
    #                          ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1'))
    # elif model == "VGG19" and use == "convolution":
    #     st.sidebar.selectbox("Content Layer",
    #                          ('conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv4_2',
    #                           'conv5_1'))

# Height
with st.sidebar.container():
    st.sidebar.subheader("Height")
    height = st.sidebar.slider("Height", 100, 6000, 400)
    utils.yamlSet('height', height)

# Representation saving frequency
with st.sidebar.container():
    st.sidebar.subheader("Representation Saving Frequency")
    reprSavFreq = st.sidebar.slider(
        "After how many iterations you want to save representation for "
        "video generation", 1, 100)
    utils.yamlSet('reprSavFreq', reprSavFreq)

if Type == "Transfer":
    # Content Weight
    col1, col2 = st.columns([0.85, 0.15])
    with col1:
        contentWeight = st.slider("Content Weight (1000\u03B1)", 0.01, 1000.0)
        utils.yamlSet('contentWeight', contentWeight)

    with col2:
        st.write("\u03B1 = ", contentWeight / 1000.0)

    # Style Weight
    col1, col2 = st.columns([0.85, 0.15])
    with col1:
        styleWeight = st.slider("Style Weight (1000\u03B2)", 0.01, 1000.0)
        utils.yamlSet('styleWeight', styleWeight)

    with col2:
        st.write("\u03B2 = ", styleWeight / 1000.0)

    # Total Variation Weight
    col1, col2 = st.columns([0.85, 0.15])
    with col1:
        totalVariationWeight = st.slider("Total Variation Weight (1000\u03B3)",
                                         0.01, 1000.0)
        utils.yamlSet('totalVariationWeight', totalVariationWeight)

    with col2:
        st.write("\u03B3 = ", totalVariationWeight / 1000.0)

# File upload
col1, col2 = st.columns([0.5, 0.5])
with col1:
    contentImage = st.file_uploader('Choose Content Image', type=['jpg'])
    if contentImage:
        st.image(contentImage)
        contentNumpy = np.asarray(
            Image.open(io.BytesIO(contentImage.getvalue())))
        contentPath = utils.save_numpy_array_as_jpg(contentNumpy, "content")
        utils.yamlSet('contentPath', contentPath)

with col2:
    styleImage = st.file_uploader('Choose Style Image', type=['jpg'])
    if styleImage:
        st.image(styleImage)
        styleNumpy = np.asarray(Image.open(io.BytesIO(styleImage.getvalue())))
        stylePath = utils.save_numpy_array_as_jpg(styleNumpy, "style")
        utils.yamlSet("stylePath", stylePath)

submit = st.button("Submit")

if submit:
    utils.clearDir()
    if Type == "Reconstruct":
        reconstruct_image_from_representation()
    elif Type == "Transfer":
        neural_style_transfer()
        video_file = open("src/data/transfer/out.mp4", "rb")
        video_bytes = video_file.read()
        st.video(video_bytes)
makefile ADDED
@@ -0,0 +1,3 @@
format:
	yapf -i --recursive src/
	yapf -i app.py
requirements.txt ADDED
@@ -0,0 +1,57 @@
altair==4.2.2
attrs==22.2.0
blinker==1.5
cachetools==5.3.0
certifi==2022.12.7
charset-normalizer==3.0.1
click==8.1.3
decorator==5.1.1
entrypoints==0.4
gitdb==4.0.10
GitPython==3.1.31
idna==3.4
importlib-metadata==6.0.0
Jinja2==3.1.2
jsonschema==4.17.3
markdown-it-py==2.2.0
MarkupSafe==2.1.2
mdurl==0.1.2
numpy==1.24.2
nvidia-cublas-cu11==11.10.3.66
nvidia-cuda-nvrtc-cu11==11.7.99
nvidia-cuda-runtime-cu11==11.7.99
nvidia-cudnn-cu11==8.5.0.96
opencv-python==4.7.0.72
packaging==23.0
pandas==1.5.3
Pillow==9.4.0
protobuf==3.20.3
pyarrow==11.0.0
pydeck==0.8.0
Pygments==2.14.0
Pympler==1.0.1
pyrsistent==0.19.3
python-dateutil==2.8.2
pytz==2022.7.1
pytz-deprecation-shim==0.1.0.post0
PyYAML==6.0
requests==2.28.2
rich==13.3.1
semver==2.13.0
six==1.16.0
smmap==5.0.0
streamlit==1.19.0
toml==0.10.2
toolz==0.12.0
torch==1.13.1
torchaudio==0.13.1
torchvision==0.14.1
tornado==6.2
typing_extensions==4.5.0
tzdata==2022.7
tzlocal==4.2
urllib3==1.26.14
validators==0.20.0
watchdog==2.3.0
yapf==0.32.0
zipp==3.15.0
src/README.md ADDED
@@ -0,0 +1,239 @@
## Neural Style Transfer (optimization method) :computer: + :art: = :heart:
This repo contains a concise PyTorch implementation of the original NST paper (:link: [Gatys et al.](https://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Gatys_Image_Style_Transfer_CVPR_2016_paper.pdf)).

It's an accompanying repository for [this video series on YouTube](https://www.youtube.com/watch?v=S78LQebx6jo&list=PLBoQnSflObcmbfshq9oNs41vODgXG-608).

<p align="left">
<a href="https://www.youtube.com/watch?v=S78LQebx6jo" target="_blank"><img src="https://img.youtube.com/vi/S78LQebx6jo/0.jpg"
alt="NST Intro" width="480" height="360" border="10" /></a>
</p>

### What is the NST algorithm?
The algorithm transfers the style of one input image (the style image) onto another input image (the content image) using a CNN (usually VGG-16/19). The result is a composite, stylized image that keeps the content of the content image but takes on the style of the style image.

<p align="center">
<img src="data/examples/bridge/green_bridge_vg_la_cafe_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0.jpg" width="570"/>
<img src="data/examples/bridge/content_style.jpg" width="260"/>
</p>
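
To make the objective concrete: the stylized image is obtained by running an optimizer directly on the image pixels, minimizing a weighted sum of a content term, a style term, and a total variation term. A minimal sketch (the default weights shown are the `cw`/`sw`/`tv` values that appear in the example file names; the three terms themselves are discussed in the sections below):

```python
import torch

def total_nst_loss(content_loss: torch.Tensor,
                   style_loss: torch.Tensor,
                   tv_loss: torch.Tensor,
                   content_weight: float = 1e5,  # "cw" in the example file names
                   style_weight: float = 3e4,    # "sw"
                   tv_weight: float = 1e0) -> torch.Tensor:
    # Adam or L-BFGS updates the image pixels to minimize this scalar.
    return content_weight * content_loss + style_weight * style_loss + tv_weight * tv_loss
```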
### Why yet another NST repo?
It's the **cleanest and most concise** NST repo that I know of + it's written in **PyTorch!** :heart:

Most NST repos were written in TensorFlow (before it even had an L-BFGS optimizer) or in torch (an obsolete, Lua-based framework). They are often overly complicated, bundling multiple functionalities (video, static image, color transfer, etc.) into a single repo and exposing 100 command-line parameters, of which maybe 5 or 6 are actually used on a regular basis.

## Examples

Transferring style gives beautiful artistic results:

<p align="center">
<img src="data/examples/bridge/green_bridge_vg_starry_night_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">
<img src="data/examples/bridge/green_bridge_edtaonisl_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">
<img src="data/examples/bridge/green_bridge_wave_crop_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">

<img src="data/examples/lion/lion_candy_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">
<img src="data/examples/lion/lion_edtaonisl_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">
<img src="data/examples/lion/lion_vg_la_cafe_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">
</p>

And here are some results coupled with their style:

<p align="center">
<img src="data/examples/figures/figures_ben_giles_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="400px">
<img src="data/style-images/ben_giles.jpg" width="267px">

<img src="data/examples/figures/figures_wave_crop_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="400px">
<img src="data/style-images/wave_crop.jpg" width="267px">

<img src="data/examples/figures/figures_vg_wheat_field_w_350_m_vgg19_cw_100000.0_sw_300000.0_tv_1.0_resized.jpg" width="400px">
<img src="data/style-images/vg_wheat_field_cropped.jpg" width="267px">

<img src="data/examples/figures/figures_vg_starry_night_w_350_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="400px">
<img src="data/style-images/vg_starry_night_resized.jpg" width="267px">
</p>

*Note: all of the stylized images were produced by me (using this repo); credits for the original image artists [are given below](#acknowledgements).*

### Content/Style tradeoff

Changing the style weight gives you less or more style on the final image, assuming you keep the content weight constant. <br/>
Here I used powers of 10 for the style weight (1e1, 1e2, 1e3, 1e4), kept the content weight constant at 1e5, and used a random image as the initialization image.

<p align="center">
<img src="data/examples/style-tradeoff/figures_vg_starry_night_o_lbfgs_i_random_h_352_m_vgg19_cw_100000.0_sw_10.0_tv_1.0_resized.jpg" width="200px">
<img src="data/examples/style-tradeoff/figures_vg_starry_night_o_lbfgs_i_random_h_352_m_vgg19_cw_100000.0_sw_100.0_tv_1.0_resized.jpg" width="200px">
<img src="data/examples/style-tradeoff/figures_vg_starry_night_o_lbfgs_i_random_h_352_m_vgg19_cw_100000.0_sw_1000.0_tv_1.0_resized.jpg" width="200px">
<img src="data/examples/style-tradeoff/figures_vg_starry_night_o_lbfgs_i_random_h_352_m_vgg19_cw_100000.0_sw_10000.0_tv_1.0_resized.jpg" width="200px">
</p>

### Impact of total variation (tv) loss

Rarely explained, the total variation loss, i.e. its corresponding weight, controls the smoothness of the image. <br/>
I again used powers of 10 (1e1, 1e4, 1e5, 1e6) and used the content image as the initialization image.

<p align="center">
<img src="data/examples/tv-tradeoff/figures_candy_o_lbfgs_i_content_h_350_m_vgg19_cw_100000.0_sw_30000.0_tv_10.0_resized.jpg" width="200px">
<img src="data/examples/tv-tradeoff/figures_candy_o_lbfgs_i_content_h_350_m_vgg19_cw_100000.0_sw_30000.0_tv_10000.0_resized.jpg" width="200px">
<img src="data/examples/tv-tradeoff/figures_candy_o_lbfgs_i_content_h_350_m_vgg19_cw_100000.0_sw_30000.0_tv_100000.0_resized.jpg" width="200px">
<img src="data/examples/tv-tradeoff/figures_candy_o_lbfgs_i_content_h_350_m_vgg19_cw_100000.0_sw_30000.0_tv_1000000.0_resized.jpg" width="200px">
</p>
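
For reference, total variation here is simply the summed absolute difference between neighbouring pixels, so a larger tv weight penalizes high-frequency noise and smooths the result. A minimal sketch of the term (the same formula used by the `TotalVariationLoss` module in this repo):

```python
import torch

def total_variation(img: torch.Tensor) -> torch.Tensor:
    # img has shape (batch, channels, height, width)
    horizontal = torch.sum(torch.abs(img[:, :, :, :-1] - img[:, :, :, 1:]))
    vertical = torch.sum(torch.abs(img[:, :, :-1, :] - img[:, :, 1:, :]))
    return horizontal + vertical
```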
### Optimization initialization

Starting from different initialization images (white noise, Gaussian noise, the content image, or the style image) leads to different results. <br/>
Empirically, the content image gives the best results, as also explored in [this research paper](https://arxiv.org/pdf/1602.07188.pdf). <br/>
Here you can see results for content, random and style initialization, in that order (left to right):

<p align="center">
<img src="data/examples/init_methods/golden_gate_vg_la_cafe_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">
<img src="data/examples/init_methods/golden_gate_vg_la_cafe_o_lbfgs_i_random_h_500_m_vgg19_cw_100000.0_sw_1000.0_tv_1.0_resized.jpg" width="270px">
<img src="data/examples/init_methods/golden_gate_vg_la_cafe_o_lbfgs_i_style_h_500_m_vgg19_cw_100000.0_sw_10.0_tv_0.1_resized.jpg" width="270px">
</p>

You can also see that with style initialization some content from the artwork leaked directly into the output.
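
A rough sketch of how the different initializations are built (it mirrors `getInitImage` in `src/utils/utils.py`; the noise scale of 90 matches the value used there, and the bilinear resize for the style init is a simplification, since the repo re-prepares the style image at the content image's size instead):

```python
import numpy as np
import torch
import torch.nn.functional as F

def make_init_image(content_img: torch.Tensor, style_img: torch.Tensor,
                    mode: str, device: torch.device) -> torch.Tensor:
    if mode == 'white_noise':
        noise = np.random.uniform(-90., 90., content_img.shape).astype(np.float32)
        return torch.from_numpy(noise).to(device)
    if mode == 'gaussian_noise':
        noise = np.random.normal(loc=0, scale=90., size=content_img.shape).astype(np.float32)
        return torch.from_numpy(noise).to(device)
    if mode == 'content':
        return content_img.clone()
    # style init: the optimized image must have the content image's spatial size,
    # because its feature maps are compared against the content feature maps
    return F.interpolate(style_img, size=content_img.shape[2:], mode='bilinear',
                         align_corners=False)
```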
### Famous "Figure 3" reconstruction

Finally, if I hadn't included this section, you couldn't say that I've successfully reproduced the [original paper](https://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Gatys_Image_Style_Transfer_CVPR_2016_paper.pdf) (laughs in Python):

<p align="center">
<img src="data/examples/gatys_reconstruction/tubingen.jpg" width="300px">
<img src="data/examples/gatys_reconstruction/tubingen_shipwreck_o_lbfgs_i_random_h_400_m_vgg19_cw_100000.0_sw_200.0_tv_1.0_resized.jpg" width="300px">
<img src="data/examples/gatys_reconstruction/tubingen_starry-night_o_lbfgs_i_content_h_400_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0.jpg" width="300px">

<img src="data/examples/gatys_reconstruction/tubingen_the_scream_o_lbfgs_i_random_h_400_m_vgg19_cw_100000.0_sw_300.0_tv_1.0.jpg" width="300px">
<img src="data/examples/gatys_reconstruction/tubingen_seated-nude_o_lbfgs_i_random_h_400_m_vgg19_cw_100000.0_sw_2000.0_tv_1.0.jpg" width="300px">
<img src="data/examples/gatys_reconstruction/tubingen_kandinsky_o_lbfgs_i_content_h_400_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0.jpg" width="300px">
</p>

I haven't given it much effort; the results could be much nicer.

### Content reconstruction

If we use only the content (perceptual) loss and try to minimize that objective function, this is what we get (starting from noise):

<p align="center">
<img src="data/examples/content_reconstruction/0000.jpg" width="200px">
<img src="data/examples/content_reconstruction/0026.jpg" width="200px">
<img src="data/examples/content_reconstruction/0070.jpg" width="200px">
<img src="data/examples/content_reconstruction/0509.jpg" width="200px">
</p>

These are steps 0, 26, 70 and 509 of the L-BFGS numerical optimizer, using layer relu3_1 for the content representation.<br/>
Check out [this section](#reconstruct-image-from-representation) if you want to play with this.
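
In other words, content reconstruction minimizes the mean-squared error between the feature maps of the image being optimized and the feature maps of the content image at one chosen layer. A minimal sketch (assuming `vgg` is one of the models from `src/models/definitions/vgg_nets.py`, which return a tuple of feature maps):

```python
import torch
import torch.nn.functional as F

def content_loss(vgg, optimizing_img: torch.Tensor,
                 target_feature_maps: torch.Tensor, layer_index: int) -> torch.Tensor:
    # target_feature_maps were extracted once from the content image at the chosen layer
    current = vgg(optimizing_img)[layer_index].squeeze(0)
    return F.mse_loss(current, target_feature_maps, reduction='mean')
```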
### Style reconstruction

We can do the same thing for style (on the left is the original art image "Candy"), starting from noise:

<p align="center">
<img src="data/examples/style_reconstruction/candy.jpg" width="200px">
<img src="data/examples/style_reconstruction/0045.jpg" width="200px">
<img src="data/examples/style_reconstruction/0129.jpg" width="200px">
<img src="data/examples/style_reconstruction/0510.jpg" width="200px">
</p>

These are steps 45, 129 and 510 of L-BFGS, using layers relu1_1, relu2_1, relu3_1, relu4_1 and relu5_1 for the style representation.
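
Style reconstruction does the same, but matches Gram matrices (channel-wise correlations of the feature maps) instead of the feature maps themselves. A short sketch, equivalent to `gram_matrix` in `src/utils/utils.py` plus the averaged sum-MSE used for the style term:

```python
import torch
import torch.nn.functional as F

def gram_matrix(fmaps: torch.Tensor, normalize: bool = True) -> torch.Tensor:
    b, ch, h, w = fmaps.size()
    features = fmaps.view(b, ch, h * w)
    gram = features.bmm(features.transpose(1, 2))  # shape (b, ch, ch)
    return gram / (ch * h * w) if normalize else gram

def style_loss(target_grams, current_grams) -> torch.Tensor:
    loss = 0.0
    for gram_gt, gram_hat in zip(target_grams, current_grams):
        loss = loss + F.mse_loss(gram_hat[0], gram_gt[0], reduction='sum')
    return loss / len(target_grams)
```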
## Setup

1. Open Anaconda Prompt and navigate into the project directory: `cd path_to_repo`
2. Run `conda env create` (while in the project directory)
3. Run `activate pytorch-nst`

That's it! It should work out of the box, executing the environment.yml file which deals with dependencies.

-----

The PyTorch package will pull some version of CUDA with it, but it is highly recommended that you install system-wide CUDA beforehand, mostly because of GPU drivers. I also recommend using the Miniconda installer as a way to get conda on your system.

Follow through points 1 and 2 of [this setup](https://github.com/Petlja/PSIML/blob/master/docs/MachineSetup.md) and use the most up-to-date versions of Miniconda (Python 3.7) and CUDA/cuDNN.
(I recommend CUDA 10.1, as it is compatible with PyTorch 1.4, which is used in this repo, together with the newest compatible cuDNN.)

## Usage

1. Copy content images to the default content image directory: `/data/content-images/`
2. Copy style images to the default style image directory: `/data/style-images/`
3. Run `python neural_style_transfer.py --content_img_name <content-img-name> --style_img_name <style-img-name>`

It's that easy. For more advanced usage, take a look at the code; it's (hopefully) self-explanatory (if you speak Python ^^).

Or take a look at [this accompanying YouTube video](https://www.youtube.com/watch?v=XWMwdkaLFsI); it explains how to use this repo in greater detail.

Just run it! So that you can get something like this: :heart:
<p align="center">
<img src="data/examples/taj_mahal/taj_mahal_ben_giles_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0.jpg" width="615px">
</p>

### Debugging/Experimenting

Q: L-BFGS can't run on my computer, it takes too much GPU VRAM?<br/>
A: Set Adam as your default and take a look at the code for the initial style/content/tv weights you should use as a starting point.

Q: The output image looks too much like the style image?<br/>
A: Decrease the style weight, or take a look at the table of weights (in neural_style_transfer.py) which I've included and which works.

Q: There is too much noise (the image is not smooth)?<br/>
A: Increase the total variation (tv) weight (usually by multiples of 10; again, the table is your friend here, or just experiment yourself).

### Reconstruct image from representation

I've also included a file that will help you better understand how the algorithm works and what the neural net sees.<br/>
It allows you to visualize content representations **(feature maps)** and style representations **(Gram matrices)**.<br/>
It will also reconstruct either the style or the content purely from those representations and the corresponding model that produced them. <br/>

Just run this:<br/>
`python reconstruct_image_from_representation.py --should_reconstruct_content <Bool> --should_visualize_representation <Bool>`
<br/><br/>
And that's it! `--should_visualize_representation`, if set to True, will visualize the representations for you;<br/>
`--should_reconstruct_content` picks between style and content reconstruction.

Here are some feature maps (relu1_1, VGG 19) as well as a Gram matrix (relu2_1, VGG 19) for Van Gogh's famous [starry night](https://en.wikipedia.org/wiki/The_Starry_Night):

<p align="center">
<img src="data/examples/fms_gram/fm_vgg19_relu1_1_0005_resized.jpg" width="200px">
<img src="data/examples/fms_gram/fm_vgg19_relu1_1_0046_resized.jpg" width="200px">
<img src="data/examples/fms_gram/fm_vgg19_relu1_1_0058_resized.jpg" width="200px">
<img src="data/examples/fms_gram/gram_vgg19_relu2_1_0001.jpg" width="200px">
</p>

No more dark magic.

## Acknowledgements

I found these repos useful (while developing this one):
* [fast_neural_style](https://github.com/pytorch/examples/tree/master/fast_neural_style) (PyTorch, feed-forward method)
* [neural-style-tf](https://github.com/cysmith/neural-style-tf/) (TensorFlow, optimization method)
* [neural-style](https://github.com/anishathalye/neural-style/) (TensorFlow, optimization method)

I found some of the content/style images I was using here:
* [style/artistic images](https://www.rawpixel.com/board/537381/vincent-van-gogh-free-original-public-domain-paintings?sort=curated&mode=shop&page=1)
* [awesome figures pic](https://www.pexels.com/photo/action-android-device-electronics-595804/)
* [awesome bridge pic](https://www.pexels.com/photo/gray-bridge-and-trees-814499/)

Other images are now already classics in the NST world.

## Citation

If you find this code useful for your research, please cite the following:

```
@misc{Gordić2020nst,
  author = {Gordić, Aleksa},
  title = {pytorch-neural-style-transfer},
  year = {2020},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/gordicaleksa/pytorch-neural-style-transfer}},
}
```

## Connect with me

If you'd love to have some more AI-related content in your life :nerd_face:, consider:
* Subscribing to my YouTube channel [The AI Epiphany](https://www.youtube.com/c/TheAiEpiphany) :bell:
* Following me on [LinkedIn](https://www.linkedin.com/in/aleksagordic/) and [Twitter](https://twitter.com/gordic_aleksa) :bulb:
* Following me on [Medium](https://gordicaleksa.medium.com/) :books: :heart:

## Licence

[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/gordicaleksa/pytorch-neural-style-transfer/blob/master/LICENCE)
src/__pycache__/neural_style_transfer.cpython-310.pyc ADDED
Binary file (5.46 kB).
 
src/__pycache__/reconstruct_image_from_representation.cpython-310.pyc ADDED
Binary file (3.34 kB).
 
src/commands.py ADDED
@@ -0,0 +1,96 @@
from abc import ABC, abstractmethod
import numpy as np
import utils
import torch


class Tuning(ABC):

    @abstractmethod
    def Image(self, image):
        pass


class TuningReconstruction(Tuning):

    def __init__(self, model, optimizer, target_representation,
                 content_feature_maps_index, style_feature_maps_indices):

        self.model = model
        self.optimizer = optimizer
        self.target_representation = target_representation
        self.content_feature_maps_index = content_feature_maps_index
        self.style_feature_maps_indices = style_feature_maps_indices

    def Image(self, image):

        # Find the current representation
        set_of_feature_maps = self.model(image)
        if utils.yamlGet('reconstruct') == 'Content':
            current_representation = set_of_feature_maps[
                self.content_feature_maps_index].squeeze(axis=0)
        elif utils.yamlGet('reconstruct') == 'Style':
            current_representation = [
                utils.gram_matrix(fmaps)
                for i, fmaps in enumerate(set_of_feature_maps)
                if i in self.style_feature_maps_indices
            ]

        loss = 0.0

        if utils.yamlGet('reconstruct') == 'Content':
            loss = torch.nn.MSELoss(reduction='mean')(
                self.target_representation, current_representation)
        elif utils.yamlGet('reconstruct') == 'Style':
            for gram_gt, gram_hat in zip(self.target_representation,
                                         current_representation):
                loss += (1 / len(self.target_representation)) * \
                    torch.nn.MSELoss(
                        reduction='sum')(gram_gt[0], gram_hat[0])

        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
        return loss.item(), current_representation


class Reconstruct(ABC):

    @abstractmethod
    def Visualize(self):
        pass


class ContentReconstruct(Reconstruct):
    """
    tcr -> target_content_representation
    """

    def __init__(self, feature_maps):
        self.fm = feature_maps
        self.tcr = self.fm['set_of_feature_maps'][
            self.fm['content_feature_maps_index_name'][0]].squeeze(axis=0)
        self.nfm = self.tcr.size()[0]

    def Visualize(self):
        # NOTE: unfinished - relies on module-level `config`, `dump_path`, `os`
        # and `content_feature_maps_index_name`, which are not defined in this file.
        for i in range(self.nfm):
            feature_map = self.tcr[i].to('cpu').numpy()
            feature_map = np.uint8(utils.get_uint8_range(feature_map))
            # plt.imshow(feature_map)
            # plt.title(
            #     f'Feature map {i+1}/{num_of_feature_maps} from layer'
            #     f' {content_feature_maps_index_name[1]} '
            #     f'(model={config["model"]}) for'
            #     f' {config["content_img_name"]} image.'
            # )
            # plt.show()
            filename = f'fm_{config["model"]}_{content_feature_maps_index_name[1]}_{str(i).zfill(config["img_format"][0])}{config["img_format"][1]}'
            utils.save_image(feature_map, os.path.join(dump_path, filename))


class StyleReconstruct(Reconstruct):
    pass


class Invoker:
    pass
src/config.yaml ADDED
@@ -0,0 +1,16 @@
contentLayer: relu4_3
contentPath: src/data/content.jpg
contentWeight: 475.71
height: 400
initImage: Content
iterations: 10
learning_rate: 0.01
model: VGG16
optimizer: Adam
reconstruct: Content
reprSavFreq: 1
stylePath: src/data/style.jpg
styleWeight: 307.7
totalVariationWeight: 854.25
type: Transfer
visualize: 'Yes'
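
config.yaml acts as the shared state between the Streamlit front end and the optimization code: app.py writes every widget value into it via `utils.yamlSet`, and the transfer/reconstruction modules read it back via `utils.yamlGet`. A small usage sketch:

```python
from src.utils import utils

utils.yamlSet('height', 400)         # app.py stores the slider value here
model_name = utils.yamlGet('model')  # e.g. "VGG16", read later by prepare_model()
```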
src/data/content.jpg ADDED
src/data/style.jpg ADDED
src/data/transfer/0000.jpg ADDED
src/data/transfer/0001.jpg ADDED
src/data/transfer/0002.jpg ADDED
src/data/transfer/0003.jpg ADDED
src/data/transfer/0004.jpg ADDED
src/data/transfer/0005.jpg ADDED
src/data/transfer/0006.jpg ADDED
src/data/transfer/0007.jpg ADDED
src/data/transfer/0008.jpg ADDED
src/data/transfer/0009.jpg ADDED
src/data/transfer/out.mp4 ADDED
Binary file (20.1 kB).
 
src/environment.yml ADDED
@@ -0,0 +1,13 @@
name: pytorch-nst
channels:
  - defaults
  - pytorch
dependencies:
  - python=3.7.6
  - pip=20.0.2
  - matplotlib=3.1.3
  - pytorch==1.4.0
  - torchvision=0.5.0
  - pip:
    - numpy==1.18.1
    - opencv-python==4.2.0.32
src/models/definitions/__init__.py ADDED
File without changes
src/models/definitions/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (191 Bytes).
 
src/models/definitions/__pycache__/vgg_nets.cpython-310.pyc ADDED
Binary file (6.01 kB).
 
src/models/definitions/vgg_nets.py ADDED
@@ -0,0 +1,241 @@
from collections import namedtuple
import torch
from torchvision import models
from src.utils import utils
"""
More detail about the VGG architecture (if you want to understand magic/hardcoded numbers) can be found here:

https://github.com/pytorch/vision/blob/3c254fb7af5f8af252c24e89949c54a3461ff0be/torchvision/models/vgg.py
"""


class Vgg16(torch.nn.Module):
    """Only those layers are exposed which have already proven to work nicely."""

    def __init__(self, requires_grad=False, show_progress=False):
        super().__init__()
        vgg_pretrained_features = models.vgg16(pretrained=True,
                                               progress=show_progress).features
        self.layer_names = {'relu1_2': 1, 'relu2_2': 2,
                            'relu3_3': 3, 'relu4_3': 4}
        # index of the content layer chosen in config.yaml (e.g. relu2_2)
        self.content_feature_maps_index = self.layer_names[
            utils.yamlGet('contentLayer')] - 1
        self.style_feature_maps_indices = list(range(len(
            self.layer_names)))  # all layers used for style representation

        self.slice1 = torch.nn.Sequential()
        self.slice2 = torch.nn.Sequential()
        self.slice3 = torch.nn.Sequential()
        self.slice4 = torch.nn.Sequential()
        for x in range(4):
            self.slice1.add_module(str(x), vgg_pretrained_features[x])
        for x in range(4, 9):
            self.slice2.add_module(str(x), vgg_pretrained_features[x])
        for x in range(9, 16):
            self.slice3.add_module(str(x), vgg_pretrained_features[x])
        for x in range(16, 23):
            self.slice4.add_module(str(x), vgg_pretrained_features[x])
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, x):
        x = self.slice1(x)
        relu1_2 = x
        x = self.slice2(x)
        relu2_2 = x
        x = self.slice3(x)
        relu3_3 = x
        x = self.slice4(x)
        relu4_3 = x
        vgg_outputs = namedtuple("VggOutputs", self.layer_names.keys())
        out = vgg_outputs(relu1_2, relu2_2, relu3_3, relu4_3)
        return out


class Vgg16Experimental(torch.nn.Module):
    """Everything exposed so you can play with different combinations for style and content representation"""

    def __init__(self, requires_grad=False, show_progress=False):
        super().__init__()
        vgg_pretrained_features = models.vgg16(pretrained=True,
                                               progress=show_progress).features
        self.layer_names = [
            'relu1_1', 'relu2_1', 'relu2_2', 'relu3_1', 'relu3_2', 'relu4_1',
            'relu4_3', 'relu5_1'
        ]
        self.content_feature_maps_index = 4
        self.style_feature_maps_indices = list(range(len(
            self.layer_names)))  # all layers used for style representation

        self.conv1_1 = vgg_pretrained_features[0]
        self.relu1_1 = vgg_pretrained_features[1]
        self.conv1_2 = vgg_pretrained_features[2]
        self.relu1_2 = vgg_pretrained_features[3]
        self.max_pooling1 = vgg_pretrained_features[4]
        self.conv2_1 = vgg_pretrained_features[5]
        self.relu2_1 = vgg_pretrained_features[6]
        self.conv2_2 = vgg_pretrained_features[7]
        self.relu2_2 = vgg_pretrained_features[8]
        self.max_pooling2 = vgg_pretrained_features[9]
        self.conv3_1 = vgg_pretrained_features[10]
        self.relu3_1 = vgg_pretrained_features[11]
        self.conv3_2 = vgg_pretrained_features[12]
        self.relu3_2 = vgg_pretrained_features[13]
        self.conv3_3 = vgg_pretrained_features[14]
        self.relu3_3 = vgg_pretrained_features[15]
        self.max_pooling3 = vgg_pretrained_features[16]
        self.conv4_1 = vgg_pretrained_features[17]
        self.relu4_1 = vgg_pretrained_features[18]
        self.conv4_2 = vgg_pretrained_features[19]
        self.relu4_2 = vgg_pretrained_features[20]
        self.conv4_3 = vgg_pretrained_features[21]
        self.relu4_3 = vgg_pretrained_features[22]
        self.max_pooling4 = vgg_pretrained_features[23]
        self.conv5_1 = vgg_pretrained_features[24]
        self.relu5_1 = vgg_pretrained_features[25]
        self.conv5_2 = vgg_pretrained_features[26]
        self.relu5_2 = vgg_pretrained_features[27]
        self.conv5_3 = vgg_pretrained_features[28]
        self.relu5_3 = vgg_pretrained_features[29]
        self.max_pooling5 = vgg_pretrained_features[30]
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, x):
        x = self.conv1_1(x)
        conv1_1 = x
        x = self.relu1_1(x)
        relu1_1 = x
        x = self.conv1_2(x)
        conv1_2 = x
        x = self.relu1_2(x)
        relu1_2 = x
        x = self.max_pooling1(x)
        x = self.conv2_1(x)
        conv2_1 = x
        x = self.relu2_1(x)
        relu2_1 = x
        x = self.conv2_2(x)
        conv2_2 = x
        x = self.relu2_2(x)
        relu2_2 = x
        x = self.max_pooling2(x)
        x = self.conv3_1(x)
        conv3_1 = x
        x = self.relu3_1(x)
        relu3_1 = x
        x = self.conv3_2(x)
        conv3_2 = x
        x = self.relu3_2(x)
        relu3_2 = x
        x = self.conv3_3(x)
        conv3_3 = x
        x = self.relu3_3(x)
        relu3_3 = x
        x = self.max_pooling3(x)
        x = self.conv4_1(x)
        conv4_1 = x
        x = self.relu4_1(x)
        relu4_1 = x
        x = self.conv4_2(x)
        conv4_2 = x
        x = self.relu4_2(x)
        relu4_2 = x
        x = self.conv4_3(x)
        conv4_3 = x
        x = self.relu4_3(x)
        relu4_3 = x
        x = self.max_pooling4(x)
        x = self.conv5_1(x)
        conv5_1 = x
        x = self.relu5_1(x)
        relu5_1 = x
        x = self.conv5_2(x)
        conv5_2 = x
        x = self.relu5_2(x)
        relu5_2 = x
        x = self.conv5_3(x)
        conv5_3 = x
        x = self.relu5_3(x)
        relu5_3 = x
        x = self.max_pooling5(x)
        # expose only the layers that you want to experiment with here
        vgg_outputs = namedtuple("VggOutputs", self.layer_names)
        out = vgg_outputs(relu1_1, relu2_1, relu2_2, relu3_1, relu3_2, relu4_1,
                          relu4_3, relu5_1)

        return out


class Vgg19(torch.nn.Module):
    """
    Used in the original NST paper, only those layers are exposed which were used in the original paper

    'conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1' were used for style representation
    'conv4_2' was used for content representation (although they did some experiments with conv2_2 and conv5_2)
    """

    def __init__(self,
                 requires_grad=False,
                 show_progress=False,
                 use_relu=True):
        super().__init__()
        vgg_pretrained_features = models.vgg19(pretrained=True,
                                               progress=show_progress).features
        if use_relu:  # use relu or, as in the original paper, conv layers
            self.layer_names = [
                'relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1'
            ]
            self.offset = 1
        else:
            self.layer_names = [
                'conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv4_2',
                'conv5_1'
            ]
            self.offset = 0
        self.content_feature_maps_index = 4  # conv4_2
        # all layers used for style representation except conv4_2
        self.style_feature_maps_indices = list(range(len(self.layer_names)))
        self.style_feature_maps_indices.remove(4)  # conv4_2

        self.slice1 = torch.nn.Sequential()
        self.slice2 = torch.nn.Sequential()
        self.slice3 = torch.nn.Sequential()
        self.slice4 = torch.nn.Sequential()
        self.slice5 = torch.nn.Sequential()
        self.slice6 = torch.nn.Sequential()
        for x in range(1 + self.offset):
            self.slice1.add_module(str(x), vgg_pretrained_features[x])
        for x in range(1 + self.offset, 6 + self.offset):
            self.slice2.add_module(str(x), vgg_pretrained_features[x])
        for x in range(6 + self.offset, 11 + self.offset):
            self.slice3.add_module(str(x), vgg_pretrained_features[x])
        for x in range(11 + self.offset, 20 + self.offset):
            self.slice4.add_module(str(x), vgg_pretrained_features[x])
        for x in range(20 + self.offset, 22):
            self.slice5.add_module(str(x), vgg_pretrained_features[x])
        for x in range(22, 29 + self.offset):
            self.slice6.add_module(str(x), vgg_pretrained_features[x])
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, x):
        x = self.slice1(x)
        layer1_1 = x
        x = self.slice2(x)
        layer2_1 = x
        x = self.slice3(x)
        layer3_1 = x
        x = self.slice4(x)
        layer4_1 = x
        x = self.slice5(x)
        conv4_2 = x
        x = self.slice6(x)
        layer5_1 = x
        vgg_outputs = namedtuple("VggOutputs", self.layer_names)
        out = vgg_outputs(layer1_1, layer2_1, layer3_1, layer4_1, conv4_2,
                          layer5_1)
        return out
src/neural_style_transfer.py ADDED
@@ -0,0 +1,163 @@
import os
import src.utils.utils as utils
from src.utils.video_utils import create_video_from_intermediate_results
import torch
from torch import nn
from torch.optim import Adam, LBFGS
from torch.autograd import Variable


class ContentLoss(nn.Module):
    def __init__(self, target):
        super(ContentLoss, self).__init__()
        self.target = target.detach()

    def forward(self, current):
        return nn.MSELoss(reduction='mean')(self.target, current)


class StyleLoss(nn.Module):
    def __init__(self):
        super(StyleLoss, self).__init__()
        self.loss = 0.0

    def forward(self, x, y):
        for gram_gt, gram_hat in zip(x, y):
            self.loss += torch.nn.MSELoss(reduction='sum')(gram_gt[0], gram_hat[0])
        self.loss /= len(x)
        return self.loss


class Build(nn.Module):
    def __init__(
        self,
        config,
        target_content_representation,
        target_style_representation,
    ):
        super(Build, self).__init__()
        self.current_set_of_feature_maps = None
        self.current_content_representation = None
        self.current_style_representation = None
        self.config = config
        self.target_content_representation = target_content_representation
        self.target_style_representation = target_style_representation

    def forward(self, model, x):
        self.current_set_of_feature_maps = model(x)

        self.current_content_representation = self.current_set_of_feature_maps[
            self.config.content_feature_maps_index].squeeze(axis=0)
        self.current_style_representation = [
            utils.gram_matrix(x)
            for cnt, x in enumerate(self.current_set_of_feature_maps)
            if cnt in self.config.style_feature_maps_indices
        ]
        content_loss = ContentLoss(self.target_content_representation)(
            self.current_content_representation)
        style_loss = StyleLoss()(
            self.target_style_representation,
            self.current_style_representation)
        tv_loss = TotalVariationLoss(x)()

        return Loss()(content_loss, style_loss, tv_loss)


class TotalVariationLoss(nn.Module):
    def __init__(self, y):
        super(TotalVariationLoss, self).__init__()
        self.first = torch.sum(torch.abs(y[:, :, :, :-1] - y[:, :, :, 1:]))
        self.second = torch.sum(torch.abs(y[:, :, :-1, :] - y[:, :, 1:, :]))

    def forward(self):
        return self.first + self.second


class Loss(nn.Module):
    def __init__(self):
        super(Loss, self).__init__()

    def forward(self, x, y, z):
        return utils.yamlGet("contentWeight") * x + utils.yamlGet(
            "styleWeight") * y + utils.yamlGet("totalVariationWeight") * z


def neural_style_transfer():

    dump_path = os.path.join(os.path.dirname(__file__), "data/transfer")
    config = utils.Config()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    content_img, style_img, init_img = utils.Images().getImages(device)
    optimizing_img = Variable(init_img, requires_grad=True)

    output = list(utils.prepare_model(device))
    neural_net = output[0]
    content_feature_maps_index_name = output[1]
    style_feature_maps_indices_names = output[2]

    config.content_feature_maps_index = content_feature_maps_index_name[0]
    config.style_feature_maps_indices = style_feature_maps_indices_names[0]

    content_img_set_of_feature_maps = neural_net(content_img)
    style_img_set_of_feature_maps = neural_net(style_img)

    target_content_representation = content_img_set_of_feature_maps[
        config.content_feature_maps_index].squeeze(axis=0)
    target_style_representation = [
        utils.gram_matrix(x)
        for cnt, x in enumerate(style_img_set_of_feature_maps)
        if cnt in config.style_feature_maps_indices
    ]

    if utils.yamlGet('optimizer') == 'Adam':
        optimizer = Adam((optimizing_img, ), lr=utils.yamlGet('learning_rate'))
        for cnt in range(utils.yamlGet("iterations")):

            total_loss = Build(config, target_content_representation,
                               target_style_representation)(neural_net,
                                                            optimizing_img)

            total_loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            with torch.no_grad():
                utils.save_optimizing_image(optimizing_img, dump_path, cnt)

    elif utils.yamlGet('optimizer') == 'LBFGS':
        optimizer = LBFGS((optimizing_img, ),
                          max_iter=utils.yamlGet('iterations'),
                          line_search_fn='strong_wolfe')
        cnt = 0

        def closure():
            nonlocal cnt
            optimizer.zero_grad()
            total_loss = Build(config, target_content_representation,
                               target_style_representation)(neural_net,
                                                            optimizing_img)
            total_loss.backward()
            with torch.no_grad():
                utils.save_optimizing_image(optimizing_img, dump_path, cnt)
            cnt += 1
            return total_loss

        # L-BFGS evaluates the closure multiple times within a single step() call
        optimizer.step(closure)

    create_video_from_intermediate_results(dump_path)


# some values of weights that worked for figures.jpg, vg_starry_night.jpg
# (starting point for finding good images)
# once you understand what each one does it gets really easy -> also see
# README.md

# lbfgs, content init -> (cw, sw, tv) = (1e5, 3e4, 1e0)
# lbfgs, style init -> (cw, sw, tv) = (1e5, 1e1, 1e-1)
# lbfgs, random init -> (cw, sw, tv) = (1e5, 1e3, 1e0)

# adam, content init -> (cw, sw, tv, lr) = (1e5, 1e5, 1e-1, 1e1)
# adam, style init -> (cw, sw, tv, lr) = (1e5, 1e2, 1e-1, 1e1)
# adam, random init -> (cw, sw, tv, lr) = (1e5, 1e2, 1e-1, 1e1)

# original NST (Neural Style Transfer) algorithm (Gatys et al.)
# results_path = neural_style_transfer()
# create_video_from_intermediate_results(results_path)
src/reconstruct_image_from_representation.py ADDED
@@ -0,0 +1,108 @@
import os
import src.utils.utils as utils
from src.utils.video_utils import create_video_from_intermediate_results
import torch
from torch.autograd import Variable
from torch.optim import Adam, LBFGS
import numpy as np


def make_tuning_step(optimizer, config):

    def tuning_step(optimizing_img):

        config.current_set_of_feature_maps = config.neural_net(optimizing_img)
        loss, config.current_representation = utils.getCurrentData(config)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        return loss.item(), config.current_representation

    return tuning_step


def reconstruct_image_from_representation():

    dump_path = os.path.join(os.path.dirname(__file__), "data/reconstruct")
    config = utils.Config()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    img, img_path = utils.getImageAndPath(device)
    white_noise_img = np.random.uniform(-90., 90.,
                                        img.shape).astype(np.float32)
    init_img = torch.from_numpy(white_noise_img).float().to(device)
    optimizing_img = Variable(init_img, requires_grad=True)

    # indices pick relevant feature maps (say conv4_1, relu1_1, etc.)
    output = list(utils.prepare_model(device))
    config.neural_net = output[0]
    content_feature_maps_index_name = output[1]
    style_feature_maps_indices_names = output[2]

    config.content_feature_maps_index = content_feature_maps_index_name[0]
    config.style_feature_maps_indices = style_feature_maps_indices_names[0]

    config.current_set_of_feature_maps = config.neural_net(img)

    config.target_content_representation = config.current_set_of_feature_maps[
        config.content_feature_maps_index].squeeze(axis=0)
    config.target_style_representation = [
        utils.gram_matrix(fmaps)
        for i, fmaps in enumerate(config.current_set_of_feature_maps)
        if i in config.style_feature_maps_indices
    ]

    if utils.yamlGet('reconstruct') == "Content":
        config.target_representation = config.target_content_representation
        num_of_feature_maps = config.target_content_representation.size()[0]
        for i in range(num_of_feature_maps):
            feature_map = config.target_content_representation[i].to(
                'cpu').numpy()
            feature_map = np.uint8(utils.get_uint8_range(feature_map))
            # filename = f'fm_{config["model"]}_{content_feature_maps_index_name[1]}_{str(i).zfill(config["img_format"][0])}{config["img_format"][1]}'
            # utils.save_image(feature_map, os.path.join(dump_path, filename))

    elif utils.yamlGet('reconstruct') == "Style":
        config.target_representation = config.target_style_representation
        num_of_gram_matrices = len(config.target_style_representation)
        for i in range(num_of_gram_matrices):
            Gram_matrix = config.target_style_representation[i].squeeze(
                axis=0).to('cpu').numpy()
            Gram_matrix = np.uint8(utils.get_uint8_range(Gram_matrix))
            # filename = f'gram_{config["model"]}_{style_feature_maps_indices_names[1][i]}_{str(i).zfill(config["img_format"][0])}{config["img_format"][1]}'
            # utils.save_image(Gram_matrix, os.path.join(dump_path, filename))

    if utils.yamlGet('optimizer') == 'Adam':
        optimizer = Adam((optimizing_img, ), lr=utils.yamlGet('learning_rate'))
        tuning_step = make_tuning_step(optimizer, config)
        for it in range(utils.yamlGet('iterations')):
            tuning_step(optimizing_img)
            with torch.no_grad():
                utils.save_optimizing_image(optimizing_img, dump_path, it)

    elif utils.yamlGet('optimizer') == 'LBFGS':
        optimizer = LBFGS((optimizing_img, ),
                          max_iter=utils.yamlGet('iterations'),
                          line_search_fn='strong_wolfe')
        cnt = 0

        def closure():
            nonlocal cnt
            optimizer.zero_grad()
            loss = utils.getLBFGSReconstructLoss(config, optimizing_img)
            loss.backward()
            with torch.no_grad():
                utils.save_optimizing_image(optimizing_img, dump_path, cnt)
            cnt += 1
            return loss

        optimizer.step(closure)

    return dump_path


if __name__ == "__main__":

    # reconstruct the style or content image purely from its representation
    results_path = reconstruct_image_from_representation()

    create_video_from_intermediate_results(results_path)
src/utils/__init__.py ADDED
File without changes
src/utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (178 Bytes).
 
src/utils/__pycache__/utils.cpython-310.pyc ADDED
Binary file (8.67 kB).
 
src/utils/__pycache__/video_utils.cpython-310.pyc ADDED
Binary file (1.07 kB).
 
src/utils/utils.py ADDED
@@ -0,0 +1,282 @@
import cv2 as cv
import numpy as np
import torch
from torchvision import transforms
import os
import yaml
import PIL.Image as Image
from src.models.definitions.vgg_nets import Vgg16, Vgg19, Vgg16Experimental

IMAGENET_MEAN_255 = [123.675, 116.28, 103.53]
IMAGENET_STD_NEUTRAL = [1, 1, 1]


def load_image(img_path, target_shape=None):
    if not os.path.exists(img_path):
        raise Exception(f'Path does not exist: {img_path}')
    img = cv.imread(img_path)[:, :, ::-1]
    if target_shape is not None:  # resize section
        current_height, current_width = img.shape[:2]
        new_height = target_shape
        new_width = int(current_width * (new_height / current_height))
        img = cv.resize(img, (new_width, new_height),
                        interpolation=cv.INTER_CUBIC)

    # this needs to go after resizing - otherwise cv.resize will push values outside of [0,1] range
    img = img.astype(np.float32)  # convert from uint8 to float32
    img /= 255.0  # get to [0, 1] range
    return img


def getInitImage(content_img, style_img, device):

    if yamlGet("initImage") == 'White Noise Image':
        white_noise_img = np.random.uniform(
            -90., 90., content_img.shape).astype(np.float32)
        init_img = torch.from_numpy(white_noise_img).float().to(device)

    elif yamlGet("initImage") == 'Gaussian Noise Image':
        gaussian_noise_img = np.random.normal(loc=0,
                                              scale=90.,
                                              size=content_img.shape).astype(
                                                  np.float32)
        init_img = torch.from_numpy(gaussian_noise_img).float().to(device)

    elif yamlGet("initImage") == 'Content':
        init_img = content_img

    else:
        # init image has same dimension as content image - this is a hard constraint
        # feature maps need to be of same size for content image and init image
        style_img_resized = prepare_img(style_img,
                                        np.asarray(content_img.shape[2:]),
                                        device)
        init_img = style_img_resized
    return init_img


def prepare_img(img_path, target_shape, device):
    img = load_image(img_path, target_shape=target_shape)

    # normalize using ImageNet's mean
    # [0, 255] range worked much better for me than [0, 1] range (even though PyTorch models were trained on the latter)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255)),
        transforms.Normalize(mean=IMAGENET_MEAN_255, std=IMAGENET_STD_NEUTRAL)
    ])

    img = transform(img).to(device).unsqueeze(0)

    return img


def save_image(img, img_path):
    if len(img.shape) == 2:
        img = np.stack((img, ) * 3, axis=-1)
    cv.imwrite(img_path, img[:, :, ::-1]
               )  # [:, :, ::-1] converts rgb into bgr (opencv constraint...)


def save_optimizing_image(optimizing_img, dump_path, img_id):
    img_format = (4, '.jpg')
    saving_freq = yamlGet('reprSavFreq')
    out_img = optimizing_img.squeeze(axis=0).to('cpu').detach().numpy()
    out_img = np.moveaxis(
        out_img, 0,
        2)  # swap channel from 1st to 3rd position: ch, _, _ -> _, _, ch

    if img_id == yamlGet('iterations') - 1 or \
            (saving_freq > 0 and img_id % saving_freq == 0):

        out_img_name = str(img_id).zfill(img_format[0]) + img_format[1] \
            if saving_freq != -1 else None
        dump_img = np.copy(out_img)
        dump_img += np.array(IMAGENET_MEAN_255).reshape((1, 1, 3))
        dump_img = np.clip(dump_img, 0, 255).astype('uint8')
        cv.imwrite(os.path.join(dump_path, out_img_name), dump_img[:, :, ::-1])
        print(f"{out_img_name} written to {dump_path}")

    # if should_display:
    #     plt.imshow(np.uint8(get_uint8_range(out_img)))
    #     plt.show()


def get_uint8_range(x):
    if isinstance(x, np.ndarray):
        x -= np.min(x)
        x /= np.max(x)
        x *= 255
        return x
    else:
        raise ValueError(f'Expected numpy array got {type(x)}')


def prepare_model(device):

    model = yamlGet('model')
    if model == 'VGG16':
        model = Vgg16(requires_grad=False, show_progress=True)
    elif model == 'VGG16-Experimental':
        model = Vgg16Experimental(requires_grad=False, show_progress=True)
    elif model == 'VGG19':
        model = Vgg19(requires_grad=False, show_progress=True)
    else:
        raise ValueError(f'{model} not supported.')

    content_feature_maps_index = model.content_feature_maps_index
    style_feature_maps_indices = model.style_feature_maps_indices
    # layer_names is a dict for Vgg16 and a list for the other models
    if isinstance(model.layer_names, dict):
        layer_names = list(model.layer_names.keys())
    else:
        layer_names = list(model.layer_names)

    content_fms_index_name = (content_feature_maps_index,
                              layer_names[content_feature_maps_index])
    style_fms_indices_names = (style_feature_maps_indices, layer_names)
    return model.to(
        device).eval(), content_fms_index_name, style_fms_indices_names


def yamlSet(key, value):
    with open('src/config.yaml', 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    config[key] = value
    with open('src/config.yaml', 'w') as f:
        yaml.dump(config, f, default_flow_style=False)


def yamlGet(key):
    with open('src/config.yaml', 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    return config[key]


def save_numpy_array_as_jpg(array, name):
    image = Image.fromarray(array)
    image.save("src/data/" + str(name) + '.jpg')
    return "src/data/" + str(name) + '.jpg'


def gram_matrix(x, should_normalize=True):
    (b, ch, h, w) = x.size()
    features = x.view(b, ch, w * h)
    features_t = features.transpose(1, 2)
    gram = features.bmm(features_t)
    if should_normalize:
        gram /= ch * h * w
    return gram


def total_variation(y):
    # summed absolute differences between neighbouring pixels (same formula as
    # TotalVariationLoss in src/neural_style_transfer.py)
    return (torch.sum(torch.abs(y[:, :, :, :-1] - y[:, :, :, 1:])) +
            torch.sum(torch.abs(y[:, :, :-1, :] - y[:, :, 1:, :])))


def getImageAndPath(device):

    if yamlGet('reconstruct') == 'Content':
        img_path = yamlGet('contentPath')
    elif yamlGet('reconstruct') == 'Style':
        img_path = yamlGet('stylePath')

    img = prepare_img(img_path, yamlGet('height'), device)

    return img, img_path


def getContentCurrentData(config):
    current_representation = config.current_set_of_feature_maps[
        config.content_feature_maps_index].squeeze(axis=0)
    loss = torch.nn.MSELoss(reduction='mean')(config.target_representation,
                                              current_representation)
    return loss, current_representation


def getStyleCurrentData(config):
    current_representation = [
        gram_matrix(x)
        for cnt, x in enumerate(config.current_set_of_feature_maps)
        if cnt in config.style_feature_maps_indices
    ]
    loss = 0.0
    for gram_gt, gram_hat in zip(config.target_style_representation,
                                 current_representation):
        loss += torch.nn.MSELoss(reduction='sum')(gram_gt[0], gram_hat[0])

    loss /= len(config.target_style_representation)
    return loss, current_representation


def getCurrentData(config):
    if yamlGet('reconstruct') == 'Content':
        return getContentCurrentData(config)

    elif yamlGet('reconstruct') == 'Style':
        return getStyleCurrentData(config)


def getLBFGSReconstructLoss(config, optimizing_img):

    loss = 0.0

    if yamlGet('reconstruct') == 'Content':
        loss = torch.nn.MSELoss(reduction='mean')(
            config.target_content_representation,
            config.neural_net(optimizing_img)[
                config.content_feature_maps_index].squeeze(axis=0))

    else:
        config.current_set_of_feature_maps = config.neural_net(optimizing_img)
        current_style_representation = [
            gram_matrix(fmaps)
            for i, fmaps in enumerate(config.current_set_of_feature_maps)
            if i in config.style_feature_maps_indices
        ]
        for gram_gt, gram_hat in zip(config.target_style_representation,
                                     current_style_representation):

            loss += (1 / len(config.target_style_representation)) * \
                torch.nn.MSELoss(reduction='sum')(gram_gt[0], gram_hat[0])

    return loss


class Config:

    def __init__(self):
        self.target_representation = 0
        self.target_content_representation = 0
        self.target_style_representation = 0
        self.content_feature_maps_index = 0
        self.style_feature_maps_indices = 0
        self.current_set_of_feature_maps = 0
        self.current_representation = 0
        self.neural_net = 0


class Images:

    def getImages(self, device):

        return [
            self.__getContentImage(device),
            self.__getStyleImage(device),
            self.__getInitImage(device),
        ]

    def __getContentImage(self, device):
        return prepare_img(yamlGet('contentPath'), yamlGet('height'), device)

    def __getStyleImage(self, device):
        return prepare_img(yamlGet('stylePath'), yamlGet('height'), device)

    def __getInitImage(self, device):
        return getInitImage(self.__getContentImage(device),
                            self.__getStyleImage(device), device)


def clearDir():
    path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data")
    reconstructPath = os.path.join(path, "reconstruct")
    transferPath = os.path.join(path, "transfer")
    for transfer_file in os.scandir(transferPath):
        os.remove(transfer_file)
    for reconstruct_file in os.scandir(reconstructPath):
        os.remove(reconstruct_file)
src/utils/video_utils.py ADDED
@@ -0,0 +1,38 @@
import os
import subprocess
import shutil


def create_video_from_intermediate_results(results_path):
    #
    # change this depending on what you want to accomplish (modify out video
    # name, change fps and trim video)
    #
    img_format = (4, '.jpg')
    out_file_name = 'out.mp4'
    fps = 10
    first_frame = 0
    number_of_frames_to_process = len(os.listdir(results_path))
    ffmpeg = 'ffmpeg'
    if shutil.which(ffmpeg):  # if ffmpeg is in system path
        # example: '%4d.png' for (4, '.png')
        img_name_format = '%' + str(img_format[0]) + 'd' + img_format[1]
        pattern = os.path.join(results_path, img_name_format)
        out_video_path = os.path.join(results_path, out_file_name)

        trim_video_command = [
            '-start_number',
            str(first_frame), '-vframes',
            str(number_of_frames_to_process)
        ]
        input_options = ['-r', str(fps), '-i', pattern]
        encoding_options = [
            '-c:v', 'libx264', '-crf', '25', '-pix_fmt', 'yuv420p',
            '-vf', "pad=ceil(iw/2)*2:ceil(ih/2)*2"
        ]
        subprocess.call([
            ffmpeg, *input_options, *trim_video_command, *encoding_options,
            out_video_path
        ])
    else:
        print(f'{ffmpeg} not found in the system path, aborting.')