priyam314 committed
Commit
cbcb207
1 Parent(s): bb603a9

first commit

app.py ADDED
@@ -0,0 +1,180 @@
import io
import streamlit as st
import numpy as np
from src.utils import utils
import PIL.Image as Image
from src.reconstruct_image_from_representation import reconstruct_image_from_representation
from src.neural_style_transfer import neural_style_transfer

st.set_page_config(
    page_title="Neural Style Transfer Video Generation of image reconstruction",
    page_icon="\u2712",
    layout="wide",
    initial_sidebar_state="expanded",
)

st.header("Neural Style Transfer Video Generation")

# Sidebar
st.sidebar.header("Neural Style Transfer Video Generation")
with st.sidebar.expander('About the app'):
    st.write("""
        Use this application to play with Neural Style Transfer
        by generating a video of the optimization process.
        """)

# Reconstruct or Transfer
with st.sidebar.container():
    st.sidebar.subheader("Reconstruct or Transfer")

Type = st.sidebar.selectbox("Do you want to reconstruct or transfer",
                            ["Reconstruct", "Transfer"])
utils.yamlSet('type', Type)

# Optimizer
with st.sidebar.container():
    st.sidebar.subheader("Optimizer")

optimizer = st.sidebar.selectbox("Choose Optimizer", ["Adam", "LBFGS"])
utils.yamlSet('optimizer', optimizer)

iterations = st.sidebar.slider("Iterations", 10, 3000)
utils.yamlSet('iterations', iterations)

if optimizer == "Adam":
    learning_rate = st.sidebar.slider("Learning Rate (100\u03BB)", 0.01,
                                      90.0)
    utils.yamlSet('learning_rate', learning_rate)
    st.sidebar.write("\u03BB = ", learning_rate / 100.0)

# Reconstruction
if Type == "Reconstruct":
    with st.sidebar.container():
        st.sidebar.subheader("Reconstruction")
        reconstruct = st.sidebar.selectbox("Reconstruct which image",
                                           ('Content', 'Style'))
        utils.yamlSet('reconstruct', reconstruct)

    # Visualization
    with st.sidebar.container():
        st.sidebar.subheader("Visualization")
        visualize = st.sidebar.selectbox(
            "Do you want to visualize feature maps of reconstructed images",
            ("Yes", "No"))
        utils.yamlSet('visualize', visualize)

# Model
with st.sidebar.container():
    st.sidebar.subheader("Model")
    model = st.sidebar.selectbox("Choose Model",
                                 ("VGG16", "VGG16-Experimental"))
    utils.yamlSet('model', model)

# # use layer
# if model == "VGG19":
#     with st.sidebar.container():
#         st.sidebar.subheader("Layer Type")
#         use = st.sidebar.selectbox("Which type of layer you want to use",
#                                    ("convolution", "relu"))

# Init Image
if Type == "Transfer":
    with st.sidebar.container():
        st.sidebar.subheader("Init Image")
        initImage = st.sidebar.selectbox(
            "Init Image",
            ('Gaussian Noise Image', 'White Noise Image', 'Content', 'Style'))
        utils.yamlSet('initImage', initImage)

# Content Layer
with st.sidebar.container():
    st.sidebar.subheader("Content Layer")
    if model == "VGG16-Experimental":
        contentLayer = st.sidebar.selectbox(
            "Content Layer", ('relu1_1', 'relu2_1', 'relu2_2', 'relu3_1',
                              'relu3_2', 'relu4_1', 'relu4_3', 'relu5_1'))
    elif model == "VGG16":
        contentLayer = st.sidebar.selectbox(
            "Content Layer", ('relu1_2', 'relu2_2', 'relu3_3', 'relu4_3'))
    utils.yamlSet('contentLayer', contentLayer)
    # elif model == "VGG19" and use == "relu":
    #     st.sidebar.selectbox("Content Layer",
    #                          ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1'))
    # elif model == "VGG19" and use == "convolution":
    #     st.sidebar.selectbox("Content Layer",
    #                          ('conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv4_2',
    #                           'conv5_1'))

# Height
with st.sidebar.container():
    st.sidebar.subheader("Height")
    height = st.sidebar.slider("Height", 100, 6000, 400)
    utils.yamlSet('height', height)

# Representation saving frequency
with st.sidebar.container():
    st.sidebar.subheader("Representation Saving Frequency")
    reprSavFreq = st.sidebar.slider(
        "After how many iterations you want to save representation for "
        "video generation", 1, 100)
    utils.yamlSet('reprSavFreq', reprSavFreq)

if Type == "Transfer":
    # Content Weight
    col1, col2 = st.columns([0.85, 0.15])
    with col1:
        contentWeight = st.slider("Content Weight (1000\u03B1)", 0.01, 1000.0)
        utils.yamlSet('contentWeight', contentWeight)

    with col2:
        st.write("\u03B1 = ", contentWeight / 1000.0)

    # Style Weight
    col1, col2 = st.columns([0.85, 0.15])
    with col1:
        styleWeight = st.slider("Style Weight (1000\u03B2)", 0.01, 1000.0)
        utils.yamlSet('styleWeight', styleWeight)

    with col2:
        st.write("\u03B2 = ", styleWeight / 1000.0)

    # Total Variation Weight
    col1, col2 = st.columns([0.85, 0.15])
    with col1:
        totalVariationWeight = st.slider("Total Variation Weight (1000\u03B3)",
                                         0.01, 1000.0)
        utils.yamlSet('totalVariationWeight', totalVariationWeight)

    with col2:
        st.write("\u03B3 = ", totalVariationWeight / 1000.0)

# File upload
col1, col2 = st.columns([0.5, 0.5])
with col1:
    contentImage = st.file_uploader('Choose Content Image', type=['jpg'])
    if contentImage:
        st.image(contentImage)
        contentNumpy = np.asarray(
            Image.open(io.BytesIO(contentImage.getvalue())))
        contentPath = utils.save_numpy_array_as_jpg(contentNumpy, "content")
        utils.yamlSet('contentPath', contentPath)

with col2:
    styleImage = st.file_uploader('Choose Style Image', type=['jpg'])
    if styleImage:
        st.image(styleImage)
        styleNumpy = np.asarray(Image.open(io.BytesIO(styleImage.getvalue())))
        stylePath = utils.save_numpy_array_as_jpg(styleNumpy, "style")
        utils.yamlSet("stylePath", stylePath)

submit = st.button("Submit")

if submit:
    utils.clearDir()
    if Type == "Reconstruct":
        reconstruct_image_from_representation()
    elif Type == "Transfer":
        neural_style_transfer()
        video_file = open("src/data/transfer/out.mp4", "rb")
        video_bytes = video_file.read()
        st.video(video_bytes)
makefile ADDED
@@ -0,0 +1,3 @@
format:
	yapf -i --recursive src/
	yapf -i app.py
requirements.txt ADDED
@@ -0,0 +1,57 @@
altair==4.2.2
attrs==22.2.0
blinker==1.5
cachetools==5.3.0
certifi==2022.12.7
charset-normalizer==3.0.1
click==8.1.3
decorator==5.1.1
entrypoints==0.4
gitdb==4.0.10
GitPython==3.1.31
idna==3.4
importlib-metadata==6.0.0
Jinja2==3.1.2
jsonschema==4.17.3
markdown-it-py==2.2.0
MarkupSafe==2.1.2
mdurl==0.1.2
numpy==1.24.2
nvidia-cublas-cu11==11.10.3.66
nvidia-cuda-nvrtc-cu11==11.7.99
nvidia-cuda-runtime-cu11==11.7.99
nvidia-cudnn-cu11==8.5.0.96
opencv-python==4.7.0.72
packaging==23.0
pandas==1.5.3
Pillow==9.4.0
protobuf==3.20.3
pyarrow==11.0.0
pydeck==0.8.0
Pygments==2.14.0
Pympler==1.0.1
pyrsistent==0.19.3
python-dateutil==2.8.2
pytz==2022.7.1
pytz-deprecation-shim==0.1.0.post0
PyYAML==6.0
requests==2.28.2
rich==13.3.1
semver==2.13.0
six==1.16.0
smmap==5.0.0
streamlit==1.19.0
toml==0.10.2
toolz==0.12.0
torch==1.13.1
torchaudio==0.13.1
torchvision==0.14.1
tornado==6.2
typing_extensions==4.5.0
tzdata==2022.7
tzlocal==4.2
urllib3==1.26.14
validators==0.20.0
watchdog==2.3.0
yapf==0.32.0
zipp==3.15.0
src/README.md ADDED
@@ -0,0 +1,239 @@
## Neural Style Transfer (optimization method) :computer: + :art: = :heart:
This repo contains a concise PyTorch implementation of the original NST paper (:link: [Gatys et al.](https://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Gatys_Image_Style_Transfer_CVPR_2016_paper.pdf)).

It's an accompanying repository for [this video series on YouTube](https://www.youtube.com/watch?v=S78LQebx6jo&list=PLBoQnSflObcmbfshq9oNs41vODgXG-608).

<p align="left">
<a href="https://www.youtube.com/watch?v=S78LQebx6jo" target="_blank"><img src="https://img.youtube.com/vi/S78LQebx6jo/0.jpg"
alt="NST Intro" width="480" height="360" border="10" /></a>
</p>

### What is the NST algorithm?
The algorithm transfers the style of one input image (the style image) onto another input image (the content image) using a CNN (usually VGG-16/19). The result is a composite, stylized image that keeps the content of the content image but takes on the style of the style image.

<p align="center">
<img src="data/examples/bridge/green_bridge_vg_la_cafe_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0.jpg" width="570"/>
<img src="data/examples/bridge/content_style.jpg" width="260"/>
</p>
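
To make the objective concrete: the stylized image is obtained by running an optimizer directly on the image pixels, minimizing a weighted sum of a content term, a style term, and a total variation term. A minimal sketch (the default weights shown are the `cw`/`sw`/`tv` values that appear in the example file names; the three terms themselves are discussed in the sections below):

```python
import torch

def total_nst_loss(content_loss: torch.Tensor,
                   style_loss: torch.Tensor,
                   tv_loss: torch.Tensor,
                   content_weight: float = 1e5,  # "cw" in the example file names
                   style_weight: float = 3e4,    # "sw"
                   tv_weight: float = 1e0) -> torch.Tensor:
    # Adam or L-BFGS updates the image pixels to minimize this scalar.
    return content_weight * content_loss + style_weight * style_loss + tv_weight * tv_loss
```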
### Why yet another NST repo?
It's the **cleanest and most concise** NST repo that I know of + it's written in **PyTorch!** :heart:

Most NST repos were written in TensorFlow (before it even had an L-BFGS optimizer) or in torch (an obsolete, Lua-based framework). They are often overly complicated, bundling multiple functionalities (video, static image, color transfer, etc.) into a single repo and exposing 100 command-line parameters, of which maybe 5 or 6 are actually used on a regular basis.

## Examples

Transferring style gives beautiful artistic results:

<p align="center">
<img src="data/examples/bridge/green_bridge_vg_starry_night_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">
<img src="data/examples/bridge/green_bridge_edtaonisl_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">
<img src="data/examples/bridge/green_bridge_wave_crop_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">

<img src="data/examples/lion/lion_candy_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">
<img src="data/examples/lion/lion_edtaonisl_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">
<img src="data/examples/lion/lion_vg_la_cafe_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">
</p>

And here are some results coupled with their style:

<p align="center">
<img src="data/examples/figures/figures_ben_giles_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="400px">
<img src="data/style-images/ben_giles.jpg" width="267px">

<img src="data/examples/figures/figures_wave_crop_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="400px">
<img src="data/style-images/wave_crop.jpg" width="267px">

<img src="data/examples/figures/figures_vg_wheat_field_w_350_m_vgg19_cw_100000.0_sw_300000.0_tv_1.0_resized.jpg" width="400px">
<img src="data/style-images/vg_wheat_field_cropped.jpg" width="267px">

<img src="data/examples/figures/figures_vg_starry_night_w_350_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="400px">
<img src="data/style-images/vg_starry_night_resized.jpg" width="267px">
</p>

*Note: all of the stylized images were produced by me (using this repo); credits for the original image artists [are given below](#acknowledgements).*

### Content/Style tradeoff

Changing the style weight gives you less or more style on the final image, assuming you keep the content weight constant. <br/>
Here I used powers of 10 for the style weight (1e1, 1e2, 1e3, 1e4), kept the content weight constant at 1e5, and used a random image as the initialization image.

<p align="center">
<img src="data/examples/style-tradeoff/figures_vg_starry_night_o_lbfgs_i_random_h_352_m_vgg19_cw_100000.0_sw_10.0_tv_1.0_resized.jpg" width="200px">
<img src="data/examples/style-tradeoff/figures_vg_starry_night_o_lbfgs_i_random_h_352_m_vgg19_cw_100000.0_sw_100.0_tv_1.0_resized.jpg" width="200px">
<img src="data/examples/style-tradeoff/figures_vg_starry_night_o_lbfgs_i_random_h_352_m_vgg19_cw_100000.0_sw_1000.0_tv_1.0_resized.jpg" width="200px">
<img src="data/examples/style-tradeoff/figures_vg_starry_night_o_lbfgs_i_random_h_352_m_vgg19_cw_100000.0_sw_10000.0_tv_1.0_resized.jpg" width="200px">
</p>

### Impact of total variation (tv) loss

Rarely explained, the total variation loss, i.e. its corresponding weight, controls the smoothness of the image. <br/>
I again used powers of 10 (1e1, 1e4, 1e5, 1e6) and used the content image as the initialization image.

<p align="center">
<img src="data/examples/tv-tradeoff/figures_candy_o_lbfgs_i_content_h_350_m_vgg19_cw_100000.0_sw_30000.0_tv_10.0_resized.jpg" width="200px">
<img src="data/examples/tv-tradeoff/figures_candy_o_lbfgs_i_content_h_350_m_vgg19_cw_100000.0_sw_30000.0_tv_10000.0_resized.jpg" width="200px">
<img src="data/examples/tv-tradeoff/figures_candy_o_lbfgs_i_content_h_350_m_vgg19_cw_100000.0_sw_30000.0_tv_100000.0_resized.jpg" width="200px">
<img src="data/examples/tv-tradeoff/figures_candy_o_lbfgs_i_content_h_350_m_vgg19_cw_100000.0_sw_30000.0_tv_1000000.0_resized.jpg" width="200px">
</p>
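
For reference, total variation here is simply the summed absolute difference between neighbouring pixels, so a larger tv weight penalizes high-frequency noise and smooths the result. A minimal sketch of the term (the same formula used by the `TotalVariationLoss` module in this repo):

```python
import torch

def total_variation(img: torch.Tensor) -> torch.Tensor:
    # img has shape (batch, channels, height, width)
    horizontal = torch.sum(torch.abs(img[:, :, :, :-1] - img[:, :, :, 1:]))
    vertical = torch.sum(torch.abs(img[:, :, :-1, :] - img[:, :, 1:, :]))
    return horizontal + vertical
```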
### Optimization initialization

Starting from different initialization images (white noise, Gaussian noise, the content image, or the style image) leads to different results. <br/>
Empirically, the content image gives the best results, as also explored in [this research paper](https://arxiv.org/pdf/1602.07188.pdf). <br/>
Here you can see results for content, random and style initialization, in that order (left to right):

<p align="center">
<img src="data/examples/init_methods/golden_gate_vg_la_cafe_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">
<img src="data/examples/init_methods/golden_gate_vg_la_cafe_o_lbfgs_i_random_h_500_m_vgg19_cw_100000.0_sw_1000.0_tv_1.0_resized.jpg" width="270px">
<img src="data/examples/init_methods/golden_gate_vg_la_cafe_o_lbfgs_i_style_h_500_m_vgg19_cw_100000.0_sw_10.0_tv_0.1_resized.jpg" width="270px">
</p>

You can also see that with style initialization some content from the artwork leaked directly into the output.
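
A rough sketch of how the different initializations are built (it mirrors `getInitImage` in `src/utils/utils.py`; the noise scale of 90 matches the value used there, and the bilinear resize for the style init is a simplification, since the repo re-prepares the style image at the content image's size instead):

```python
import numpy as np
import torch
import torch.nn.functional as F

def make_init_image(content_img: torch.Tensor, style_img: torch.Tensor,
                    mode: str, device: torch.device) -> torch.Tensor:
    if mode == 'white_noise':
        noise = np.random.uniform(-90., 90., content_img.shape).astype(np.float32)
        return torch.from_numpy(noise).to(device)
    if mode == 'gaussian_noise':
        noise = np.random.normal(loc=0, scale=90., size=content_img.shape).astype(np.float32)
        return torch.from_numpy(noise).to(device)
    if mode == 'content':
        return content_img.clone()
    # style init: the optimized image must have the content image's spatial size,
    # because its feature maps are compared against the content feature maps
    return F.interpolate(style_img, size=content_img.shape[2:], mode='bilinear',
                         align_corners=False)
```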
### Famous "Figure 3" reconstruction

Finally, if I hadn't included this section, you couldn't say that I've successfully reproduced the [original paper](https://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Gatys_Image_Style_Transfer_CVPR_2016_paper.pdf) (laughs in Python):

<p align="center">
<img src="data/examples/gatys_reconstruction/tubingen.jpg" width="300px">
<img src="data/examples/gatys_reconstruction/tubingen_shipwreck_o_lbfgs_i_random_h_400_m_vgg19_cw_100000.0_sw_200.0_tv_1.0_resized.jpg" width="300px">
<img src="data/examples/gatys_reconstruction/tubingen_starry-night_o_lbfgs_i_content_h_400_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0.jpg" width="300px">

<img src="data/examples/gatys_reconstruction/tubingen_the_scream_o_lbfgs_i_random_h_400_m_vgg19_cw_100000.0_sw_300.0_tv_1.0.jpg" width="300px">
<img src="data/examples/gatys_reconstruction/tubingen_seated-nude_o_lbfgs_i_random_h_400_m_vgg19_cw_100000.0_sw_2000.0_tv_1.0.jpg" width="300px">
<img src="data/examples/gatys_reconstruction/tubingen_kandinsky_o_lbfgs_i_content_h_400_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0.jpg" width="300px">
</p>

I haven't given it much effort; the results could be much nicer.

### Content reconstruction

If we use only the content (perceptual) loss and try to minimize that objective function, this is what we get (starting from noise):

<p align="center">
<img src="data/examples/content_reconstruction/0000.jpg" width="200px">
<img src="data/examples/content_reconstruction/0026.jpg" width="200px">
<img src="data/examples/content_reconstruction/0070.jpg" width="200px">
<img src="data/examples/content_reconstruction/0509.jpg" width="200px">
</p>

These are steps 0, 26, 70 and 509 of the L-BFGS numerical optimizer, using layer relu3_1 for the content representation.<br/>
Check out [this section](#reconstruct-image-from-representation) if you want to play with this.
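
In other words, content reconstruction minimizes the mean-squared error between the feature maps of the image being optimized and the feature maps of the content image at one chosen layer. A minimal sketch (assuming `vgg` is one of the models from `src/models/definitions/vgg_nets.py`, which return a tuple of feature maps):

```python
import torch
import torch.nn.functional as F

def content_loss(vgg, optimizing_img: torch.Tensor,
                 target_feature_maps: torch.Tensor, layer_index: int) -> torch.Tensor:
    # target_feature_maps were extracted once from the content image at the chosen layer
    current = vgg(optimizing_img)[layer_index].squeeze(0)
    return F.mse_loss(current, target_feature_maps, reduction='mean')
```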
### Style reconstruction

We can do the same thing for style (on the left is the original art image "Candy"), starting from noise:

<p align="center">
<img src="data/examples/style_reconstruction/candy.jpg" width="200px">
<img src="data/examples/style_reconstruction/0045.jpg" width="200px">
<img src="data/examples/style_reconstruction/0129.jpg" width="200px">
<img src="data/examples/style_reconstruction/0510.jpg" width="200px">
</p>

These are steps 45, 129 and 510 of L-BFGS, using layers relu1_1, relu2_1, relu3_1, relu4_1 and relu5_1 for the style representation.
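
Style reconstruction does the same, but matches Gram matrices (channel-wise correlations of the feature maps) instead of the feature maps themselves. A short sketch, equivalent to `gram_matrix` in `src/utils/utils.py` plus the averaged sum-MSE used for the style term:

```python
import torch
import torch.nn.functional as F

def gram_matrix(fmaps: torch.Tensor, normalize: bool = True) -> torch.Tensor:
    b, ch, h, w = fmaps.size()
    features = fmaps.view(b, ch, h * w)
    gram = features.bmm(features.transpose(1, 2))  # shape (b, ch, ch)
    return gram / (ch * h * w) if normalize else gram

def style_loss(target_grams, current_grams) -> torch.Tensor:
    loss = 0.0
    for gram_gt, gram_hat in zip(target_grams, current_grams):
        loss = loss + F.mse_loss(gram_hat[0], gram_gt[0], reduction='sum')
    return loss / len(target_grams)
```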
## Setup

1. Open Anaconda Prompt and navigate into the project directory: `cd path_to_repo`
2. Run `conda env create` (while in the project directory)
3. Run `activate pytorch-nst`

That's it! It should work out of the box, executing the environment.yml file which deals with dependencies.

-----

The PyTorch package will pull some version of CUDA with it, but it is highly recommended that you install system-wide CUDA beforehand, mostly because of GPU drivers. I also recommend using the Miniconda installer as a way to get conda on your system.

Follow through points 1 and 2 of [this setup](https://github.com/Petlja/PSIML/blob/master/docs/MachineSetup.md) and use the most up-to-date versions of Miniconda (Python 3.7) and CUDA/cuDNN.
(I recommend CUDA 10.1, as it is compatible with PyTorch 1.4, which is used in this repo, together with the newest compatible cuDNN.)

## Usage

1. Copy content images to the default content image directory: `/data/content-images/`
2. Copy style images to the default style image directory: `/data/style-images/`
3. Run `python neural_style_transfer.py --content_img_name <content-img-name> --style_img_name <style-img-name>`

It's that easy. For more advanced usage, take a look at the code; it's (hopefully) self-explanatory (if you speak Python ^^).

Or take a look at [this accompanying YouTube video](https://www.youtube.com/watch?v=XWMwdkaLFsI); it explains how to use this repo in greater detail.

Just run it! So that you can get something like this: :heart:
<p align="center">
<img src="data/examples/taj_mahal/taj_mahal_ben_giles_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0.jpg" width="615px">
</p>

### Debugging/Experimenting

Q: L-BFGS can't run on my computer, it takes too much GPU VRAM?<br/>
A: Set Adam as your default and take a look at the code for the initial style/content/tv weights you should use as a starting point.

Q: The output image looks too much like the style image?<br/>
A: Decrease the style weight, or take a look at the table of weights (in neural_style_transfer.py) which I've included and which works.

Q: There is too much noise (the image is not smooth)?<br/>
A: Increase the total variation (tv) weight (usually by multiples of 10; again, the table is your friend here, or just experiment yourself).

### Reconstruct image from representation

I've also included a file that will help you better understand how the algorithm works and what the neural net sees.<br/>
It allows you to visualize content representations **(feature maps)** and style representations **(Gram matrices)**.<br/>
It will also reconstruct either the style or the content purely from those representations and the corresponding model that produced them. <br/>

Just run this:<br/>
`python reconstruct_image_from_representation.py --should_reconstruct_content <Bool> --should_visualize_representation <Bool>`
<br/><br/>
And that's it! `--should_visualize_representation`, if set to True, will visualize the representations for you;<br/>
`--should_reconstruct_content` picks between style and content reconstruction.

Here are some feature maps (relu1_1, VGG 19) as well as a Gram matrix (relu2_1, VGG 19) for Van Gogh's famous [starry night](https://en.wikipedia.org/wiki/The_Starry_Night):

<p align="center">
<img src="data/examples/fms_gram/fm_vgg19_relu1_1_0005_resized.jpg" width="200px">
<img src="data/examples/fms_gram/fm_vgg19_relu1_1_0046_resized.jpg" width="200px">
<img src="data/examples/fms_gram/fm_vgg19_relu1_1_0058_resized.jpg" width="200px">
<img src="data/examples/fms_gram/gram_vgg19_relu2_1_0001.jpg" width="200px">
</p>

No more dark magic.

## Acknowledgements

I found these repos useful (while developing this one):
* [fast_neural_style](https://github.com/pytorch/examples/tree/master/fast_neural_style) (PyTorch, feed-forward method)
* [neural-style-tf](https://github.com/cysmith/neural-style-tf/) (TensorFlow, optimization method)
* [neural-style](https://github.com/anishathalye/neural-style/) (TensorFlow, optimization method)

I found some of the content/style images I was using here:
* [style/artistic images](https://www.rawpixel.com/board/537381/vincent-van-gogh-free-original-public-domain-paintings?sort=curated&mode=shop&page=1)
* [awesome figures pic](https://www.pexels.com/photo/action-android-device-electronics-595804/)
* [awesome bridge pic](https://www.pexels.com/photo/gray-bridge-and-trees-814499/)

Other images are now already classics in the NST world.

## Citation

If you find this code useful for your research, please cite the following:

```
@misc{Gordić2020nst,
  author = {Gordić, Aleksa},
  title = {pytorch-neural-style-transfer},
  year = {2020},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/gordicaleksa/pytorch-neural-style-transfer}},
}
```

## Connect with me

If you'd love to have some more AI-related content in your life :nerd_face:, consider:
* Subscribing to my YouTube channel [The AI Epiphany](https://www.youtube.com/c/TheAiEpiphany) :bell:
* Following me on [LinkedIn](https://www.linkedin.com/in/aleksagordic/) and [Twitter](https://twitter.com/gordic_aleksa) :bulb:
* Following me on [Medium](https://gordicaleksa.medium.com/) :books: :heart:

## Licence

[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/gordicaleksa/pytorch-neural-style-transfer/blob/master/LICENCE)
src/__pycache__/neural_style_transfer.cpython-310.pyc ADDED
Binary file (5.46 kB).
 
src/__pycache__/reconstruct_image_from_representation.cpython-310.pyc ADDED
Binary file (3.34 kB).
 
src/commands.py ADDED
@@ -0,0 +1,96 @@
from abc import ABC, abstractmethod
import numpy as np
import utils
import torch


class Tuning(ABC):

    @abstractmethod
    def Image(self, image):
        pass


class TuningReconstruction(Tuning):

    def __init__(self, model, optimizer, target_representation,
                 content_feature_maps_index, style_feature_maps_indices):

        self.model = model
        self.optimizer = optimizer
        self.target_representation = target_representation
        self.content_feature_maps_index = content_feature_maps_index
        self.style_feature_maps_indices = style_feature_maps_indices

    def Image(self, image):

        # Find the current representation
        set_of_feature_maps = self.model(image)
        if utils.yamlGet('reconstruct') == 'Content':
            current_representation = set_of_feature_maps[
                self.content_feature_maps_index].squeeze(axis=0)
        elif utils.yamlGet('reconstruct') == 'Style':
            current_representation = [
                utils.gram_matrix(fmaps)
                for i, fmaps in enumerate(set_of_feature_maps)
                if i in self.style_feature_maps_indices
            ]

        loss = 0.0

        if utils.yamlGet('reconstruct') == 'Content':
            loss = torch.nn.MSELoss(reduction='mean')(
                self.target_representation, current_representation)
        elif utils.yamlGet('reconstruct') == 'Style':
            for gram_gt, gram_hat in zip(self.target_representation,
                                         current_representation):
                loss += (1 / len(self.target_representation)) * \
                    torch.nn.MSELoss(
                        reduction='sum')(gram_gt[0], gram_hat[0])

        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
        return loss.item(), current_representation


class Reconstruct(ABC):

    @abstractmethod
    def Visualize(self):
        pass


class ContentReconstruct(Reconstruct):
    """
    tcr -> target_content_representation
    """

    def __init__(self, feature_maps):
        self.fm = feature_maps
        self.tcr = self.fm['set_of_feature_maps'][
            self.fm['content_feature_maps_index_name'][0]].squeeze(axis=0)
        self.nfm = self.tcr.size()[0]

    def Visualize(self):
        # NOTE: unfinished - relies on module-level `config`, `dump_path`, `os`
        # and `content_feature_maps_index_name`, which are not defined in this file.
        for i in range(self.nfm):
            feature_map = self.tcr[i].to('cpu').numpy()
            feature_map = np.uint8(utils.get_uint8_range(feature_map))
            # plt.imshow(feature_map)
            # plt.title(
            #     f'Feature map {i+1}/{num_of_feature_maps} from layer'
            #     f' {content_feature_maps_index_name[1]} '
            #     f'(model={config["model"]}) for'
            #     f' {config["content_img_name"]} image.'
            # )
            # plt.show()
            filename = f'fm_{config["model"]}_{content_feature_maps_index_name[1]}_{str(i).zfill(config["img_format"][0])}{config["img_format"][1]}'
            utils.save_image(feature_map, os.path.join(dump_path, filename))


class StyleReconstruct(Reconstruct):
    pass


class Invoker:
    pass
src/config.yaml ADDED
@@ -0,0 +1,16 @@
contentLayer: relu4_3
contentPath: src/data/content.jpg
contentWeight: 475.71
height: 400
initImage: Content
iterations: 10
learning_rate: 0.01
model: VGG16
optimizer: Adam
reconstruct: Content
reprSavFreq: 1
stylePath: src/data/style.jpg
styleWeight: 307.7
totalVariationWeight: 854.25
type: Transfer
visualize: 'Yes'
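
config.yaml acts as the shared state between the Streamlit front end and the optimization code: app.py writes every widget value into it via `utils.yamlSet`, and the transfer/reconstruction modules read it back via `utils.yamlGet`. A small usage sketch:

```python
from src.utils import utils

utils.yamlSet('height', 400)         # app.py stores the slider value here
model_name = utils.yamlGet('model')  # e.g. "VGG16", read later by prepare_model()
```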
src/data/content.jpg ADDED
src/data/style.jpg ADDED
src/data/transfer/0000.jpg ADDED
src/data/transfer/0001.jpg ADDED
src/data/transfer/0002.jpg ADDED
src/data/transfer/0003.jpg ADDED
src/data/transfer/0004.jpg ADDED
src/data/transfer/0005.jpg ADDED
src/data/transfer/0006.jpg ADDED
src/data/transfer/0007.jpg ADDED
src/data/transfer/0008.jpg ADDED
src/data/transfer/0009.jpg ADDED
src/data/transfer/out.mp4 ADDED
Binary file (20.1 kB).
 
src/environment.yml ADDED
@@ -0,0 +1,13 @@
name: pytorch-nst
channels:
  - defaults
  - pytorch
dependencies:
  - python=3.7.6
  - pip=20.0.2
  - matplotlib=3.1.3
  - pytorch==1.4.0
  - torchvision=0.5.0
  - pip:
    - numpy==1.18.1
    - opencv-python==4.2.0.32
src/models/definitions/__init__.py ADDED
File without changes
src/models/definitions/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (191 Bytes).
 
src/models/definitions/__pycache__/vgg_nets.cpython-310.pyc ADDED
Binary file (6.01 kB).
 
src/models/definitions/vgg_nets.py ADDED
@@ -0,0 +1,241 @@
from collections import namedtuple
import torch
from torchvision import models
from src.utils import utils
"""
More detail about the VGG architecture (if you want to understand magic/hardcoded numbers) can be found here:

https://github.com/pytorch/vision/blob/3c254fb7af5f8af252c24e89949c54a3461ff0be/torchvision/models/vgg.py
"""


class Vgg16(torch.nn.Module):
    """Only those layers are exposed which have already proven to work nicely."""

    def __init__(self, requires_grad=False, show_progress=False):
        super().__init__()
        vgg_pretrained_features = models.vgg16(pretrained=True,
                                               progress=show_progress).features
        self.layer_names = {'relu1_2': 1, 'relu2_2': 2,
                            'relu3_3': 3, 'relu4_3': 4}
        # index of the content layer chosen in config.yaml (e.g. relu2_2)
        self.content_feature_maps_index = self.layer_names[
            utils.yamlGet('contentLayer')] - 1
        self.style_feature_maps_indices = list(range(len(
            self.layer_names)))  # all layers used for style representation

        self.slice1 = torch.nn.Sequential()
        self.slice2 = torch.nn.Sequential()
        self.slice3 = torch.nn.Sequential()
        self.slice4 = torch.nn.Sequential()
        for x in range(4):
            self.slice1.add_module(str(x), vgg_pretrained_features[x])
        for x in range(4, 9):
            self.slice2.add_module(str(x), vgg_pretrained_features[x])
        for x in range(9, 16):
            self.slice3.add_module(str(x), vgg_pretrained_features[x])
        for x in range(16, 23):
            self.slice4.add_module(str(x), vgg_pretrained_features[x])
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, x):
        x = self.slice1(x)
        relu1_2 = x
        x = self.slice2(x)
        relu2_2 = x
        x = self.slice3(x)
        relu3_3 = x
        x = self.slice4(x)
        relu4_3 = x
        vgg_outputs = namedtuple("VggOutputs", self.layer_names.keys())
        out = vgg_outputs(relu1_2, relu2_2, relu3_3, relu4_3)
        return out


class Vgg16Experimental(torch.nn.Module):
    """Everything exposed so you can play with different combinations for style and content representation"""

    def __init__(self, requires_grad=False, show_progress=False):
        super().__init__()
        vgg_pretrained_features = models.vgg16(pretrained=True,
                                               progress=show_progress).features
        self.layer_names = [
            'relu1_1', 'relu2_1', 'relu2_2', 'relu3_1', 'relu3_2', 'relu4_1',
            'relu4_3', 'relu5_1'
        ]
        self.content_feature_maps_index = 4
        self.style_feature_maps_indices = list(range(len(
            self.layer_names)))  # all layers used for style representation

        self.conv1_1 = vgg_pretrained_features[0]
        self.relu1_1 = vgg_pretrained_features[1]
        self.conv1_2 = vgg_pretrained_features[2]
        self.relu1_2 = vgg_pretrained_features[3]
        self.max_pooling1 = vgg_pretrained_features[4]
        self.conv2_1 = vgg_pretrained_features[5]
        self.relu2_1 = vgg_pretrained_features[6]
        self.conv2_2 = vgg_pretrained_features[7]
        self.relu2_2 = vgg_pretrained_features[8]
        self.max_pooling2 = vgg_pretrained_features[9]
        self.conv3_1 = vgg_pretrained_features[10]
        self.relu3_1 = vgg_pretrained_features[11]
        self.conv3_2 = vgg_pretrained_features[12]
        self.relu3_2 = vgg_pretrained_features[13]
        self.conv3_3 = vgg_pretrained_features[14]
        self.relu3_3 = vgg_pretrained_features[15]
        self.max_pooling3 = vgg_pretrained_features[16]
        self.conv4_1 = vgg_pretrained_features[17]
        self.relu4_1 = vgg_pretrained_features[18]
        self.conv4_2 = vgg_pretrained_features[19]
        self.relu4_2 = vgg_pretrained_features[20]
        self.conv4_3 = vgg_pretrained_features[21]
        self.relu4_3 = vgg_pretrained_features[22]
        self.max_pooling4 = vgg_pretrained_features[23]
        self.conv5_1 = vgg_pretrained_features[24]
        self.relu5_1 = vgg_pretrained_features[25]
        self.conv5_2 = vgg_pretrained_features[26]
        self.relu5_2 = vgg_pretrained_features[27]
        self.conv5_3 = vgg_pretrained_features[28]
        self.relu5_3 = vgg_pretrained_features[29]
        self.max_pooling5 = vgg_pretrained_features[30]
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, x):
        x = self.conv1_1(x)
        conv1_1 = x
        x = self.relu1_1(x)
        relu1_1 = x
        x = self.conv1_2(x)
        conv1_2 = x
        x = self.relu1_2(x)
        relu1_2 = x
        x = self.max_pooling1(x)
        x = self.conv2_1(x)
        conv2_1 = x
        x = self.relu2_1(x)
        relu2_1 = x
        x = self.conv2_2(x)
        conv2_2 = x
        x = self.relu2_2(x)
        relu2_2 = x
        x = self.max_pooling2(x)
        x = self.conv3_1(x)
        conv3_1 = x
        x = self.relu3_1(x)
        relu3_1 = x
        x = self.conv3_2(x)
        conv3_2 = x
        x = self.relu3_2(x)
        relu3_2 = x
        x = self.conv3_3(x)
        conv3_3 = x
        x = self.relu3_3(x)
        relu3_3 = x
        x = self.max_pooling3(x)
        x = self.conv4_1(x)
        conv4_1 = x
        x = self.relu4_1(x)
        relu4_1 = x
        x = self.conv4_2(x)
        conv4_2 = x
        x = self.relu4_2(x)
        relu4_2 = x
        x = self.conv4_3(x)
        conv4_3 = x
        x = self.relu4_3(x)
        relu4_3 = x
        x = self.max_pooling4(x)
        x = self.conv5_1(x)
        conv5_1 = x
        x = self.relu5_1(x)
        relu5_1 = x
        x = self.conv5_2(x)
        conv5_2 = x
        x = self.relu5_2(x)
        relu5_2 = x
        x = self.conv5_3(x)
        conv5_3 = x
        x = self.relu5_3(x)
        relu5_3 = x
        x = self.max_pooling5(x)
        # expose only the layers that you want to experiment with here
        vgg_outputs = namedtuple("VggOutputs", self.layer_names)
        out = vgg_outputs(relu1_1, relu2_1, relu2_2, relu3_1, relu3_2, relu4_1,
                          relu4_3, relu5_1)

        return out


class Vgg19(torch.nn.Module):
    """
    Used in the original NST paper, only those layers are exposed which were used in the original paper

    'conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1' were used for style representation
    'conv4_2' was used for content representation (although they did some experiments with conv2_2 and conv5_2)
    """

    def __init__(self,
                 requires_grad=False,
                 show_progress=False,
                 use_relu=True):
        super().__init__()
        vgg_pretrained_features = models.vgg19(pretrained=True,
                                               progress=show_progress).features
        if use_relu:  # use relu or, as in the original paper, conv layers
            self.layer_names = [
                'relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1'
            ]
            self.offset = 1
        else:
            self.layer_names = [
                'conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv4_2',
                'conv5_1'
            ]
            self.offset = 0
        self.content_feature_maps_index = 4  # conv4_2
        # all layers used for style representation except conv4_2
        self.style_feature_maps_indices = list(range(len(self.layer_names)))
        self.style_feature_maps_indices.remove(4)  # conv4_2

        self.slice1 = torch.nn.Sequential()
        self.slice2 = torch.nn.Sequential()
        self.slice3 = torch.nn.Sequential()
        self.slice4 = torch.nn.Sequential()
        self.slice5 = torch.nn.Sequential()
        self.slice6 = torch.nn.Sequential()
        for x in range(1 + self.offset):
            self.slice1.add_module(str(x), vgg_pretrained_features[x])
        for x in range(1 + self.offset, 6 + self.offset):
            self.slice2.add_module(str(x), vgg_pretrained_features[x])
        for x in range(6 + self.offset, 11 + self.offset):
            self.slice3.add_module(str(x), vgg_pretrained_features[x])
        for x in range(11 + self.offset, 20 + self.offset):
            self.slice4.add_module(str(x), vgg_pretrained_features[x])
        for x in range(20 + self.offset, 22):
            self.slice5.add_module(str(x), vgg_pretrained_features[x])
        for x in range(22, 29 + self.offset):
            self.slice6.add_module(str(x), vgg_pretrained_features[x])
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, x):
        x = self.slice1(x)
        layer1_1 = x
        x = self.slice2(x)
        layer2_1 = x
        x = self.slice3(x)
        layer3_1 = x
        x = self.slice4(x)
        layer4_1 = x
        x = self.slice5(x)
        conv4_2 = x
        x = self.slice6(x)
        layer5_1 = x
        vgg_outputs = namedtuple("VggOutputs", self.layer_names)
        out = vgg_outputs(layer1_1, layer2_1, layer3_1, layer4_1, conv4_2,
                          layer5_1)
        return out
src/neural_style_transfer.py ADDED
@@ -0,0 +1,163 @@
import os
import src.utils.utils as utils
from src.utils.video_utils import create_video_from_intermediate_results
import torch
from torch import nn
from torch.optim import Adam, LBFGS
from torch.autograd import Variable


class ContentLoss(nn.Module):
    def __init__(self, target):
        super(ContentLoss, self).__init__()
        self.target = target.detach()

    def forward(self, current):
        return nn.MSELoss(reduction='mean')(self.target, current)


class StyleLoss(nn.Module):
    def __init__(self):
        super(StyleLoss, self).__init__()
        self.loss = 0.0

    def forward(self, x, y):
        for gram_gt, gram_hat in zip(x, y):
            self.loss += torch.nn.MSELoss(reduction='sum')(gram_gt[0], gram_hat[0])
        self.loss /= len(x)
        return self.loss


class Build(nn.Module):
    def __init__(
        self,
        config,
        target_content_representation,
        target_style_representation,
    ):
        super(Build, self).__init__()
        self.current_set_of_feature_maps = None
        self.current_content_representation = None
        self.current_style_representation = None
        self.config = config
        self.target_content_representation = target_content_representation
        self.target_style_representation = target_style_representation

    def forward(self, model, x):
        self.current_set_of_feature_maps = model(x)

        self.current_content_representation = self.current_set_of_feature_maps[
            self.config.content_feature_maps_index].squeeze(axis=0)
        self.current_style_representation = [
            utils.gram_matrix(x)
            for cnt, x in enumerate(self.current_set_of_feature_maps)
            if cnt in self.config.style_feature_maps_indices
        ]
        content_loss = ContentLoss(self.target_content_representation)(
            self.current_content_representation)
        style_loss = StyleLoss()(
            self.target_style_representation,
            self.current_style_representation)
        tv_loss = TotalVariationLoss(x)()

        return Loss()(content_loss, style_loss, tv_loss)


class TotalVariationLoss(nn.Module):
    def __init__(self, y):
        super(TotalVariationLoss, self).__init__()
        self.first = torch.sum(torch.abs(y[:, :, :, :-1] - y[:, :, :, 1:]))
        self.second = torch.sum(torch.abs(y[:, :, :-1, :] - y[:, :, 1:, :]))

    def forward(self):
        return self.first + self.second


class Loss(nn.Module):
    def __init__(self):
        super(Loss, self).__init__()

    def forward(self, x, y, z):
        return utils.yamlGet("contentWeight") * x + utils.yamlGet(
            "styleWeight") * y + utils.yamlGet("totalVariationWeight") * z


def neural_style_transfer():

    dump_path = os.path.join(os.path.dirname(__file__), "data/transfer")
    config = utils.Config()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    content_img, style_img, init_img = utils.Images().getImages(device)
    optimizing_img = Variable(init_img, requires_grad=True)

    output = list(utils.prepare_model(device))
    neural_net = output[0]
    content_feature_maps_index_name = output[1]
    style_feature_maps_indices_names = output[2]

    config.content_feature_maps_index = content_feature_maps_index_name[0]
    config.style_feature_maps_indices = style_feature_maps_indices_names[0]

    content_img_set_of_feature_maps = neural_net(content_img)
    style_img_set_of_feature_maps = neural_net(style_img)

    target_content_representation = content_img_set_of_feature_maps[
        config.content_feature_maps_index].squeeze(axis=0)
    target_style_representation = [
        utils.gram_matrix(x)
        for cnt, x in enumerate(style_img_set_of_feature_maps)
        if cnt in config.style_feature_maps_indices
    ]

    if utils.yamlGet('optimizer') == 'Adam':
        optimizer = Adam((optimizing_img, ), lr=utils.yamlGet('learning_rate'))
        for cnt in range(utils.yamlGet("iterations")):

            total_loss = Build(config, target_content_representation,
                               target_style_representation)(neural_net,
                                                            optimizing_img)

            total_loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            with torch.no_grad():
                utils.save_optimizing_image(optimizing_img, dump_path, cnt)

    elif utils.yamlGet('optimizer') == 'LBFGS':
        optimizer = LBFGS((optimizing_img, ),
                          max_iter=utils.yamlGet('iterations'),
                          line_search_fn='strong_wolfe')
        cnt = 0

        def closure():
            nonlocal cnt
            optimizer.zero_grad()
            total_loss = Build(config, target_content_representation,
                               target_style_representation)(neural_net,
                                                            optimizing_img)
            total_loss.backward()
            with torch.no_grad():
                utils.save_optimizing_image(optimizing_img, dump_path, cnt)
            cnt += 1
            return total_loss

        # L-BFGS evaluates the closure multiple times within a single step() call
        optimizer.step(closure)

    create_video_from_intermediate_results(dump_path)


# some values of weights that worked for figures.jpg, vg_starry_night.jpg
# (starting point for finding good images)
# once you understand what each one does it gets really easy -> also see
# README.md

# lbfgs, content init -> (cw, sw, tv) = (1e5, 3e4, 1e0)
# lbfgs, style init -> (cw, sw, tv) = (1e5, 1e1, 1e-1)
# lbfgs, random init -> (cw, sw, tv) = (1e5, 1e3, 1e0)

# adam, content init -> (cw, sw, tv, lr) = (1e5, 1e5, 1e-1, 1e1)
# adam, style init -> (cw, sw, tv, lr) = (1e5, 1e2, 1e-1, 1e1)
# adam, random init -> (cw, sw, tv, lr) = (1e5, 1e2, 1e-1, 1e1)

# original NST (Neural Style Transfer) algorithm (Gatys et al.)
# results_path = neural_style_transfer()
# create_video_from_intermediate_results(results_path)
src/reconstruct_image_from_representation.py ADDED
@@ -0,0 +1,108 @@
import os
import src.utils.utils as utils
from src.utils.video_utils import create_video_from_intermediate_results
import torch
from torch.autograd import Variable
from torch.optim import Adam, LBFGS
import numpy as np


def make_tuning_step(optimizer, config):

    def tuning_step(optimizing_img):

        config.current_set_of_feature_maps = config.neural_net(optimizing_img)
        loss, config.current_representation = utils.getCurrentData(config)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        return loss.item(), config.current_representation

    return tuning_step


def reconstruct_image_from_representation():

    dump_path = os.path.join(os.path.dirname(__file__), "data/reconstruct")
    config = utils.Config()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    img, img_path = utils.getImageAndPath(device)
    white_noise_img = np.random.uniform(-90., 90.,
                                        img.shape).astype(np.float32)
    init_img = torch.from_numpy(white_noise_img).float().to(device)
    optimizing_img = Variable(init_img, requires_grad=True)

    # indices pick relevant feature maps (say conv4_1, relu1_1, etc.)
    output = list(utils.prepare_model(device))
    config.neural_net = output[0]
    content_feature_maps_index_name = output[1]
    style_feature_maps_indices_names = output[2]

    config.content_feature_maps_index = content_feature_maps_index_name[0]
    config.style_feature_maps_indices = style_feature_maps_indices_names[0]

    config.current_set_of_feature_maps = config.neural_net(img)

    config.target_content_representation = config.current_set_of_feature_maps[
        config.content_feature_maps_index].squeeze(axis=0)
    config.target_style_representation = [
        utils.gram_matrix(fmaps)
        for i, fmaps in enumerate(config.current_set_of_feature_maps)
        if i in config.style_feature_maps_indices
    ]

    if utils.yamlGet('reconstruct') == "Content":
        config.target_representation = config.target_content_representation
        num_of_feature_maps = config.target_content_representation.size()[0]
        for i in range(num_of_feature_maps):
            feature_map = config.target_content_representation[i].to(
                'cpu').numpy()
            feature_map = np.uint8(utils.get_uint8_range(feature_map))
            # filename = f'fm_{config["model"]}_{content_feature_maps_index_name[1]}_{str(i).zfill(config["img_format"][0])}{config["img_format"][1]}'
            # utils.save_image(feature_map, os.path.join(dump_path, filename))

    elif utils.yamlGet('reconstruct') == "Style":
        config.target_representation = config.target_style_representation
        num_of_gram_matrices = len(config.target_style_representation)
        for i in range(num_of_gram_matrices):
            Gram_matrix = config.target_style_representation[i].squeeze(
                axis=0).to('cpu').numpy()
            Gram_matrix = np.uint8(utils.get_uint8_range(Gram_matrix))
            # filename = f'gram_{config["model"]}_{style_feature_maps_indices_names[1][i]}_{str(i).zfill(config["img_format"][0])}{config["img_format"][1]}'
            # utils.save_image(Gram_matrix, os.path.join(dump_path, filename))

    if utils.yamlGet('optimizer') == 'Adam':
        optimizer = Adam((optimizing_img, ), lr=utils.yamlGet('learning_rate'))
        tuning_step = make_tuning_step(optimizer, config)
        for it in range(utils.yamlGet('iterations')):
            tuning_step(optimizing_img)
            with torch.no_grad():
                utils.save_optimizing_image(optimizing_img, dump_path, it)

    elif utils.yamlGet('optimizer') == 'LBFGS':
        optimizer = LBFGS((optimizing_img, ),
                          max_iter=utils.yamlGet('iterations'),
                          line_search_fn='strong_wolfe')
        cnt = 0

        def closure():
            nonlocal cnt
            optimizer.zero_grad()
            loss = utils.getLBFGSReconstructLoss(config, optimizing_img)
            loss.backward()
            with torch.no_grad():
                utils.save_optimizing_image(optimizing_img, dump_path, cnt)
            cnt += 1
            return loss

        optimizer.step(closure)

    return dump_path


if __name__ == "__main__":

    # reconstruct the style or content image purely from its representation
    results_path = reconstruct_image_from_representation()

    create_video_from_intermediate_results(results_path)
src/utils/__init__.py ADDED
File without changes
src/utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (178 Bytes).
 
src/utils/__pycache__/utils.cpython-310.pyc ADDED
Binary file (8.67 kB).
 
src/utils/__pycache__/video_utils.cpython-310.pyc ADDED
Binary file (1.07 kB).
 
src/utils/utils.py ADDED
@@ -0,0 +1,282 @@
import cv2 as cv
import numpy as np
import torch
from torchvision import transforms
import os
import yaml
import PIL.Image as Image
from src.models.definitions.vgg_nets import Vgg16, Vgg19, Vgg16Experimental

IMAGENET_MEAN_255 = [123.675, 116.28, 103.53]
IMAGENET_STD_NEUTRAL = [1, 1, 1]


def load_image(img_path, target_shape=None):
    if not os.path.exists(img_path):
        raise Exception(f'Path does not exist: {img_path}')
    img = cv.imread(img_path)[:, :, ::-1]
    if target_shape is not None:  # resize section
        current_height, current_width = img.shape[:2]
        new_height = target_shape
        new_width = int(current_width * (new_height / current_height))
        img = cv.resize(img, (new_width, new_height),
                        interpolation=cv.INTER_CUBIC)

    # this needs to go after resizing - otherwise cv.resize will push values outside of [0,1] range
    img = img.astype(np.float32)  # convert from uint8 to float32
    img /= 255.0  # get to [0, 1] range
    return img


def getInitImage(content_img, style_img, device):

    if yamlGet("initImage") == 'White Noise Image':
        white_noise_img = np.random.uniform(
            -90., 90., content_img.shape).astype(np.float32)
        init_img = torch.from_numpy(white_noise_img).float().to(device)

    elif yamlGet("initImage") == 'Gaussian Noise Image':
        gaussian_noise_img = np.random.normal(loc=0,
                                              scale=90.,
                                              size=content_img.shape).astype(
                                                  np.float32)
        init_img = torch.from_numpy(gaussian_noise_img).float().to(device)

    elif yamlGet("initImage") == 'Content':
        init_img = content_img

    else:
        # init image has same dimension as content image - this is a hard constraint
        # feature maps need to be of same size for content image and init image
        style_img_resized = prepare_img(style_img,
                                        np.asarray(content_img.shape[2:]),
                                        device)
        init_img = style_img_resized
    return init_img


def prepare_img(img_path, target_shape, device):
    img = load_image(img_path, target_shape=target_shape)

    # normalize using ImageNet's mean
    # [0, 255] range worked much better for me than [0, 1] range (even though PyTorch models were trained on the latter)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255)),
        transforms.Normalize(mean=IMAGENET_MEAN_255, std=IMAGENET_STD_NEUTRAL)
    ])

    img = transform(img).to(device).unsqueeze(0)

    return img


def save_image(img, img_path):
    if len(img.shape) == 2:
        img = np.stack((img, ) * 3, axis=-1)
    cv.imwrite(img_path, img[:, :, ::-1]
               )  # [:, :, ::-1] converts rgb into bgr (opencv constraint...)


def save_optimizing_image(optimizing_img, dump_path, img_id):
    img_format = (4, '.jpg')
    saving_freq = yamlGet('reprSavFreq')
    out_img = optimizing_img.squeeze(axis=0).to('cpu').detach().numpy()
    out_img = np.moveaxis(
        out_img, 0,
        2)  # swap channel from 1st to 3rd position: ch, _, _ -> _, _, ch

    if img_id == yamlGet('iterations') - 1 or \
            (saving_freq > 0 and img_id % saving_freq == 0):

        out_img_name = str(img_id).zfill(img_format[0]) + img_format[1] \
            if saving_freq != -1 else None
        dump_img = np.copy(out_img)
        dump_img += np.array(IMAGENET_MEAN_255).reshape((1, 1, 3))
        dump_img = np.clip(dump_img, 0, 255).astype('uint8')
        cv.imwrite(os.path.join(dump_path, out_img_name), dump_img[:, :, ::-1])
        print(f"{out_img_name} written to {dump_path}")

    # if should_display:
    #     plt.imshow(np.uint8(get_uint8_range(out_img)))
    #     plt.show()


def get_uint8_range(x):
    if isinstance(x, np.ndarray):
        x -= np.min(x)
        x /= np.max(x)
        x *= 255
        return x
    else:
        raise ValueError(f'Expected numpy array got {type(x)}')


def prepare_model(device):

    model = yamlGet('model')
    if model == 'VGG16':
        model = Vgg16(requires_grad=False, show_progress=True)
    elif model == 'VGG16-Experimental':
        model = Vgg16Experimental(requires_grad=False, show_progress=True)
    elif model == 'VGG19':
        model = Vgg19(requires_grad=False, show_progress=True)
    else:
        raise ValueError(f'{model} not supported.')

    content_feature_maps_index = model.content_feature_maps_index
    style_feature_maps_indices = model.style_feature_maps_indices
    # layer_names is a dict for Vgg16 and a list for the other models
    if isinstance(model.layer_names, dict):
        layer_names = list(model.layer_names.keys())
    else:
        layer_names = list(model.layer_names)

    content_fms_index_name = (content_feature_maps_index,
                              layer_names[content_feature_maps_index])
    style_fms_indices_names = (style_feature_maps_indices, layer_names)
    return model.to(
        device).eval(), content_fms_index_name, style_fms_indices_names


def yamlSet(key, value):
    with open('src/config.yaml', 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    config[key] = value
    with open('src/config.yaml', 'w') as f:
        yaml.dump(config, f, default_flow_style=False)


def yamlGet(key):
    with open('src/config.yaml', 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    return config[key]


def save_numpy_array_as_jpg(array, name):
    image = Image.fromarray(array)
    image.save("src/data/" + str(name) + '.jpg')
    return "src/data/" + str(name) + '.jpg'


def gram_matrix(x, should_normalize=True):
    (b, ch, h, w) = x.size()
    features = x.view(b, ch, w * h)
    features_t = features.transpose(1, 2)
    gram = features.bmm(features_t)
    if should_normalize:
        gram /= ch * h * w
    return gram


def total_variation(y):
    # summed absolute differences between neighbouring pixels (same formula as
    # TotalVariationLoss in src/neural_style_transfer.py)
    return (torch.sum(torch.abs(y[:, :, :, :-1] - y[:, :, :, 1:])) +
            torch.sum(torch.abs(y[:, :, :-1, :] - y[:, :, 1:, :])))


def getImageAndPath(device):

    if yamlGet('reconstruct') == 'Content':
        img_path = yamlGet('contentPath')
    elif yamlGet('reconstruct') == 'Style':
        img_path = yamlGet('stylePath')

    img = prepare_img(img_path, yamlGet('height'), device)

    return img, img_path


def getContentCurrentData(config):
    current_representation = config.current_set_of_feature_maps[
        config.content_feature_maps_index].squeeze(axis=0)
    loss = torch.nn.MSELoss(reduction='mean')(config.target_representation,
                                              current_representation)
    return loss, current_representation


def getStyleCurrentData(config):
    current_representation = [
        gram_matrix(x)
        for cnt, x in enumerate(config.current_set_of_feature_maps)
        if cnt in config.style_feature_maps_indices
    ]
    loss = 0.0
    for gram_gt, gram_hat in zip(config.target_style_representation,
                                 current_representation):
        loss += torch.nn.MSELoss(reduction='sum')(gram_gt[0], gram_hat[0])

    loss /= len(config.target_style_representation)
    return loss, current_representation


def getCurrentData(config):
    if yamlGet('reconstruct') == 'Content':
        return getContentCurrentData(config)

    elif yamlGet('reconstruct') == 'Style':
        return getStyleCurrentData(config)


def getLBFGSReconstructLoss(config, optimizing_img):

    loss = 0.0

    if yamlGet('reconstruct') == 'Content':
        loss = torch.nn.MSELoss(reduction='mean')(
            config.target_content_representation,
            config.neural_net(optimizing_img)[
                config.content_feature_maps_index].squeeze(axis=0))

    else:
        config.current_set_of_feature_maps = config.neural_net(optimizing_img)
        current_style_representation = [
            gram_matrix(fmaps)
            for i, fmaps in enumerate(config.current_set_of_feature_maps)
            if i in config.style_feature_maps_indices
        ]
        for gram_gt, gram_hat in zip(config.target_style_representation,
                                     current_style_representation):

            loss += (1 / len(config.target_style_representation)) * \
                torch.nn.MSELoss(reduction='sum')(gram_gt[0], gram_hat[0])

    return loss


class Config:

    def __init__(self):
        self.target_representation = 0
        self.target_content_representation = 0
        self.target_style_representation = 0
        self.content_feature_maps_index = 0
        self.style_feature_maps_indices = 0
        self.current_set_of_feature_maps = 0
        self.current_representation = 0
        self.neural_net = 0


class Images:

    def getImages(self, device):

        return [
            self.__getContentImage(device),
            self.__getStyleImage(device),
            self.__getInitImage(device),
        ]

    def __getContentImage(self, device):
        return prepare_img(yamlGet('contentPath'), yamlGet('height'), device)

    def __getStyleImage(self, device):
        return prepare_img(yamlGet('stylePath'), yamlGet('height'), device)

    def __getInitImage(self, device):
        return getInitImage(self.__getContentImage(device),
                            self.__getStyleImage(device), device)


def clearDir():
    path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data")
    reconstructPath = os.path.join(path, "reconstruct")
    transferPath = os.path.join(path, "transfer")
    for transfer_file in os.scandir(transferPath):
        os.remove(transfer_file)
    for reconstruct_file in os.scandir(reconstructPath):
        os.remove(reconstruct_file)
src/utils/video_utils.py ADDED
@@ -0,0 +1,38 @@
import os
import subprocess
import shutil


def create_video_from_intermediate_results(results_path):
    #
    # change this depending on what you want to accomplish (modify out video
    # name, change fps and trim video)
    #
    img_format = (4, '.jpg')
    out_file_name = 'out.mp4'
    fps = 10
    first_frame = 0
    number_of_frames_to_process = len(os.listdir(results_path))
    ffmpeg = 'ffmpeg'
    if shutil.which(ffmpeg):  # if ffmpeg is in system path
        # example: '%4d.png' for (4, '.png')
        img_name_format = '%' + str(img_format[0]) + 'd' + img_format[1]
        pattern = os.path.join(results_path, img_name_format)
        out_video_path = os.path.join(results_path, out_file_name)

        trim_video_command = [
            '-start_number',
            str(first_frame), '-vframes',
            str(number_of_frames_to_process)
        ]
        input_options = ['-r', str(fps), '-i', pattern]
        encoding_options = [
            '-c:v', 'libx264', '-crf', '25', '-pix_fmt', 'yuv420p',
            '-vf', "pad=ceil(iw/2)*2:ceil(ih/2)*2"
        ]
        subprocess.call([
            ffmpeg, *input_options, *trim_video_command, *encoding_options,
            out_video_path
        ])
    else:
        print(f'{ffmpeg} not found in the system path, aborting.')