Badr AlKhamissi committed on
Commit 913d3e3
1 Parent(s): 32b316c

starting space

.gitignore ADDED
@@ -0,0 +1,5 @@
+ *.pyc
+ .DS_Store
+ output
+ code/data/init
+ code/data/arabic-fonts/*.ttf
.gitmodules ADDED
@@ -0,0 +1,3 @@
+ [submodule "diffvg"]
+ path = diffvg
+ url = https://github.com/BachiLi/diffvg.git
README.md CHANGED
@@ -1,8 +1,8 @@
---
title: Word To Image
- emoji: 📚
- colorFrom: purple
- colorTo: green
+ emoji: ✒️ ➡️ 🎨
+ colorFrom: blue
+ colorTo: pink
sdk: gradio
sdk_version: 3.29.0
app_file: app.py
app.py ADDED
@@ -0,0 +1,368 @@
+ import gradio as gr
+ import os
+ import argparse
+ from easydict import EasyDict as edict
+ import yaml
+ import os.path as osp
+ import random
+ import numpy.random as npr
+ import sys
+
+ # sys.path.append('./code')
+
+ sys.path.append('/home/user/app/code')
+
+ # set up diffvg
+
+ # os.system('git clone https://github.com/BachiLi/diffvg.git')
+ os.system('git submodule update --init')
+ os.chdir('diffvg')
+ print(os.getcwd())
+ os.system('git submodule update --init --recursive')
+ print(os.getcwd())
+ os.system('python setup.py install --user')
+ sys.path.append("/home/user/.local/lib/python3.8/site-packages/diffvg-0.0.1-py3.8-linux-x86_64.egg")
+
+ os.chdir('/home/user/app')
+
+ import torch
+ from diffusers import StableDiffusionPipeline
+
+
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+ model = None
+ model = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5",
+                                                 torch_dtype=torch.float16).to(device)
+
+ from typing import Mapping
+ from tqdm import tqdm
+ import torch
+ from torch.optim.lr_scheduler import LambdaLR
+ import pydiffvg
+ import save_svg
+ from losses import SDSLoss, ToneLoss, ConformalLoss
+ from utils import (
+     edict_2_dict,
+     update,
+     check_and_create_dir,
+     get_data_augs,
+     save_image,
+     preprocess,
+     learning_rate_decay,
+     combine_word)
+ import warnings
+
+ TITLE="""<h1 style="font-size: 42px;" align="center">Word-As-Image for Semantic Typography</h1>"""
+ DESCRIPTION="""A demo for [Word-As-Image for Semantic Typography](https://wordasimage.github.io/Word-As-Image-Page/). Word-as-Image creates a visual representation of a word's meaning while maintaining the legibility of the text and the font style.
+ Please select a semantic concept and a letter you wish to optimize; it takes about 5 minutes to perform 500 iterations."""
+
+ DESCRIPTION += '\n<p>This demo is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/"> Creative Commons Attribution-ShareAlike 4.0 International License</a>.</p>'
+
+ if (SPACE_ID := os.getenv('SPACE_ID')) is not None:
+     DESCRIPTION += f'\n<p>For faster inference without waiting in the queue, you may duplicate the Space and upgrade to a GPU in the settings. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>'
+
+
+ warnings.filterwarnings("ignore")
+
+ pydiffvg.set_print_timing(False)
+ gamma = 1.0
+
+
+ def set_config(semantic_concept, word, letter, font_name, num_steps):
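+     # Build the run config: load code/config/base.yaml, walk the parent_config
+     # inheritance chain (an empty string terminates it), let child configs
+     # override their parents, then apply the UI inputs on top with a fixed seed.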
+
+     cfg_d = edict()
+     cfg_d.config = "code/config/base.yaml"
+     cfg_d.experiment = "demo"
+
+     with open(cfg_d.config, 'r') as f:
+         cfg_full = yaml.load(f, Loader=yaml.FullLoader)
+
+     cfg_key = cfg_d.experiment
+     cfgs = [cfg_d]
+     while cfg_key:
+         cfgs.append(cfg_full[cfg_key])
+         cfg_key = cfgs[-1].get('parent_config', 'baseline')
+
+     cfg = edict()
+     for options in reversed(cfgs):
+         update(cfg, options)
+     del cfgs
+
+     cfg.semantic_concept = semantic_concept
+     cfg.word = word
+     cfg.optimized_letter = letter
+     cfg.font = font_name
+     cfg.seed = 0
+     cfg.num_iter = num_steps
+
+     if ' ' in cfg.word:
+         raise gr.Error('the input should be a single word')
+     cfg.caption = f"a {cfg.semantic_concept}. {cfg.prompt_suffix}"
+     cfg.log_dir = f"output/{cfg.experiment}_{cfg.word}"
+     if cfg.optimized_letter not in cfg.word:
+         raise gr.Error('the letter should appear in the word')
+
+     cfg.letter = f"{cfg.font}_{cfg.optimized_letter}_scaled"
+     cfg.target = f"code/data/init/{cfg.letter}"
+
+     # set experiment dir
+     signature = f"{cfg.letter}_concept_{cfg.semantic_concept}_seed_{cfg.seed}"
+     cfg.experiment_dir = \
+         osp.join(cfg.log_dir, cfg.font, signature)
+     configfile = osp.join(cfg.experiment_dir, 'config.yaml')
+
+     # create experiment dir and save config
+     check_and_create_dir(configfile)
+     with open(configfile, 'w') as f:
+         yaml.dump(edict_2_dict(cfg), f)
+
+     if cfg.seed is not None:
+         random.seed(cfg.seed)
+         npr.seed(cfg.seed)
+         torch.manual_seed(cfg.seed)
+         torch.backends.cudnn.benchmark = False
+     else:
+         assert False
+     return cfg
+
+
+ def init_shapes(svg_path, trainable: Mapping[str, bool]):
+     svg = f'{svg_path}.svg'
+     canvas_width, canvas_height, shapes_init, shape_groups_init = pydiffvg.svg_to_scene(svg)
+
+     parameters = edict()
+
+     # path points
+     if trainable.point:
+         parameters.point = []
+         for path in shapes_init:
+             path.points.requires_grad = True
+             parameters.point.append(path.points)
+
+     return shapes_init, shape_groups_init, parameters
+
+
+ def run_main_ex(semantic_concept, word, letter, font_name, num_steps):
+     return list(next(run_main_app(semantic_concept, word, letter, font_name, num_steps, 1)))
+
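+ # Generator driving the demo: rasterize the letter SVG with diffvg, score the
+ # render with the SDS, tone, and conformal losses, backpropagate to the Bezier
+ # control points, and yield intermediate SVGs to the Gradio UI as it goes.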
+ def run_main_app(semantic_concept, word, letter, font_name, num_steps, example=0):
+
+     cfg = set_config(semantic_concept, word, letter, font_name, num_steps)
+
+     pydiffvg.set_use_gpu(torch.cuda.is_available())
+
+     print("preprocessing")
+     preprocess(cfg.font, cfg.word, cfg.optimized_letter, cfg.level_of_cc)
+     filename_init = os.path.join("code/data/init/", f"{cfg.font}_{cfg.word}_scaled.svg").replace(" ", "_")
+     if not example:
+         yield gr.update(value=filename_init, visible=True), gr.update(visible=False), gr.update(visible=False)
+
+     sds_loss = SDSLoss(cfg, device, model)
+
+     h, w = cfg.render_size, cfg.render_size
+
+     data_augs = get_data_augs(cfg.cut_size)
+
+     render = pydiffvg.RenderFunction.apply
+
+     # initialize shape
+     print('initializing shape')
+     shapes, shape_groups, parameters = init_shapes(svg_path=cfg.target, trainable=cfg.trainable)
+
+     # composite the initial RGBA raster over a white background: alpha * rgb + (1 - alpha) * white
+     scene_args = pydiffvg.RenderFunction.serialize_scene(w, h, shapes, shape_groups)
+     img_init = render(w, h, 2, 2, 0, None, *scene_args)
+     img_init = img_init[:, :, 3:4] * img_init[:, :, :3] + \
+         torch.ones(img_init.shape[0], img_init.shape[1], 3, device=device) * (1 - img_init[:, :, 3:4])
+     img_init = img_init[:, :, :3]
+
+     tone_loss = ToneLoss(cfg)
+     tone_loss.set_image_init(img_init)
+
+     num_iter = cfg.num_iter
+     pg = [{'params': parameters["point"], 'lr': cfg.lr_base["point"]}]
+     optim = torch.optim.Adam(pg, betas=(0.9, 0.9), eps=1e-6)
+
+     conformal_loss = ConformalLoss(parameters, device, cfg.optimized_letter, shape_groups)
+
+     lr_lambda = lambda step: learning_rate_decay(step, cfg.lr.lr_init, cfg.lr.lr_final, num_iter,
+                                                  lr_delay_steps=cfg.lr.lr_delay_steps,
+                                                  lr_delay_mult=cfg.lr.lr_delay_mult) / cfg.lr.lr_init
+
+     scheduler = LambdaLR(optim, lr_lambda=lr_lambda, last_epoch=-1)  # lr.base * lrlambda_f
+
+     print("start training")
+     # training loop
+     t_range = tqdm(range(num_iter))
+     for step in t_range:
+         optim.zero_grad()
+
+         # render image
+         scene_args = pydiffvg.RenderFunction.serialize_scene(w, h, shapes, shape_groups)
+         img = render(w, h, 2, 2, step, None, *scene_args)
+
+         # compose image with white background
+         img = img[:, :, 3:4] * img[:, :, :3] + torch.ones(img.shape[0], img.shape[1], 3, device=device) * (
+             1 - img[:, :, 3:4])
+         img = img[:, :, :3]
+
+         filename = os.path.join(
+             cfg.experiment_dir, "video-svg", f"iter{step:04d}.svg")
+         check_and_create_dir(filename)
+         save_svg.save_svg(filename, w, h, shapes, shape_groups)
+         if not example:
+             yield gr.update(visible=True), gr.update(value=filename, label=f'iters: {step} / {num_iter}', visible=True), gr.update(visible=False)
+
+         x = img.unsqueeze(0).permute(0, 3, 1, 2)  # HWC -> NCHW
+         x = x.repeat(cfg.batch_size, 1, 1, 1)
+         x_aug = data_augs.forward(x)
+
+         # compute the SDS (score distillation) loss on the augmented render
+         loss = sds_loss(x_aug)
+
+         tone_loss_res = tone_loss(x, step)
+         loss = loss + tone_loss_res
+
+         loss_angles = conformal_loss()
+         loss_angles = cfg.loss.conformal.angeles_w * loss_angles
+         loss = loss + loss_angles
+
+         loss.backward()
+         optim.step()
+         scheduler.step()
+
+
+     filename = os.path.join(
+         cfg.experiment_dir, "output-svg", "output.svg")
+     check_and_create_dir(filename)
+     save_svg.save_svg(
+         filename, w, h, shapes, shape_groups)
+
+     combine_word(cfg.word, cfg.optimized_letter, cfg.font, cfg.experiment_dir)
+
+     image = os.path.join(cfg.experiment_dir, f"{cfg.font}_{cfg.word}_{cfg.optimized_letter}.svg")
+     yield gr.update(value=filename_init, visible=True), gr.update(visible=False), gr.update(value=image, visible=True)
+
+
+ with gr.Blocks() as demo:
+
+     gr.HTML(TITLE)
+     gr.Markdown(DESCRIPTION)
+
+     with gr.Row():
+         with gr.Column():
+
+             semantic_concept = gr.Text(
+                 label='Semantic Concept',
+                 max_lines=1,
+                 placeholder='Enter a semantic concept. For example: BUNNY'
+             )
+
+             word = gr.Text(
+                 label='Word',
+                 max_lines=1,
+                 placeholder='Enter a word. For example: BUNNY'
+             )
+
+             letter = gr.Text(
+                 label='Letter',
+                 max_lines=1,
+                 placeholder='Choose a letter in the word to optimize. For example: Y'
+             )
+
+             num_steps = gr.Slider(label='Optimization Iterations',
+                                   minimum=0,
+                                   maximum=500,
+                                   step=10,
+                                   value=500)
+
+             # the font is fixed for now; keeping it in the hidden component so
+             # it can be passed through run.click inputs as a Gradio component
+             font_name = gr.Text(value="ArefRuqaa.ttf", visible=False, label="Font Name")
+
+
+             def on_select(evt: gr.SelectData):
+                 return evt.value
+
+             run = gr.Button('Generate')
+
+         with gr.Column():
+             result0 = gr.Image(type="filepath", label="Initial Word").style(height=333)
+             result1 = gr.Image(type="filepath", label="Optimization Process").style(height=110)
+             result2 = gr.Image(type="filepath", label="Final Result", visible=False).style(height=333)
+
+
+     with gr.Row():
+         # examples
+         examples = [
+             [
+                 "BUNNY",
+                 "BUNNY",
+                 "Y",
+                 "KaushanScript-Regular",
+                 500
+             ],
+             [
+                 "LION",
+                 "LION",
+                 "O",
+                 "Quicksand",
+                 500
+             ],
+             [
+                 "FROG",
+                 "FROG",
+                 "G",
+                 "IndieFlower-Regular",
+                 500
+             ],
+             [
+                 "CAT",
+                 "CAT",
+                 "C",
+                 "LuckiestGuy-Regular",
+                 500
+             ],
+         ]
+     demo.queue(max_size=10, concurrency_count=2)
+     # gr.Examples(examples=examples,
+     #             inputs=[
+     #                 semantic_concept,
+     #                 word,
+     #                 letter,
+     #                 font_name,
+     #                 num_steps
+     #             ],
+     #             outputs=[
+     #                 result0,
+     #                 result1,
+     #                 result2
+     #             ],
+     #             fn=run_main_ex,
+     #             cache_examples=True)
+
+
+     # inputs
+     inputs = [
+         semantic_concept,
+         word,
+         letter,
+         font_name,
+         num_steps
+     ]
+
+     outputs = [
+         result0,
+         result1,
+         result2
+     ]
+
+     run.click(fn=run_main_app, inputs=inputs, outputs=outputs, queue=True)
+
+
+ demo.launch(share=False)
code/bezier.py ADDED
@@ -0,0 +1,122 @@
+ import numpy as np
+ import matplotlib.pyplot as plt
+ from scipy.special import binom
+ from numpy.linalg import norm
+
+ def num_bezier(n_ctrl, degree=3):
+     if type(n_ctrl) == np.ndarray:
+         n_ctrl = len(n_ctrl)
+     return int((n_ctrl - 1) / degree)
+
+ def bernstein(n, i):
+     bi = binom(n, i)
+     return lambda t, bi=bi, n=n, i=i: bi * t**i * (1 - t)**(n - i)
+
+ def bezier(P, t, d=0):
+     '''Bezier curve of degree len(P)-1. d is the derivative order (0 gives positions)'''
+     n = P.shape[0] - 1
+     if d > 0:
+         Q = np.diff(P, axis=0)*n
+         return bezier(Q, t, d-1)
+     B = np.vstack([bernstein(n, i)(t) for i, p in enumerate(P)])
+     return (P.T @ B).T
+
+ def cubic_bezier(P, t):
+     return (1.0-t)**3*P[0] + 3*(1.0-t)**2*t*P[1] + 3*(1.0-t)*t**2*P[2] + t**3*P[3]
+
+ def bezier_piecewise(Cp, subd=100, degree=3, d=0):
+     ''' sample a piecewise Bezier curve given a sequence of control points'''
+     num = num_bezier(Cp.shape[0], degree)
+     X = []
+     for i in range(num):
+         P = Cp[i*degree:i*degree+degree+1, :]
+         t = np.linspace(0, 1., subd)[:-1]
+         Y = bezier(P, t, d)
+         X += [Y]
+     X.append(Cp[-1])
+     X = np.vstack(X)
+     return X
+
+ def compute_beziers(beziers, subd=100, degree=3):
+     chain = beziers_to_chain(beziers)
+     return bezier_piecewise(chain, subd, degree)
+
+ def plot_control_polygon(Cp, degree=3, lw=0.5, linecolor=np.ones(3)*0.1):
+     n_bezier = num_bezier(len(Cp), degree)
+     for i in range(n_bezier):
+         cp = Cp[i*degree:i*degree+degree+1, :]
+         if degree == 3:
+             plt.plot(cp[0:2, 0], cp[0:2, 1], ':', color=linecolor, linewidth=lw)
+             plt.plot(cp[2:, 0], cp[2:, 1], ':', color=linecolor, linewidth=lw)
+             plt.plot(cp[:, 0], cp[:, 1], 'o', color=[0, 0.5, 1.], markersize=4)
+         else:
+             plt.plot(cp[:, 0], cp[:, 1], ':', color=linecolor, linewidth=lw)
+             plt.plot(cp[:, 0], cp[:, 1], 'o', color=[0, 0.5, 1.])
+
+
+ def chain_to_beziers(chain, degree=3):
+     ''' Convert Bezier chain to list of curve segments (4 control points each)'''
+     num = num_bezier(chain.shape[0], degree)
+     beziers = []
+     for i in range(num):
+         beziers.append(chain[i*degree:i*degree+degree+1, :])
+     return beziers
+
+
+ def beziers_to_chain(beziers):
+     ''' Convert list of Bezier curve segments to a piecewise bezier chain (shares vertices)'''
+     n = len(beziers)
+     chain = []
+     for i in range(n):
+         chain.append(list(beziers[i][:-1]))
+     chain.append([beziers[-1][-1]])
+     return np.array(sum(chain, []))
+
+
+ def split_cubic(bez, t):
+     # de Casteljau subdivision of a cubic at parameter t
+     p1, p2, p3, p4 = bez
+
+     p12 = (p2 - p1) * t + p1
+     p23 = (p3 - p2) * t + p2
+     p34 = (p4 - p3) * t + p3
+
+     p123 = (p23 - p12) * t + p12
+     p234 = (p34 - p23) * t + p23
+
+     p1234 = (p234 - p123) * t + p123
+
+     return np.array([p1, p12, p123, p1234]), np.array([p1234, p234, p34, p4])
+
+
+ def approx_arc_length(bez):
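+     # Fixed 5-term approximation of the cubic's arc length (no elementary
+     # closed form exists): a weighted sum of norms of control-point
+     # combinations. Used by subdivide_bezier below to decide when a
+     # segment is short enough to stop splitting.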
+     c0, c1, c2, c3 = bez
+     v0 = norm(c1-c0)*0.15
+     v1 = norm(-0.558983582205757*c0 + 0.325650248872424*c1 + 0.208983582205757*c2 + 0.024349751127576*c3)
+     v2 = norm(c3-c0+c2-c1)*0.26666666666666666
+     v3 = norm(-0.024349751127576*c0 - 0.208983582205757*c1 - 0.325650248872424*c2 + 0.558983582205757*c3)
+     v4 = norm(c3-c2)*.15
+     return v0 + v1 + v2 + v3 + v4
+
+
+ def subdivide_bezier(bez, thresh):
+     stack = [bez]
+     res = []
+     while stack:
+         bez = stack.pop()
+         l = approx_arc_length(bez)
+         if l < thresh:
+             res.append(bez)
+         else:
+             b1, b2 = split_cubic(bez, 0.5)
+             stack += [b2, b1]
+     return res
+
+ def subdivide_bezier_chain(C, thresh):
+     beziers = chain_to_beziers(C)
+     res = []
+     for bez in beziers:
+         res += subdivide_bezier(bez, thresh)
+     return beziers_to_chain(res)
+
+
code/collage.py ADDED
@@ -0,0 +1,41 @@
+ import os
+ import imageio
+ import numpy as np
+ from glob import glob
+ from PIL import Image, ImageSequence
+
+ if __name__ == "__main__":
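+     # Tile 5x5 animal-word GIFs into one collage GIF; the first and last
+     # frame of each clip are written 5 times so the animation pauses on them.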
+
+     path = "/Users/bkhmsi/Desktop/Animal-Words/*.gif"
+     save_path = os.path.join(os.path.dirname(path), "collage.gif")
+
+
+     width, height = 400, 400
+     nx, ny = 5, 5
+     n_frames = 67
+     collage = np.ones((n_frames+10, width*nx, height*ny)).astype(np.uint8)
+
+     filenames = [p for p in glob(path) if os.path.basename(p)[:-4] not in ["palestine", "amin", "collage"]]
+     print(f"> {len(filenames)} Files Found")
+     for file in filenames:
+         print(os.path.basename(file))
+
+     assert nx*ny <= len(filenames)
+
+     for i in range(nx):
+         for j in range(ny):
+             image = Image.open(filenames[i*ny+j])
+             assert image.is_animated
+             idx = 0
+             for frame_idx in range(image.n_frames):
+                 image.seek(frame_idx)
+                 frame = image.convert('L').copy()
+                 if frame_idx == 0 or frame_idx == image.n_frames-1:
+                     for _ in range(5):
+                         collage[idx, i*width:(i+1)*width, j*height:(j+1)*height] = np.asarray(frame)[100:500, 100:500]
+                         idx += 1
+                 else:
+                     collage[idx, i*width:(i+1)*width, j*height:(j+1)*height] = np.asarray(frame)[100:500, 100:500]
+                     idx += 1
+
+     imageio.mimsave(save_path, collage)
code/config.py ADDED
@@ -0,0 +1,108 @@
+ import argparse
+ import os.path as osp
+ import yaml
+ import random
+ from easydict import EasyDict as edict
+ import numpy.random as npr
+ import torch
+ from utils import (
+     edict_2_dict,
+     check_and_create_dir,
+     update)
+ import wandb
+ import warnings
+ warnings.filterwarnings("ignore")
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--config", type=str, default="code/config/base.yaml")
+     parser.add_argument("--experiment", type=str, default="conformal_0.5_dist_pixel_100_kernel201")
+     parser.add_argument("--seed", type=int, default=0)
+     parser.add_argument('--log_dir', metavar='DIR', default="output")
+     parser.add_argument('--font', type=str, default="none", help="font name")
+     parser.add_argument('--semantic_concept', type=str, help="the semantic concept to insert")
+     parser.add_argument('--word', type=str, default="none", help="the text to work on")
+     parser.add_argument('--prompt_suffix', type=str, default="minimal flat 2d vector. lineal color."
+                                                              " trending on artstation")
+     parser.add_argument('--optimized_letter', type=str, default="none", help="the letter in the word to optimize")
+     parser.add_argument('--batch_size', type=int, default=1)
+     parser.add_argument('--use_wandb', type=int, default=0)
+     parser.add_argument('--wandb_user', type=str, default="none")
+
+     cfg = edict()
+     args = parser.parse_args()
+     with open('TOKEN', 'r') as f:
+         setattr(args, 'token', f.read().replace('\n', ''))
+     cfg.config = args.config
+     cfg.experiment = args.experiment
+     cfg.seed = args.seed
+     cfg.font = args.font
+     cfg.semantic_concept = args.semantic_concept
+     cfg.word = cfg.semantic_concept if args.word == "none" else args.word
+     if " " in cfg.word:
+         raise ValueError('no spaces are allowed')
+     if "jpeg" in args.semantic_concept:
+         cfg.caption = args.semantic_concept
+     else:
+         cfg.caption = f"a {args.semantic_concept}. {args.prompt_suffix}"
+
+     cfg.log_dir = f"{args.log_dir}/{args.experiment}_{cfg.word}"
+     if args.optimized_letter in cfg.word:
+         cfg.optimized_letter = args.optimized_letter
+     else:
+         raise ValueError('the letter should be in the word')
+     cfg.batch_size = args.batch_size
+     cfg.token = args.token
+     cfg.use_wandb = args.use_wandb
+     cfg.wandb_user = args.wandb_user
+     cfg.letter = f"{args.font}_{args.optimized_letter}_scaled"
+     cfg.target = f"code/data/init/{cfg.letter}"
+
+     return cfg
+
+
+ def set_config():
+
+     cfg_arg = parse_args()
+     with open(cfg_arg.config, 'r') as f:
+         cfg_full = yaml.load(f, Loader=yaml.FullLoader)
+
+     # recursively traverse parent_config pointers in the config dicts
+     cfg_key = cfg_arg.experiment
+     cfgs = [cfg_arg]
+     while cfg_key:
+         cfgs.append(cfg_full[cfg_key])
+         cfg_key = cfgs[-1].get('parent_config', 'baseline')
+
+     # allowing children configs to override their parents
+     cfg = edict()
+     for options in reversed(cfgs):
+         update(cfg, options)
+     del cfgs
+
+     # set experiment dir
+     signature = f"{cfg.letter}_concept_{cfg.semantic_concept}_seed_{cfg.seed}"
+     cfg.experiment_dir = \
+         osp.join(cfg.log_dir, cfg.font, signature)
+     configfile = osp.join(cfg.experiment_dir, 'config.yaml')
+     print('Config:', cfg)
+
+     # create experiment dir and save config
+     check_and_create_dir(configfile)
+     with open(configfile, 'w') as f:
+         yaml.dump(edict_2_dict(cfg), f)
+
+     if cfg.use_wandb:
+         wandb.init(project="Word-As-Image", entity=cfg.wandb_user,
+                    config=cfg, name=f"{signature}", id=wandb.util.generate_id())
+
+     if cfg.seed is not None:
+         random.seed(cfg.seed)
+         npr.seed(cfg.seed)
+         torch.manual_seed(cfg.seed)
+         torch.backends.cudnn.benchmark = False
+     else:
+         assert False
+
+     return cfg
code/config/base.yaml ADDED
@@ -0,0 +1,59 @@
+ baseline:
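+   # experiments inherit via parent_config ('' ends the chain); child keys
+   # override parent keys (resolved in set_config in code/config.py)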
+   parent_config: ''
+   save:
+     init: true
+     image: true
+     video: true
+     video_frame_freq: 1
+   trainable:
+     point: true
+   lr_base:
+     point: 1
+   lr:
+     lr_init: 0.002
+     lr_final: 0.0008
+     lr_delay_mult: 0.1
+     lr_delay_steps: 100
+   num_iter: 500
+   render_size: 600
+   cut_size: 512
+   level_of_cc: 0  # 0 - original number of cc / 1 - recommended / 2 - more control points
+   seed: 0
+   diffusion:
+     model: "runwayml/stable-diffusion-v1-5"  # "stabilityai/stable-diffusion-2-1"
+     timesteps: 1000
+     guidance_scale: 100
+   loss:
+     use_sds_loss: true
+     tone:
+       use_tone_loss: false
+     conformal:
+       use_conformal_loss: false
+
+ conformal_0.5_dist_pixel_100_kernel201:
+   parent_config: baseline
+   level_of_cc: 1
+   loss:
+     tone:
+       use_tone_loss: true
+       dist_loss_weight: 100
+       pixel_dist_kernel_blur: 201
+       pixel_dist_sigma: 30
+     conformal:
+       use_conformal_loss: true
+       angeles_w: 0.5
+
+ Animals:
+   parent_config: baseline
+   level_of_cc: 1
+   num_iter: 500
+   loss:
+     tone:
+       use_tone_loss: true
+       dist_loss_weight: 100
+       pixel_dist_kernel_blur: 201
+       pixel_dist_sigma: 30
+     conformal:
+       use_conformal_loss: true
+       angeles_w: 0.5
+
code/data/arabic-fonts/dl-fonts.sh ADDED
@@ -0,0 +1,20 @@
+ wget https://arbfonts.com//wp-content/fonts/diwany-arabic-fonts//mcs-diwany-jaly-s-u.ttf -O 01.ttf
+ wget https://arbfonts.com//wp-content/fonts/new-arabic-fonts//46-Diwani-Bent.ttf -O 02.ttf
+ wget https://arbfonts.com//wp-content/fonts/diwany-arabic-fonts//diwany-edited.ttf -O 03.ttf
+ wget https://arbfonts.com/wp-content/fonts/diwany-arabic-fonts//arbfonts-samt-7017.ttf -O 04.ttf
+ wget https://arbfonts.com//wp-content/fonts/kufi-arabic-fonts//QadasiRegular.ttf -O 05.ttf
+ wget https://arbfonts.com//wp-content/fonts/kufi-arabic-fonts//Spirit-Of-Doha-Black.otf -O 06.ttf
+ wget https://arbfonts.com//wp-content/fonts/new-arabic-fonts//AlQalam-alavi.ttf -O 07.ttf
+ wget https://arbfonts.com//wp-content/fonts/new-arabic-fonts//22-andlso.ttf -O 08.ttf
+ wget https://arbfonts.com//wp-content/fonts/new-arabic-fonts//ArefRuqaa-Bold-1.ttf -O 09.ttf
+ wget https://arbfonts.com//wp-content/fonts/new-arabic-fonts//ArefRuqaa-Regular-1.ttf -O 10.ttf
+ wget https://arbfonts.com//wp-content/fonts/new-arabic-fonts//117-Barada-Reqa.ttf -O 11.ttf
+ wget https://arbfonts.com/wp-content/fonts/diwany-arabic-fonts//arbfonts-diwany-thuluth.ttf -O 12.ttf
+ wget https://arbfonts.com//wp-content/fonts/unlimited-free-arabic-fonts//UthmanicHafs1-Ver09_2.otf -O 13.ttf
+ wget https://arbfonts.com//wp-content/fonts/new-arabic-fonts//ALAMERAH-Thin.ttf -O 14.ttf
+ wget https://arbfonts.com/font_files/horr/unicode/Boahmed%20Alhour.ttf -O 15.ttf
+ wget https://arbfonts.com//wp-content/fonts/misc//K-Kamran.ttf -O 16.ttf
+ wget https://arbfonts.com//wp-content/fonts/farsi-free-fonts//Jamil-nory.ttf -O 17.ttf
+ wget https://arbfonts.com//wp-content/fonts/brands-arasbic-fonts//Mobily.ttf -O 18.ttf
+ wget https://arbfonts.com//wp-content/fonts/new-arabic-fonts//QTSManga-Regular-1.ttf -O 19.ttf
+ wget https://arbfonts.com//wp-content/fonts/arabic-fonts/new//Al-Jazeera-Arabic-Regular.ttf -O 20.ttf
code/data/arabic-fonts/font_names.txt ADDED
@@ -0,0 +1,20 @@
+ ديواني جلي
+ ديواني مشكل
+ ديواني طويل
+ ديواني بسيط
+ كوفي بسيط
+ كوفي منحني
+ فارسي بسيط
+ مغربي اندلس
+ رقعة مدبب
+ رقعة بسيط
+ رقعة سريع
+ ثلث ديواني
+ ثلث بسيط
+ مربع بسيط
+ حر مدبب
+ حر بسيط
+ حر طويل
+ موبايلي
+ منجا
+ الجزيرة
code/data/fonts/ArefRuqaa.ttf ADDED
Binary file (111 kB)

code/data/fonts/Bell MT.ttf ADDED
Binary file (84.8 kB)

code/data/fonts/DeliusUnicase-Regular.ttf ADDED
Binary file (31.5 kB)

code/data/fonts/HobeauxRococeaux-Sherman.ttf ADDED
Binary file (117 kB)

code/data/fonts/IndieFlower-Regular.ttf ADDED
Binary file (55.4 kB)

code/data/fonts/JosefinSans-Light.ttf ADDED
Binary file (59.3 kB)

code/data/fonts/KaushanScript-Regular.ttf ADDED
Binary file (184 kB)

code/data/fonts/LuckiestGuy-Regular.ttf ADDED
Binary file (58.3 kB)

code/data/fonts/Noteworthy-Bold.ttf ADDED
Binary file (248 kB)

code/data/fonts/Quicksand.ttf ADDED
Binary file (124 kB)

code/data/fonts/Saira-Regular.ttf ADDED
Binary file (82.8 kB)
 
code/harfbuzz_test.py ADDED
@@ -0,0 +1,33 @@
+ import vharfbuzz as hv
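+ # Shape each Arabic animal name with HarfBuzz (kerning and ligatures enabled)
+ # and write the shaped outlines to one SVG file per English name.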
+
+ animal_names = [
+     {"english": "cat", "arabic": "قطة"},
+     {"english": "Lion", "arabic": "أسد"},
+     {"english": "Elephant", "arabic": "فيل"},
+     {"english": "Tiger", "arabic": "نمر"},
+     {"english": "Cheetah", "arabic": "فهد"},
+     {"english": "Monkey", "arabic": "قرد"},
+     {"english": "Dolphin", "arabic": "دلفين"},
+     {"english": "Penguin", "arabic": "بطريق"},
+     {"english": "Kangaroo", "arabic": "كنغر"},
+     {"english": "Fox", "arabic": "ثعلب"},
+     {"english": "Eagle", "arabic": "نسر"},
+     {"english": "Wolf", "arabic": "ذئب"},
+     {"english": "Turtle", "arabic": "سلحفاة"},
+     {"english": "Panda", "arabic": "باندا"},
+     {"english": "Giraffe", "arabic": "زرافة"},
+     {"english": "Bear", "arabic": "دب"},
+     {"english": "Owl", "arabic": "بومة"}
+ ]
+
+ fontpath = './data/fonts/ArefRuqaa.ttf'
+ vhb = hv.Vharfbuzz(fontpath)
+
+ path_templ = "/Users/bkhmsi/Desktop/Animal-Words/correct/{}.svg"
+
+ for animal in animal_names:
+     txt = animal["arabic"]
+     buf = vhb.shape(txt, {"features": {"kern": True, "liga": True}})
+     svg = vhb.buf_to_svg(buf)
+     with open(path_templ.format(animal["english"]), 'w') as fout:
+         fout.write(svg)
code/losses.py ADDED
@@ -0,0 +1,198 @@
+ import torch.nn as nn
+ import torchvision
+ from scipy.spatial import Delaunay
+ import torch
+ import numpy as np
+ from torch.nn import functional as nnf
+ from easydict import EasyDict
+ from shapely.geometry import Point
+ from shapely.geometry.polygon import Polygon
+ from torchvision import transforms
+ from PIL import Image
+ from transformers import CLIPProcessor, CLIPModel
+
+ from diffusers import StableDiffusionPipeline
+
+ class SDSLoss(nn.Module):
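+     # Score Distillation Sampling (SDS): encode the render into the SD latent
+     # space, noise it at a random timestep, predict the noise with the frozen
+     # UNet under classifier-free guidance, and apply w(t) * (eps_pred - eps)
+     # as a gradient on the latent via the detached-gradient trick in forward().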
+     def __init__(self, cfg, device, pipe=None):
+         super(SDSLoss, self).__init__()
+         self.cfg = cfg
+         self.device = device
+         # reuse a preloaded pipeline when one is passed in (app.py does this);
+         # otherwise load it here, authenticating with cfg.token (main.py path)
+         if pipe is None:
+             pipe = StableDiffusionPipeline.from_pretrained(cfg.diffusion.model,
+                                                            torch_dtype=torch.float16, use_auth_token=cfg.token)
+         self.pipe = pipe.to(self.device)
+
+         self.clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(self.device)
+         self.clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
+
+         # default scheduler: PNDMScheduler(beta_start=0.00085, beta_end=0.012,
+         # beta_schedule="scaled_linear", num_train_timesteps=1000)
+         self.alphas = self.pipe.scheduler.alphas_cumprod.to(self.device)
+         self.sigmas = (1 - self.pipe.scheduler.alphas_cumprod).to(self.device)
+
+         self.text_embeddings = None
+         self.embed_text()
+
+     def embed_text(self):
+         # tokenize and embed the caption (or a reference image in the jpeg case)
+
+         if "jpeg" not in self.cfg.caption:
+             text_input = self.pipe.tokenizer(self.cfg.caption, padding="max_length",
+                                              max_length=self.pipe.tokenizer.model_max_length,
+                                              truncation=True, return_tensors="pt")
+             uncond_input = self.pipe.tokenizer([""], padding="max_length",
+                                                max_length=text_input.input_ids.shape[-1],
+                                                return_tensors="pt")
+             with torch.no_grad():
+                 text_embeddings = self.pipe.text_encoder(text_input.input_ids.to(self.device))[0]
+                 uncond_embeddings = self.pipe.text_encoder(uncond_input.input_ids.to(self.device))[0]
+         else:
+             print(f"> Reading Image {self.cfg.caption}")
+             with torch.no_grad():
+                 image = Image.open(self.cfg.caption)
+                 inputs = self.clip_processor(images=image, return_tensors="pt").to(self.device)
+                 img_emb = self.clip_model.get_image_features(**inputs)
+                 text_embeddings = img_emb
+                 uncond_embeddings = img_emb
+
+         print(text_embeddings.size())
+         print(uncond_embeddings.size())
+         self.text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
+         self.text_embeddings = self.text_embeddings.repeat_interleave(self.cfg.batch_size, 0)
+         del self.pipe.tokenizer
+         del self.pipe.text_encoder
+
+
+     def forward(self, x_aug):
+         sds_loss = 0
+
+         # encode rendered image
+         x = x_aug * 2. - 1.
+         with torch.cuda.amp.autocast():
+             init_latent_z = (self.pipe.vae.encode(x).latent_dist.sample())
+         latent_z = 0.18215 * init_latent_z  # scaling_factor * init_latents
+
+         with torch.inference_mode():
+             # sample timesteps
+             timestep = torch.randint(
+                 low=50,
+                 high=min(950, self.cfg.diffusion.timesteps) - 1,  # avoid highest timestep | diffusion.timesteps=1000
+                 size=(latent_z.shape[0],),
+                 device=self.device, dtype=torch.long)
+
+             # add noise
+             eps = torch.randn_like(latent_z)
+             # zt = alpha_t * latent_z + sigma_t * eps
+             noised_latent_zt = self.pipe.scheduler.add_noise(latent_z, eps, timestep)
+
+             # denoise
+             z_in = torch.cat([noised_latent_zt] * 2)  # expand latents for classifier free guidance
+             timestep_in = torch.cat([timestep] * 2)  # match the doubled batch (was passing timestep)
+             with torch.autocast(device_type="cuda", dtype=torch.float16):
+                 eps_t_uncond, eps_t = self.pipe.unet(z_in, timestep_in, encoder_hidden_states=self.text_embeddings).sample.float().chunk(2)
+
+             eps_t = eps_t_uncond + self.cfg.diffusion.guidance_scale * (eps_t - eps_t_uncond)
+
+             # w = alphas[timestep]^0.5 * (1 - alphas[timestep]) = alphas[timestep]^0.5 * sigmas[timestep]
+             grad_z = self.alphas[timestep]**0.5 * self.sigmas[timestep] * (eps_t - eps)
+             assert torch.isfinite(grad_z).all()
+             grad_z = torch.nan_to_num(grad_z.detach().float(), 0.0, 0.0, 0.0)
+
+         sds_loss = grad_z.clone() * latent_z
+         del grad_z
+
+         sds_loss = sds_loss.sum(1).mean()
+         return sds_loss
+
+
+ class ToneLoss(nn.Module):
+     def __init__(self, cfg):
+         super(ToneLoss, self).__init__()
+         self.dist_loss_weight = cfg.loss.tone.dist_loss_weight
+         self.im_init = None
+         self.cfg = cfg
+         self.mse_loss = nn.MSELoss()
+         self.blurrer = torchvision.transforms.GaussianBlur(kernel_size=(cfg.loss.tone.pixel_dist_kernel_blur,
+                                                            cfg.loss.tone.pixel_dist_kernel_blur), sigma=(cfg.loss.tone.pixel_dist_sigma))
+
+     def set_image_init(self, im_init):
+         self.im_init = im_init.permute(2, 0, 1).unsqueeze(0)
+         self.init_blurred = self.blurrer(self.im_init)
+
+
+     def get_scheduler(self, step=None):
+         if step is not None:
+             return self.dist_loss_weight * np.exp(-(1/5)*((step-300)/(20)) ** 2)
+         else:
+             return self.dist_loss_weight
+
+     def forward(self, cur_raster, step=None):
+         blurred_cur = self.blurrer(cur_raster)
+         return self.mse_loss(self.init_blurred.detach(), blurred_cur) * self.get_scheduler(step)
+
+
+ class ConformalLoss:
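+     # Encourages the deformed letter to stay close to angle-preserving
+     # (conformal): Delaunay-triangulate the initial control points, cache the
+     # triangle angles, and penalize the MSE between current and cached angles.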
+     def __init__(self, parameters: EasyDict, device: torch.device, target_letter: str, shape_groups):
+         self.parameters = parameters
+         self.target_letter = target_letter
+         self.shape_groups = shape_groups
+         self.faces = self.init_faces(device)
+         self.faces_roll_a = [torch.roll(self.faces[i], 1, 1) for i in range(len(self.faces))]
+
+         with torch.no_grad():
+             self.angles = []
+             self.reset()
+
+
+     def get_angles(self, points: torch.Tensor) -> torch.Tensor:
+         angles_ = []
+         for i in range(len(self.faces)):
+             triangles = points[self.faces[i]]
+             triangles_roll_a = points[self.faces_roll_a[i]]
+             edges = triangles_roll_a - triangles
+             length = edges.norm(dim=-1)
+             edges = edges / (length + 1e-1)[:, :, None]
+             edges_roll = torch.roll(edges, 1, 1)
+             cosine = torch.einsum('ned,ned->ne', edges, edges_roll)
+             angles = torch.arccos(cosine)
+             angles_.append(angles)
+         return angles_
+
+     def get_letter_inds(self, letter_to_insert):
+         for group, l in zip(self.shape_groups, self.target_letter):
+             if l == letter_to_insert:
+                 letter_inds = group.shape_ids
+                 return letter_inds[0], letter_inds[-1], len(letter_inds)
+
+     def reset(self):
+         points = torch.cat([point.clone().detach() for point in self.parameters.point])
+         self.angles = self.get_angles(points)
+
+     def init_faces(self, device: torch.device) -> torch.tensor:
+         faces_ = []
+         for j, c in enumerate(self.target_letter):
+             points_np = [self.parameters.point[i].clone().detach().cpu().numpy() for i in range(len(self.parameters.point))]
+             start_ind, end_ind, shapes_per_letter = self.get_letter_inds(c)
+             print(c, start_ind, end_ind)
+             holes = []
+             if shapes_per_letter > 1:
+                 holes = points_np[start_ind+1:end_ind]
+             poly = Polygon(points_np[start_ind], holes=holes)
+             poly = poly.buffer(0)
+             points_np = np.concatenate(points_np)
+             faces = Delaunay(points_np).simplices
+             is_intersect = np.array([poly.contains(Point(points_np[face].mean(0))) for face in faces], dtype=np.bool_)
+             faces_.append(torch.from_numpy(faces[is_intersect]).to(device, dtype=torch.int64))
+         return faces_
+
+     def __call__(self) -> torch.Tensor:
+         loss_angles = 0
+         points = torch.cat(self.parameters.point)
+         angles = self.get_angles(points)
+         for i in range(len(self.faces)):
+             loss_angles += (nnf.mse_loss(angles[i], self.angles[i]))
+         return loss_angles
+
+
code/main.py ADDED
@@ -0,0 +1,184 @@
+ from typing import Mapping
+ import os
+ from tqdm import tqdm
+ from easydict import EasyDict as edict
+ import matplotlib.pyplot as plt
+ import torch
+ from torch.optim.lr_scheduler import LambdaLR
+ import pydiffvg
+ import save_svg
+ from losses import SDSLoss, ToneLoss, ConformalLoss
+ from config import set_config
+ from utils import (
+     check_and_create_dir,
+     get_data_augs,
+     save_image,
+     preprocess,
+     learning_rate_decay,
+     combine_word,
+     create_video)
+ import wandb
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ pydiffvg.set_print_timing(False)
+ gamma = 1.0
+
+
+ def init_shapes(svg_path, trainable: Mapping[str, bool]):
+
+     svg = f'{svg_path}.svg'
+     canvas_width, canvas_height, shapes_init, shape_groups_init = pydiffvg.svg_to_scene(svg)
+
+     parameters = edict()
+
+     # path points
+     if trainable.point:
+         parameters.point = []
+         for path in shapes_init:
+             path.points.requires_grad = True
+             parameters.point.append(path.points)
+
+     return shapes_init, shape_groups_init, parameters
+
+
+ if __name__ == "__main__":
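+     # Script counterpart of app.py: preprocess the font/word into a scaled
+     # letter SVG, then optimize its control points with the SDS, tone, and
+     # conformal losses selected by the experiment config.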
+
+     cfg = set_config()
+
+     # use GPU if available
+     pydiffvg.set_use_gpu(torch.cuda.is_available())
+     device = pydiffvg.get_device()
+
+     # cfg.word = cfg.word[::-1]
+
+     print("preprocessing")
+     preprocess(cfg.font, cfg.word, cfg.optimized_letter, cfg.level_of_cc)
+
+     if cfg.loss.use_sds_loss:
+         sds_loss = SDSLoss(cfg, device)
+
+     h, w = cfg.render_size, cfg.render_size
+
+     data_augs = get_data_augs(cfg.cut_size)
+
+     render = pydiffvg.RenderFunction.apply
+
+     # initialize shape
+     print('initializing shape')
+     shapes, shape_groups, parameters = init_shapes(svg_path=cfg.target, trainable=cfg.trainable)
+
+     scene_args = pydiffvg.RenderFunction.serialize_scene(w, h, shapes, shape_groups)
+     img_init = render(w, h, 2, 2, 0, None, *scene_args)
+     img_init = img_init[:, :, 3:4] * img_init[:, :, :3] + \
+         torch.ones(img_init.shape[0], img_init.shape[1], 3, device=device) * (1 - img_init[:, :, 3:4])
+     img_init = img_init[:, :, :3]
+     if cfg.use_wandb:
+         plt.imshow(img_init.detach().cpu())
+         wandb.log({"init": wandb.Image(plt)}, step=0)
+         plt.close()
+
+     if cfg.loss.tone.use_tone_loss:
+         tone_loss = ToneLoss(cfg)
+         tone_loss.set_image_init(img_init)
+
+     if cfg.save.init:
+         print('saving init')
+         filename = os.path.join(
+             cfg.experiment_dir, "svg-init", "init.svg")
+         check_and_create_dir(filename)
+         save_svg.save_svg(filename, w, h, shapes, shape_groups)
+
+     num_iter = cfg.num_iter
+     pg = [{'params': parameters["point"], 'lr': cfg.lr_base["point"]}]
+     optim = torch.optim.Adam(pg, betas=(0.9, 0.9), eps=1e-6)
+
+     if cfg.loss.conformal.use_conformal_loss:
+         conformal_loss = ConformalLoss(parameters, device, cfg.optimized_letter, shape_groups)
+
+     lr_lambda = lambda step: learning_rate_decay(step, cfg.lr.lr_init, cfg.lr.lr_final, num_iter,
+                                                  lr_delay_steps=cfg.lr.lr_delay_steps,
+                                                  lr_delay_mult=cfg.lr.lr_delay_mult) / cfg.lr.lr_init
+
+     scheduler = LambdaLR(optim, lr_lambda=lr_lambda, last_epoch=-1)  # lr.base * lrlambda_f
+
+     print("start training")
+     # training loop
+     t_range = tqdm(range(num_iter))
+     for step in t_range:
+         if cfg.use_wandb:
+             wandb.log({"learning_rate": optim.param_groups[0]['lr']}, step=step)
+         optim.zero_grad()
+
+         # render image
+         scene_args = pydiffvg.RenderFunction.serialize_scene(w, h, shapes, shape_groups)
+         img = render(w, h, 2, 2, step, None, *scene_args)
+
+         # compose image with white background
+         img = img[:, :, 3:4] * img[:, :, :3] + torch.ones(img.shape[0], img.shape[1], 3, device=device) * (1 - img[:, :, 3:4])
+         img = img[:, :, :3]
+
+         if cfg.save.video and (step % cfg.save.video_frame_freq == 0 or step == num_iter - 1):
+             save_image(img, os.path.join(cfg.experiment_dir, "video-png", f"iter{step:04d}.png"), gamma)
+             filename = os.path.join(
+                 cfg.experiment_dir, "video-svg", f"iter{step:04d}.svg")
+             check_and_create_dir(filename)
+             save_svg.save_svg(
+                 filename, w, h, shapes, shape_groups)
+             if cfg.use_wandb:
+                 plt.imshow(img.detach().cpu())
+                 wandb.log({"img": wandb.Image(plt)}, step=step)
+                 plt.close()
+
+         x = img.unsqueeze(0).permute(0, 3, 1, 2)  # HWC -> NCHW
+         x = x.repeat(cfg.batch_size, 1, 1, 1)
+         x_aug = data_augs.forward(x)
+
+         # compute the SDS (score distillation) loss on the augmented render
+         loss = sds_loss(x_aug)
+         if cfg.use_wandb:
+             wandb.log({"sds_loss": loss.item()}, step=step)
+
+         if cfg.loss.tone.use_tone_loss:
+             tone_loss_res = tone_loss(x, step)
+             if cfg.use_wandb:
+                 wandb.log({"dist_loss": tone_loss_res}, step=step)
+             loss = loss + tone_loss_res
+
+         if cfg.loss.conformal.use_conformal_loss:
+             loss_angles = conformal_loss()
+             loss_angles = cfg.loss.conformal.angeles_w * loss_angles
+             if cfg.use_wandb:
+                 wandb.log({"loss_angles": loss_angles}, step=step)
+             loss = loss + loss_angles
+
+         t_range.set_postfix({'loss': loss.item()})
+         loss.backward()
+         optim.step()
+         scheduler.step()
+
+     filename = os.path.join(
+         cfg.experiment_dir, "output-svg", "output.svg")
+     check_and_create_dir(filename)
+     save_svg.save_svg(
+         filename, w, h, shapes, shape_groups)
+
+     combine_word(cfg.word, cfg.optimized_letter, cfg.font, cfg.experiment_dir)
+
+     if cfg.save.image:
+         filename = os.path.join(
+             cfg.experiment_dir, "output-png", "output.png")
+         check_and_create_dir(filename)
+         imshow = img.detach().cpu()
+         pydiffvg.imwrite(imshow, filename, gamma=gamma)
+         if cfg.use_wandb:
+             plt.imshow(img.detach().cpu())
+             wandb.log({"img": wandb.Image(plt)}, step=step)
+             plt.close()
+
+     if cfg.save.video:
+         print("saving video")
+         create_video(cfg.num_iter, cfg.experiment_dir, cfg.save.video_frame_freq)
+
+     if cfg.use_wandb:
+         wandb.finish()
code/save_svg.py ADDED
@@ -0,0 +1,155 @@
+ import torch
+ import pydiffvg
+ import xml.etree.ElementTree as etree
+ from xml.dom import minidom
+
+ def prettify(elem):
+     """Return a pretty-printed XML string for the Element.
+     """
+     rough_string = etree.tostring(elem, 'utf-8')
+     reparsed = minidom.parseString(rough_string)
+     return reparsed.toprettyxml(indent=" ")
+
+ def save_svg(filename, width, height, shapes, shape_groups, use_gamma = False, background=None):
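+     # Serialize a pydiffvg scene to an SVG file: gradient fills/strokes are
+     # emitted into <defs>, each shape group is merged into a single <path>,
+     # and use_gamma wraps the group in a 1/2.2 gamma filter.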
+     root = etree.Element('svg')
+     root.set('version', '1.1')
+     root.set('xmlns', 'http://www.w3.org/2000/svg')
+     root.set('width', str(width))
+     root.set('height', str(height))
+     if background is not None:
+         print(f"setting background to {background}")
+         root.set('style', str(background))
+     defs = etree.SubElement(root, 'defs')
+     g = etree.SubElement(root, 'g')
+     if use_gamma:
+         f = etree.SubElement(defs, 'filter')
+         f.set('id', 'gamma')
+         f.set('x', '0')
+         f.set('y', '0')
+         f.set('width', '100%')
+         f.set('height', '100%')
+         gamma = etree.SubElement(f, 'feComponentTransfer')
+         gamma.set('color-interpolation-filters', 'sRGB')
+         feFuncR = etree.SubElement(gamma, 'feFuncR')
+         feFuncR.set('type', 'gamma')
+         feFuncR.set('amplitude', str(1))
+         feFuncR.set('exponent', str(1/2.2))
+         feFuncG = etree.SubElement(gamma, 'feFuncG')
+         feFuncG.set('type', 'gamma')
+         feFuncG.set('amplitude', str(1))
+         feFuncG.set('exponent', str(1/2.2))
+         feFuncB = etree.SubElement(gamma, 'feFuncB')
+         feFuncB.set('type', 'gamma')
+         feFuncB.set('amplitude', str(1))
+         feFuncB.set('exponent', str(1/2.2))
+         feFuncA = etree.SubElement(gamma, 'feFuncA')
+         feFuncA.set('type', 'gamma')
+         feFuncA.set('amplitude', str(1))
+         feFuncA.set('exponent', str(1/2.2))
+         g.set('style', 'filter:url(#gamma)')
+     # Store color
+     for i, shape_group in enumerate(shape_groups):
+         def add_color(shape_color, name):
+             if isinstance(shape_color, pydiffvg.LinearGradient):
+                 lg = shape_color
+                 color = etree.SubElement(defs, 'linearGradient')
+                 color.set('id', name)
+                 color.set('x1', str(lg.begin[0].item()/width))
+                 color.set('y1', str(lg.begin[1].item()/height))
+                 color.set('x2', str(lg.end[0].item()/width))
+                 color.set('y2', str(lg.end[1].item()/height))
+                 offsets = lg.offsets.data.cpu().numpy()
+                 stop_colors = lg.stop_colors.data.cpu().numpy()
+                 for j in range(offsets.shape[0]):
+                     stop = etree.SubElement(color, 'stop')
+                     stop.set('offset', str(offsets[j]))
+                     c = lg.stop_colors[j, :]
+                     stop.set('stop-color', 'rgb({}, {}, {})'.format(
+                         int(255 * c[0]), int(255 * c[1]), int(255 * c[2])))
+                     stop.set('stop-opacity', '{}'.format(c[3]))
+             if isinstance(shape_color, pydiffvg.RadialGradient):
+                 lg = shape_color
+                 color = etree.SubElement(defs, 'radialGradient')
+                 color.set('id', name)
+                 color.set('cx', str(lg.center[0].item()/width))
+                 color.set('cy', str(lg.center[1].item()/height))
+                 # this only supports width == height
+                 color.set('r', str(lg.radius[0].item()/width))
+                 offsets = lg.offsets.data.cpu().numpy()
+                 stop_colors = lg.stop_colors.data.cpu().numpy()
+                 for j in range(offsets.shape[0]):
+                     stop = etree.SubElement(color, 'stop')
+                     stop.set('offset', str(offsets[j]))
+                     c = lg.stop_colors[j, :]
+                     stop.set('stop-color', 'rgb({}, {}, {})'.format(
+                         int(255 * c[0]), int(255 * c[1]), int(255 * c[2])))
+                     stop.set('stop-opacity', '{}'.format(c[3]))
+         if shape_group.fill_color is not None:
+             add_color(shape_group.fill_color, 'shape_{}_fill'.format(i))
+         if shape_group.stroke_color is not None:
+             add_color(shape_group.stroke_color, 'shape_{}_stroke'.format(i))
+     for i, shape_group in enumerate(shape_groups):
+         # shape = shapes[shape_group.shape_ids[0]]
+         for j, id in enumerate(shape_group.shape_ids):
+             shape = shapes[id]
+             if isinstance(shape, pydiffvg.Path):
+                 if j == 0:
+                     shape_node = etree.SubElement(g, 'path')
+                     path_str = ''
+                 # shape_node = etree.SubElement(g, 'path')
+                 num_segments = shape.num_control_points.shape[0]
+                 num_control_points = shape.num_control_points.data.cpu().numpy()
+                 points = shape.points.data.cpu().numpy()
+                 num_points = shape.points.shape[0]
+                 path_str += 'M {} {}'.format(points[0, 0], points[0, 1])
+                 point_id = 1
+                 for k in range(0, num_segments):  # renamed from j to avoid clobbering the shape-id index
+                     if num_control_points[k] == 0:
+                         p = point_id % num_points
+                         path_str += ' L {} {}'.format(
+                             points[p, 0], points[p, 1])
+                         point_id += 1
+                     elif num_control_points[k] == 1:
+                         p1 = (point_id + 1) % num_points
+                         path_str += ' Q {} {} {} {}'.format(
+                             points[point_id, 0], points[point_id, 1],
+                             points[p1, 0], points[p1, 1])
+                         point_id += 2
+                     elif num_control_points[k] == 2:
+                         p2 = (point_id + 2) % num_points
+                         path_str += ' C {} {} {} {} {} {}'.format(
+                             points[point_id, 0], points[point_id, 1],
+                             points[point_id + 1, 0], points[point_id + 1, 1],
+                             points[p2, 0], points[p2, 1])
+                         point_id += 3
+                     else:
+                         assert(False)
+                 # shape_node.set('stroke-width', str(2 * shape.stroke_width.data.cpu().item()))
+                 shape_node.set('stroke-width', str(0))  # no strokes
+                 if shape_group.fill_color is not None:
+                     if isinstance(shape_group.fill_color, pydiffvg.LinearGradient):
+                         shape_node.set('fill', 'url(#shape_{}_fill)'.format(i))
+                     elif isinstance(shape_group.fill_color, pydiffvg.RadialGradient):
+                         shape_node.set('fill', 'url(#shape_{}_fill)'.format(i))
+                     else:
+                         c = shape_group.fill_color.data.cpu().numpy()
+                         shape_node.set('fill', 'rgb({}, {}, {})'.format(
+                             int(255 * c[0]), int(255 * c[1]), int(255 * c[2])))
+                         shape_node.set('opacity', str(c[3]))
+                 else:
+                     shape_node.set('fill', 'none')
+                 if shape_group.stroke_color is not None:
+                     if isinstance(shape_group.stroke_color, pydiffvg.LinearGradient):
+                         shape_node.set('stroke', 'url(#shape_{}_stroke)'.format(i))
+                     elif isinstance(shape_group.stroke_color, pydiffvg.RadialGradient):  # was LinearGradient twice
+                         shape_node.set('stroke', 'url(#shape_{}_stroke)'.format(i))
+                     else:
+                         c = shape_group.stroke_color.data.cpu().numpy()
+                         shape_node.set('stroke', 'rgb({}, {}, {})'.format(
+                             int(255 * c[0]), int(255 * c[1]), int(255 * c[2])))
+                         shape_node.set('stroke-opacity', str(c[3]))
+                 shape_node.set('stroke-linecap', 'round')
+                 shape_node.set('stroke-linejoin', 'round')
+
+         shape_node.set('d', path_str)
+
+     with open(filename, "w") as f:
+         f.write(prettify(root))
code/ttf.py ADDED
@@ -0,0 +1,409 @@
1
+ from importlib import reload
2
+ import os
3
+ import numpy as np
4
+ import bezier
5
+ import freetype as ft
6
+ import pydiffvg
7
+ import torch
8
+ import save_svg
9
+ import vharfbuzz as hb
10
+ from svgpathtools import svgstr2paths
11
+ import xml.etree.ElementTree as ET
12
+
13
+
14
+ device = torch.device("cuda" if (
15
+ torch.cuda.is_available() and torch.cuda.device_count() > 0) else "cpu")
16
+
17
+ reload(bezier)
18
+
19
+ def fix_single_svg(svg_path, all_word=False):
20
+ target_h_letter = 360
21
+ target_canvas_width, target_canvas_height = 600, 600
22
+
23
+ canvas_width, canvas_height, shapes, shape_groups = pydiffvg.svg_to_scene(svg_path)
24
+
25
+ letter_h = canvas_height
26
+ letter_w = canvas_width
27
+
28
+ if all_word:
29
+ if letter_w > letter_h:
30
+ scale_canvas_w = target_h_letter / letter_w
31
+ hsize = int(letter_h * scale_canvas_w)
32
+ scale_canvas_h = hsize / letter_h
33
+ else:
34
+ scale_canvas_h = target_h_letter / letter_h
35
+ wsize = int(letter_w * scale_canvas_h)
36
+ scale_canvas_w = wsize / letter_w
37
+ else:
38
+ scale_canvas_h = target_h_letter / letter_h
39
+ wsize = int(letter_w * scale_canvas_h)
40
+ scale_canvas_w = wsize / letter_w
41
+
42
+ for num, p in enumerate(shapes):
43
+ p.points[:, 0] = p.points[:, 0] * scale_canvas_w
44
+ p.points[:, 1] = p.points[:, 1] * scale_canvas_h + target_h_letter
45
+ p.points[:, 1] = -p.points[:, 1]
46
+ # p.points[:, 0] = -p.points[:, 0]
47
+
48
+ w_min, w_max = min([torch.min(p.points[:, 0]) for p in shapes]), max([torch.max(p.points[:, 0]) for p in shapes])
49
+ h_min, h_max = min([torch.min(p.points[:, 1]) for p in shapes]), max([torch.max(p.points[:, 1]) for p in shapes])
50
+
51
+ for num, p in enumerate(shapes):
52
+ p.points[:, 0] = p.points[:, 0] + target_canvas_width/2 - int(w_min + (w_max - w_min) / 2)
53
+ p.points[:, 1] = p.points[:, 1] + target_canvas_height/2 - int(h_min + (h_max - h_min) / 2)
54
+
55
+ output_path = f"{svg_path[:-4]}_scaled.svg"
56
+ save_svg.save_svg(output_path, target_canvas_width, target_canvas_height, shapes, shape_groups)
57
+
58
+ def normalize_letter_size(dest_path, font, txt, chars):
59
+ fontname = os.path.splitext(os.path.basename(font))[0]
60
+ # for i, c in enumerate(chars):
61
+ # fname = f"{dest_path}/{fontname}_{c}.svg"
62
+ # fname = fname.replace(" ", "_")
63
+ # fix_single_svg(fname)
64
+
65
+ fname = f"{dest_path}/{fontname}_{txt}.svg"
66
+ fname = fname.replace(" ", "_")
67
+ fix_single_svg(fname, all_word=True)
68
+
69
+
70
+ def glyph_to_cubics(face, x=0, y=0):
71
+ ''' Convert current font face glyph to cubic beziers'''
72
+
73
+ def linear_to_cubic(Q):
74
+ a, b = Q
75
+ return [a + (b - a) * t for t in np.linspace(0, 1, 4)]
76
+
77
+ def quadratic_to_cubic(Q):
78
+ return [Q[0],
79
+ Q[0] + (2 / 3) * (Q[1] - Q[0]),
80
+ Q[2] + (2 / 3) * (Q[1] - Q[2]),
81
+ Q[2]]
82
+
83
+ beziers = []
84
+ pt = lambda p: np.array([x + p.x, - p.y - y]) # Flipping here since freetype has y-up
85
+ last = lambda: beziers[-1][-1]
86
+
87
+ def move_to(a, beziers):
88
+ beziers.append([pt(a)])
89
+
90
+ def line_to(a, beziers):
91
+ Q = linear_to_cubic([last(), pt(a)])
92
+ beziers[-1] += Q[1:]
93
+
94
+ def conic_to(a, b, beziers):
95
+ Q = quadratic_to_cubic([last(), pt(a), pt(b)])
96
+ beziers[-1] += Q[1:]
97
+
98
+ def cubic_to(a, b, c, beziers):
99
+ beziers[-1] += [pt(a), pt(b), pt(c)]
100
+
101
+ face.glyph.outline.decompose(beziers, move_to=move_to, line_to=line_to, conic_to=conic_to, cubic_to=cubic_to)
102
+ beziers = [np.array(C).astype(float) for C in beziers]
103
+ return beziers
104
+
105
+ # def handle_ligature(glyph_infos, glyph_positions):
106
+ # combined_advance = sum(pos.x_advance for pos in glyph_positions)
107
+ # first_x_offset = glyph_positions[0].x_offset
108
+
109
+ # combined_advance = x_adv_1 + x_adv_2
110
+
111
+
112
+
113
+
114
+ # # Adjust the x_offset values based on the difference between the first glyph's x_offset and the combined_advance
115
+ # for pos in glyph_positions:
116
+ # pos.x_offset += combined_advance - pos.x_advance - first_x_offset
117
+
118
+ # # Render the ligature using the adjusted glyph positions
119
+ # render_glyphs(glyph_infos, glyph_positions)
120
+
121
+
+ def font_string_to_beziers(font, txt, size=30, spacing=1.0, merge=True, target_control=None):
+     '''Load a font and convert the outlines of a given string to cubic Bezier curves.
+     If merge is True, return a flat list of all Bezier chains;
+     otherwise return one list of chains per glyph.'''
+     print(font)
+ 
+     vhb = hb.Vharfbuzz(font)
+     buf = vhb.shape(txt, {"features": {"kern": True, "liga": True}})
+     buf.guess_segment_properties()
+ 
+     glyph_infos = buf.glyph_infos
+     glyph_positions = buf.glyph_positions
+     glyph_count = {glyph_infos[i].cluster: 0 for i in range(len(glyph_infos))}
+ 
+     svg = vhb.buf_to_svg(buf)
+     paths, attributes = svgstr2paths(svg)
+ 
+     face = ft.Face(font)
+     face.set_char_size(64 * size)
+     pindex = -1
+ 
+     x, y = 0, 0
+     beziers, chars = [], []
+ 
+     # Debug pass: sample each shaped SVG path segment and log its type.
+     for path_idx, path in enumerate(paths):
+         segment_vals = []
+         print("=" * 20 + str(path_idx) + "=" * 20)
+         for segment in path:
+             segment_type = segment.__class__.__name__
+             t_values = np.linspace(0, 1, 10)
+             points = [segment.point(t) for t in t_values]
+             for pt in points:
+                 segment_vals += [[pt.real, -pt.imag]]
+ 
+             # points = [bezier.point(t) for t in t_values]
+ 
+             if segment_type == 'Line':
+                 # Line segment
+                 start = segment.start
+                 end = segment.end
+                 print(f"Line: ({start.real}, {start.imag}) to ({end.real}, {end.imag})")
+             elif segment_type == 'QuadraticBezier':
+                 # Quadratic Bézier segment
+                 start = segment.start
+                 control = segment.control
+                 end = segment.end
+                 print(f"Quadratic Bézier: ({start.real}, {start.imag}) to ({end.real}, {end.imag}) with control point ({control.real}, {control.imag})")
+             elif segment_type == 'CubicBezier':
+                 # Cubic Bézier segment
+                 start = segment.start
+                 control1 = segment.control1
+                 control2 = segment.control2
+                 end = segment.end
+                 print(f"Cubic Bézier: ({start.real}, {start.imag}) to ({end.real}, {end.imag}) with control points ({control1.real}, {control1.imag}) and ({control2.real}, {control2.imag})")
+             else:
+                 # Other segment types (Arc, Close)
+                 print(f"Segment type: {segment_type}")
+ 
+         beziers += [[np.array(segment_vals)]]
+ 
+     beziers_2 = []
+     glyph_infos = glyph_infos[::-1]
+     glyph_positions = glyph_positions[::-1]
+     for i, (info, pos) in enumerate(zip(glyph_infos, glyph_positions)):
+         index = info.cluster
+         c = f"{txt[index]}_{glyph_count[index]}"
+         chars += [c]
+         glyph_count[index] += 1
+         glyph_index = info.codepoint
+         face.load_glyph(glyph_index, flags=ft.FT_LOAD_DEFAULT | ft.FT_LOAD_NO_BITMAP)
+         # face.load_char(c, ft.FT_LOAD_DEFAULT | ft.FT_LOAD_NO_BITMAP)
+ 
+         findex = -1
+         if i + 1 < len(glyph_infos):
+             findex = glyph_infos[i + 1].cluster
+             foffset = (glyph_positions[i + 1].x_offset, glyph_positions[i + 1].y_offset)
+             fadvance = (glyph_positions[i + 1].x_advance, glyph_positions[i + 1].y_advance)
+ 
+         # bez = glyph_to_cubics(face, x+pos.x_offset+pos.x_advance, y+pos.y_offset+pos.y_advance)
+         # if findex != index:
+         #     x += pos.x_offset
+         #     y += pos.y_offset
+         # else:
+         #     x += pos.x_offset
+         #     y += pos.y_offset
+ 
+         bez = glyph_to_cubics(face, x, y)
+ 
+         # Check number of control points if desired: keep subdividing the
+         # longest curves until the glyph reaches its target count.
+         if target_control is not None:
+             if c in target_control.keys():
+                 nctrl = np.sum([len(C) for C in bez])
+                 while nctrl < target_control[c]:
+                     longest = np.max(
+                         sum([[bezier.approx_arc_length(b) for b in bezier.chain_to_beziers(C)] for C in bez], []))
+                     thresh = longest * 0.5
+                     bez = [bezier.subdivide_bezier_chain(C, thresh) for C in bez]
+                     nctrl = np.sum([len(C) for C in bez])
+                     print(nctrl)
+ 
+         if merge:
+             beziers_2 += bez
+         else:
+             beziers_2.append(bez)
+ 
+         # kerning = face.get_kerning(index, findex)
+         # x += (slot.advance.x + kerning.x) * spacing
+         # previous = txt[index]
+ 
+         # print(f"C: {txt[index]}/{index} | X: {x+pos.x_offset}| Y: {y+pos.y_offset}")
+         print(f"C: {txt[index]}/{index} | X: {x}: {pos.x_advance}/{pos.x_offset} | Y: {y}: {pos.y_advance}/{pos.y_offset}")
+ 
+         # if findex != index:
+         x -= pos.x_advance
+         # y += pos.y_advance + pos.y_offset
+ 
+         pindex = index
+ 
+     return beziers_2, chars
+ 
+ 
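+ # Editor's note, an illustrative shaping probe (font path and input string
+ # are hypothetical): vharfbuzz exposes the shaped buffer, so clusters and
+ # advances can be inspected directly before converting any outlines:
+ #   vhb = hb.Vharfbuzz("data/fonts/KaushanScript-Regular.ttf")
+ #   buf = vhb.shape("fi", {"features": {"kern": True, "liga": True}})
+ #   for info, pos in zip(buf.glyph_infos, buf.glyph_positions):
+ #       print(info.codepoint, info.cluster, pos.x_advance, pos.x_offset)
+ #   # With "liga" enabled, an 'fi' ligature appears as a single glyph whose
+ #   # cluster points at the first character of the pair.
+ 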
+ def bezier_chain_to_commands(C, closed=True):
+     curves = bezier.chain_to_beziers(C)
+     cmds = 'M %f %f ' % (C[0][0], C[0][1])
+     n = len(curves)
+     for i, bez in enumerate(curves):
+         if i == n - 1 and closed:
+             cmds += 'C %f %f %f %f %f %fz ' % (*bez[1], *bez[2], *bez[3])
+         else:
+             cmds += 'C %f %f %f %f %f %f ' % (*bez[1], *bez[2], *bez[3])
+     return cmds
+ 
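+ # Editor's note: for intuition, a closed two-curve chain yields path data of
+ # the form (values hypothetical):
+ #   M 0.0 0.0 C 10.0 0.0 20.0 10.0 20.0 20.0 C 10.0 30.0 0.0 30.0 0.0 0.0z
+ # i.e. one moveto followed by one cubic 'C' command per Bezier, with 'z'
+ # appended to the final command when closed=True.
+ 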
+ def count_cp(file_name, font_name):
+     canvas_width, canvas_height, shapes, shape_groups = pydiffvg.svg_to_scene(file_name)
+     p_counter = 0
+     for path in shapes:
+         p_counter += path.points.shape[0]
+     print(f"TOTAL CP: [{p_counter}]")
+     return p_counter
+ 
+ 
+ def write_letter_svg(c, header, fontname, beziers, subdivision_thresh, dest_path):
+     cmds = ''
+     svg = header
+ 
+     path = '<g><path d="'
+     for C in beziers:
+         if subdivision_thresh is not None:
+             print('subd')
+             C = bezier.subdivide_bezier_chain(C, subdivision_thresh)
+         cmds += bezier_chain_to_commands(C, True)
+     path += cmds + '"/>\n'
+     svg += path + '</g></svg>\n'
+ 
+     fname = f"{dest_path}/{fontname}_{c}.svg"
+     fname = fname.replace(" ", "_")
+     f = open(fname, 'w')
+     f.write(svg)
+     f.close()
+     return fname, path
+ 
+ 
+ def write_letter_svg_hb(vhb, c, dest_path, fontname):
+     buf = vhb.shape(c, {"features": {"kern": True, "liga": True}})
+     svg = vhb.buf_to_svg(buf)
+ 
+     fname = f"{dest_path}/{fontname}_{c}.svg"
+     fname = fname.replace(" ", "_")
+     f = open(fname, 'w')
+     f.write(svg)
+     f.close()
+     return fname
+ 
+ 
+ def font_string_to_svgs(dest_path, font, txt, size=30, spacing=1.0, target_control=None, subdivision_thresh=None):
+ 
+     fontname = os.path.splitext(os.path.basename(font))[0]
+     glyph_beziers, chars = font_string_to_beziers(font, txt, size, spacing, merge=False, target_control=target_control)
+     if not os.path.isdir(dest_path):
+         os.mkdir(dest_path)
+     # Compute bounding box
+     points = np.vstack(sum(glyph_beziers, []))
+     lt = np.min(points, axis=0)
+     rb = np.max(points, axis=0)
+     size = rb - lt
+ 
+     sizestr = 'width="%.1f" height="%.1f"' % (size[0], size[1])
+     boxstr = ' viewBox="%.1f %.1f %.1f %.1f"' % (lt[0], lt[1], size[0], size[1])
+     header = '''<?xml version="1.0" encoding="utf-8"?>
+     <svg xmlns="http://www.w3.org/2000/svg" xmlns:ev="http://www.w3.org/2001/xml-events" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" baseProfile="full" '''
+     header += sizestr
+     header += boxstr
+     header += '>\n<defs/>\n'
+ 
+     svg_all = header
+ 
+     print(f"Len Glyph Bezier: {len(glyph_beziers)} | Chars: {len(chars)}")
+     for i, (c, beziers) in enumerate(zip(chars, glyph_beziers)):
+         print(f"==== {c} ====")
+         fname, path = write_letter_svg(c, header, fontname, beziers, subdivision_thresh, dest_path)
+ 
+         num_cp = count_cp(fname, fontname)
+         print(num_cp)
+         print(font, c)
+         # Add to global svg
+         svg_all += path + '</g>\n'
+ 
+     vhb = hb.Vharfbuzz(font)
+     buf = vhb.shape(txt, {"features": {"kern": True, "liga": True}})
+     svg = vhb.buf_to_svg(buf)
+ 
+     # Save global svg
+     # NOTE: the whole-word file written here is the HarfBuzz rendering (svg);
+     # the hand-assembled svg_all document above is currently unused.
+     svg_all += '</svg>\n'
+     fname = f"{dest_path}/{fontname}_{txt}.svg"
+     fname = fname.replace(" ", "_")
+     f = open(fname, 'w')
+     f.write(svg)
+     f.close()
+     return chars
+ 
+ def font_string_to_svgs_hb(dest_path, font, txt, size=30, spacing=1.0, target_control=None, subdivision_thresh=None):
+ 
+     fontname = os.path.splitext(os.path.basename(font))[0]
+     if not os.path.isdir(dest_path):
+         os.mkdir(dest_path)
+ 
+     vhb = hb.Vharfbuzz(font)
+     buf = vhb.shape(txt, {"features": {"kern": True, "liga": True}})
+     buf.guess_segment_properties()
+ 
+     # NOTE: this second shape() call creates a fresh buffer; the segment
+     # properties guessed above are not carried over to it.
+     buf = vhb.shape(txt, {"features": {"kern": True, "liga": True}})
+     svg = vhb.buf_to_svg(buf)
+ 
+     # Save global svg
+     fname = f"{dest_path}/{fontname}_{txt}.svg"
+     fname = fname.replace(" ", "_")
+     f = open(fname, 'w')
+     f.write(svg)
+     f.close()
+     return None
+ 
+ if __name__ == '__main__':
+ 
+     fonts = ["KaushanScript-Regular"]
+     level_of_cc = 1
+ 
+     if level_of_cc == 0:
+         target_cp = None
+     else:
+         target_cp = {"A": 120, "B": 120, "C": 100, "D": 100,
+                      "E": 120, "F": 120, "G": 120, "H": 120,
+                      "I": 35, "J": 80, "K": 100, "L": 80,
+                      "M": 100, "N": 100, "O": 100, "P": 120,
+                      "Q": 120, "R": 130, "S": 110, "T": 90,
+                      "U": 100, "V": 100, "W": 100, "X": 130,
+                      "Y": 120, "Z": 120,
+                      "a": 120, "b": 120, "c": 100, "d": 100,
+                      "e": 120, "f": 120, "g": 120, "h": 120,
+                      "i": 35, "j": 80, "k": 100, "l": 80,
+                      "m": 100, "n": 100, "o": 100, "p": 120,
+                      "q": 120, "r": 130, "s": 110, "t": 90,
+                      "u": 100, "v": 100, "w": 100, "x": 130,
+                      "y": 120, "z": 120
+                      }
+ 
+         target_cp = {k: v * level_of_cc for k, v in target_cp.items()}
+ 
+     for f in fonts:
+         print(f"======= {f} =======")
+         font_path = f"data/fonts/{f}.ttf"
+         output_path = f"data/init"
+         txt = "BUNNY"
+         subdivision_thresh = None
+         # Capture the returned chars and pass them through; the original call
+         # omitted normalize_letter_size's fourth argument.
+         chars = font_string_to_svgs(output_path, font_path, txt, target_control=target_cp,
+                                     subdivision_thresh=subdivision_thresh)
+         normalize_letter_size(output_path, font_path, txt, chars)
+ 
+     print("DONE")
+ 
code/utils.py ADDED
@@ -0,0 +1,225 @@
+ import collections.abc
+ import os
+ import os.path as osp
+ from torch import nn
+ import kornia.augmentation as K
+ import pydiffvg
+ import save_svg
+ import cv2
+ from ttf import font_string_to_svgs, font_string_to_svgs_hb, normalize_letter_size
+ import torch
+ import numpy as np
+ 
+ 
+ def edict_2_dict(x):
+     if isinstance(x, dict):
+         xnew = {}
+         for k in x:
+             xnew[k] = edict_2_dict(x[k])
+         return xnew
+     elif isinstance(x, list):
+         xnew = []
+         for i in range(len(x)):
+             xnew.append(edict_2_dict(x[i]))
+         return xnew
+     else:
+         return x
+ 
+ 
+ def check_and_create_dir(path):
+     pathdir = osp.split(path)[0]
+     if osp.isdir(pathdir):
+         pass
+     else:
+         os.makedirs(pathdir)
+ 
+ 
+ def update(d, u):
+     """https://stackoverflow.com/questions/3232943/update-value-of-a-nested-dictionary-of-varying-depth"""
+     for k, v in u.items():
+         if isinstance(v, collections.abc.Mapping):
+             d[k] = update(d.get(k, {}), v)
+         else:
+             d[k] = v
+     return d
+ 
+ 
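+ # Editor's note, an illustrative call: update() merges nested mappings
+ # recursively instead of replacing whole sub-dicts, e.g.
+ #   cfg = {"loss": {"tone": {"use": True}, "w": 1.0}}
+ #   update(cfg, {"loss": {"tone": {"use": False}}})
+ #   # -> {"loss": {"tone": {"use": False}, "w": 1.0}}; "w" is preserved.
+ 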
+ def preprocess(font, word, letter, level_of_cc=1):
+ 
+     if level_of_cc == 0:
+         target_cp = None
+     else:
+         target_cp = {"A": 120, "B": 120, "C": 100, "D": 100,
+                      "E": 120, "F": 120, "G": 120, "H": 120,
+                      "I": 35, "J": 80, "K": 100, "L": 80,
+                      "M": 100, "N": 100, "O": 100, "P": 120,
+                      "Q": 120, "R": 130, "S": 110, "T": 90,
+                      "U": 100, "V": 100, "W": 100, "X": 130,
+                      "Y": 120, "Z": 120,
+                      "a": 120, "b": 120, "c": 100, "d": 100,
+                      "e": 120, "f": 120, "g": 120, "h": 120,
+                      "i": 35, "j": 80, "k": 100, "l": 80,
+                      "m": 100, "n": 100, "o": 100, "p": 120,
+                      "q": 120, "r": 130, "s": 110, "t": 90,
+                      "u": 100, "v": 100, "w": 100, "x": 130,
+                      "y": 120, "z": 120
+                      }
+         target_cp = {k: v * level_of_cc for k, v in target_cp.items()}
+ 
+     print(f"======= {font} =======")
+     if font[0] in ['0', '1', '2']:
+         font_path = f"code/data/arabic-fonts/{font}.ttf"
+     else:
+         font_path = f"code/data/fonts/{font}.ttf"
+ 
+     init_path = f"code/data/init"
+     subdivision_thresh = None
+     # NOTE: font_string_to_svgs_hb currently returns None, and
+     # normalize_letter_size no longer uses its chars argument.
+     chars = font_string_to_svgs_hb(init_path, font_path, word, target_control=target_cp,
+                                    subdivision_thresh=subdivision_thresh)
+     normalize_letter_size(init_path, font_path, word, chars)
+ 
+     # optimize two adjacent letters
+     if len(letter) > 1:
+         subdivision_thresh = None
+         font_string_to_svgs_hb(init_path, font_path, letter, target_control=target_cp,
+                                subdivision_thresh=subdivision_thresh)
+         normalize_letter_size(init_path, font_path, letter, chars)
+ 
+     print("Done preprocess")
+ 
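+ # Illustrative call (editor's sketch; font name and inputs are examples):
+ #   preprocess("KaushanScript-Regular", word="BUNNY", letter="Y", level_of_cc=1)
+ # writes the normalized word and letter SVGs under code/data/init/.
+ 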
+ def get_data_augs(cut_size):
+     augmentations = []
+     augmentations.append(K.RandomPerspective(distortion_scale=0.5, p=0.7))
+     augmentations.append(K.RandomCrop(size=(cut_size, cut_size), pad_if_needed=True, padding_mode='reflect', p=1.0))
+     return nn.Sequential(*augmentations)
+ 
+ 
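+ # Illustrative usage (editor's sketch; the shapes are examples): the returned
+ # module expects NCHW image batches, as Kornia augmentations do.
+ #   augs = get_data_augs(224)
+ #   batch = torch.rand(4, 3, 600, 600)   # hypothetical rendered canvases
+ #   out = augs(batch)                    # -> shape (4, 3, 224, 224)
+ 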
+ '''pytorch adaptation of https://github.com/google/mipnerf'''
+ def learning_rate_decay(step,
+                         lr_init,
+                         lr_final,
+                         max_steps,
+                         lr_delay_steps=0,
+                         lr_delay_mult=1):
+     """Continuous learning rate decay function.
+     The returned rate is lr_init when step=0 and lr_final when step=max_steps, and
+     is log-linearly interpolated elsewhere (equivalent to exponential decay).
+     If lr_delay_steps > 0, the learning rate is scaled by a smooth function of
+     lr_delay_mult, so that it starts at lr_init * lr_delay_mult and eases back
+     to the normal schedule once step > lr_delay_steps.
+     Args:
+         step: int, the current optimization step.
+         lr_init: float, the initial learning rate.
+         lr_final: float, the final learning rate.
+         max_steps: int, the number of steps during optimization.
+         lr_delay_steps: int, the number of steps to delay the full learning rate.
+         lr_delay_mult: float, the multiplier on the rate when delaying it.
+     Returns:
+         lr: the learning rate for the current step.
+     """
+     if lr_delay_steps > 0:
+         # A kind of reverse cosine decay.
+         delay_rate = lr_delay_mult + (1 - lr_delay_mult) * np.sin(
+             0.5 * np.pi * np.clip(step / lr_delay_steps, 0, 1))
+     else:
+         delay_rate = 1.
+     t = np.clip(step / max_steps, 0, 1)
+     log_lerp = np.exp(np.log(lr_init) * (1 - t) + np.log(lr_final) * t)
+     return delay_rate * log_lerp
+ 
+ 
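+ # Illustrative scheduler hookup (editor's sketch): with the optimizer's base
+ # lr set to 1.0, torch's LambdaLR can use the returned rate directly. The
+ # hyperparameter values below are examples, not the project's settings.
+ #   from torch.optim.lr_scheduler import LambdaLR
+ #   optim = torch.optim.Adam(params, lr=1.0)
+ #   sched = LambdaLR(optim, lr_lambda=lambda s: learning_rate_decay(
+ #       s, lr_init=2e-3, lr_final=8e-4, max_steps=500,
+ #       lr_delay_steps=100, lr_delay_mult=0.1))
+ 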
+ def save_image(img, filename, gamma=1):
+     check_and_create_dir(filename)
+     imshow = img.detach().cpu()
+     pydiffvg.imwrite(imshow, filename, gamma=gamma)
+ 
+ 
+ def get_letter_ids(letter, word, shape_groups):
+     # Returns the shape ids of the first occurrence of `letter` in `word`.
+     for group, l in zip(shape_groups, word):
+         if l == letter:
+             return group.shape_ids
+ 
+ 
+ def combine_word(word, letter, font, experiment_dir):
+     word_svg_scaled = f"./code/data/init/{font}_{word}_scaled.svg"
+     canvas_width_word, canvas_height_word, shapes_word, shape_groups_word = pydiffvg.svg_to_scene(word_svg_scaled)
+ 
+     letter_ids = []
+     for l in letter:
+         letter_ids += get_letter_ids(l, word, shape_groups_word)
+ 
+     w_min, w_max = min([torch.min(shapes_word[ids].points[:, 0]) for ids in letter_ids]), max(
+         [torch.max(shapes_word[ids].points[:, 0]) for ids in letter_ids])
+     h_min, h_max = min([torch.min(shapes_word[ids].points[:, 1]) for ids in letter_ids]), max(
+         [torch.max(shapes_word[ids].points[:, 1]) for ids in letter_ids])
+ 
+     c_w = (-w_min + w_max) / 2
+     c_h = (-h_min + h_max) / 2
+ 
+     svg_result = os.path.join(experiment_dir, "output-svg", "output.svg")
+     canvas_width, canvas_height, shapes, shape_groups = pydiffvg.svg_to_scene(svg_result)
+ 
+     out_w_min, out_w_max = min([torch.min(p.points[:, 0]) for p in shapes]), max(
+         [torch.max(p.points[:, 0]) for p in shapes])
+     out_h_min, out_h_max = min([torch.min(p.points[:, 1]) for p in shapes]), max(
+         [torch.max(p.points[:, 1]) for p in shapes])
+ 
+     out_c_w = (-out_w_min + out_w_max) / 2
+     out_c_h = (-out_h_min + out_h_max) / 2
+ 
+     scale_canvas_w = (w_max - w_min) / (out_w_max - out_w_min)
+     scale_canvas_h = (h_max - h_min) / (out_h_max - out_h_min)
+ 
+     # Scale the optimized letter uniformly by the larger factor, then shift it
+     # so that it is centered on the original letter's bounding box.
+     if scale_canvas_h > scale_canvas_w:
+         wsize = int((out_w_max - out_w_min) * scale_canvas_h)
+         scale_canvas_w = wsize / (out_w_max - out_w_min)
+         shift_w = -out_c_w * scale_canvas_w + c_w
+     else:
+         hsize = int((out_h_max - out_h_min) * scale_canvas_w)
+         scale_canvas_h = hsize / (out_h_max - out_h_min)
+         shift_h = -out_c_h * scale_canvas_h + c_h
+ 
+     for num, p in enumerate(shapes):
+         p.points[:, 0] = p.points[:, 0] * scale_canvas_w
+         p.points[:, 1] = p.points[:, 1] * scale_canvas_h
+         if scale_canvas_h > scale_canvas_w:
+             p.points[:, 0] = p.points[:, 0] - out_w_min * scale_canvas_w + w_min + shift_w
+             p.points[:, 1] = p.points[:, 1] - out_h_min * scale_canvas_h + h_min
+         else:
+             p.points[:, 0] = p.points[:, 0] - out_w_min * scale_canvas_w + w_min
+             p.points[:, 1] = p.points[:, 1] - out_h_min * scale_canvas_h + h_min + shift_h
+ 
+     # Swap the optimized shapes into the word in place of the original letter.
+     for j, s in enumerate(letter_ids):
+         shapes_word[s] = shapes[j]
+ 
+     save_svg.save_svg(
+         f"{experiment_dir}/{font}_{word}_{letter}.svg", canvas_width, canvas_height, shapes_word,
+         shape_groups_word)
+ 
+     render = pydiffvg.RenderFunction.apply
+     scene_args = pydiffvg.RenderFunction.serialize_scene(canvas_width, canvas_height, shapes_word, shape_groups_word)
+     img = render(canvas_width, canvas_height, 2, 2, 0, None, *scene_args)
+     # Composite over a white background using the alpha channel.
+     img = img[:, :, 3:4] * img[:, :, :3] + \
+         torch.ones(img.shape[0], img.shape[1], 3, device="cuda:0") * (1 - img[:, :, 3:4])
+     img = img[:, :, :3]
+     save_image(img, f"{experiment_dir}/{font}_{word}_{letter}.png")
+ 
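+ # Worked example of the scale-to-fit step above (editor's note, numbers made
+ # up): if the original letter's box is 100x50 and the optimized output's box
+ # is 200x200, then scale_canvas_w = 0.5 and scale_canvas_h = 0.25. The larger
+ # factor wins, so the output is scaled uniformly by 0.5 to 100x100: its width
+ # matches the letter exactly and its height is re-centered via shift_h.
+ 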
+ 
+ def create_video(num_iter, experiment_dir, video_frame_freq):
+     img_array = []
+     for ii in range(0, num_iter):
+         if ii % video_frame_freq == 0 or ii == num_iter - 1:
+             filename = os.path.join(
+                 experiment_dir, "video-png", f"iter{ii:04d}.png")
+             img = cv2.imread(filename)
+             img_array.append(img)
+ 
+     video_name = os.path.join(
+         experiment_dir, "video.mp4")
+     check_and_create_dir(video_name)
+     # NOTE: the frame size is hardcoded to (600, 600) and must match the saved PNGs.
+     out = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*'mp4v'), 30.0, (600, 600))
+     for iii in range(len(img_array)):
+         out.write(img_array[iii])
+     out.release()
diffvg ADDED
@@ -0,0 +1 @@
+ Subproject commit adb04d04ee63e82d6569a2fa178ba0dd49115561
packages.txt ADDED
@@ -0,0 +1 @@
+ python3-dev
requirements.txt ADDED
@@ -0,0 +1,28 @@
+ --extra-index-url https://download.pytorch.org/whl/cu113
+ torch==1.12.1+cu113
+ torchvision==0.13.1+cu113
+ 
+ cmake
+ numpy
+ scikit-image
+ ffmpeg
+ svgwrite
+ svgpathtools
+ cssutils
+ numba
+ torch-tools
+ scikit-fmm
+ easydict
+ visdom
+ opencv-python==4.5.4.60
+ 
+ diffusers==0.8
+ transformers
+ scipy
+ ftfy
+ accelerate
+ 
+ vharfbuzz
+ freetype-py
+ shapely
+ kornia==0.6.8