Spaces:
Runtime error
Runtime error
Badr AlKhamissi
committed on
Commit
·
913d3e3
1
Parent(s):
32b316c
starting space
Browse files- .gitignore +5 -0
- .gitmodules +3 -0
- README.md +3 -3
- app.py +368 -0
- code/bezier.py +122 -0
- code/collage.py +41 -0
- code/config.py +108 -0
- code/config/base.yaml +59 -0
- code/data/arabic-fonts/dl-fonts.sh +20 -0
- code/data/arabic-fonts/font_names.txt +20 -0
- code/data/fonts/ArefRuqaa.ttf +0 -0
- code/data/fonts/Bell MT.ttf +0 -0
- code/data/fonts/DeliusUnicase-Regular.ttf +0 -0
- code/data/fonts/HobeauxRococeaux-Sherman.ttf +0 -0
- code/data/fonts/IndieFlower-Regular.ttf +0 -0
- code/data/fonts/JosefinSans-Light.ttf +0 -0
- code/data/fonts/KaushanScript-Regular.ttf +0 -0
- code/data/fonts/LuckiestGuy-Regular.ttf +0 -0
- code/data/fonts/Noteworthy-Bold.ttf +0 -0
- code/data/fonts/Quicksand.ttf +0 -0
- code/data/fonts/Saira-Regular.ttf +0 -0
- code/harfbuzz_test.py +33 -0
- code/losses.py +198 -0
- code/main.py +184 -0
- code/save_svg.py +155 -0
- code/ttf.py +409 -0
- code/utils.py +225 -0
- diffvg +1 -0
- packages.txt +1 -0
- requirements.txt +28 -0
.gitignore
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.pyc
|
2 |
+
.DS_Store
|
3 |
+
output
|
4 |
+
code/data/init
|
5 |
+
code/data/arabic-fonts/*.ttf
|
.gitmodules
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
[submodule "diffvg"]
|
2 |
+
path = diffvg
|
3 |
+
url = https://github.com/BachiLi/diffvg.git
|
README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
---
|
2 |
title: Word To Image
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.29.0
|
8 |
app_file: app.py
|
|
|
1 |
---
|
2 |
title: Word To Image
|
3 |
+
emoji: ✒️ ➡️ 🎨
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: pink
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.29.0
|
8 |
app_file: app.py
|
app.py
ADDED
@@ -0,0 +1,368 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import os
|
3 |
+
import argparse
|
4 |
+
from easydict import EasyDict as edict
|
5 |
+
import yaml
|
6 |
+
import os.path as osp
|
7 |
+
import random
|
8 |
+
import numpy.random as npr
|
9 |
+
import sys
|
10 |
+
|
11 |
+
# sys.path.append('./code')
|
12 |
+
|
13 |
+
# Make the project's own modules under code/ importable; save_svg, losses
# and utils are imported from there below.
sys.path.append('/home/user/app/code')

# Build and install diffvg from its git submodule at start-up. The exit
# status of every command is ignored, so a failed build only shows up later
# when pydiffvg is imported.
os.system('git submodule update --init')
os.chdir('diffvg')
print(os.getcwd())
os.system('git submodule update --init --recursive')
print(os.getcwd())
os.system('python setup.py install --user')
# Add the installed egg explicitly so the already-running interpreter sees it.
sys.path.append("/home/user/.local/lib/python3.8/site-packages/diffvg-0.0.1-py3.8-linux-x86_64.egg")

# Go back to the application root so relative paths resolve from there.
os.chdir('/home/user/app')
|
27 |
+
|
28 |
+
import torch
|
29 |
+
from diffusers import StableDiffusionPipeline
|
30 |
+
|
31 |
+
|
32 |
+
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Stable Diffusion pipeline, loaded once at import time and handed to SDSLoss
# for every request. Half precision is requested only on CUDA; on the CPU the
# weights are loaded as float32 because half-precision inference is not
# generally supported there.
model = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if device.type == 'cuda' else torch.float32,
).to(device)
|
37 |
+
|
38 |
+
from typing import Mapping
|
39 |
+
from tqdm import tqdm
|
40 |
+
import torch
|
41 |
+
from torch.optim.lr_scheduler import LambdaLR
|
42 |
+
import pydiffvg
|
43 |
+
import save_svg
|
44 |
+
from losses import SDSLoss, ToneLoss, ConformalLoss
|
45 |
+
from utils import (
|
46 |
+
edict_2_dict,
|
47 |
+
update,
|
48 |
+
check_and_create_dir,
|
49 |
+
get_data_augs,
|
50 |
+
save_image,
|
51 |
+
preprocess,
|
52 |
+
learning_rate_decay,
|
53 |
+
combine_word)
|
54 |
+
import warnings
|
55 |
+
|
56 |
+
TITLE="""<h1 style="font-size: 42px;" align="center">Word-As-Image for Semantic Typography</h1>"""
|
57 |
+
DESCRIPTION="""A demo for [Word-As-Image for Semantic Typography](https://wordasimage.github.io/Word-As-Image-Page/). By using Word-as-Image, a visual representation of the meaning of the word is created while maintaining legibility of the text and font style.
|
58 |
+
Please select a semantic concept word and a letter you wish to generate, it will take ~5 minutes to perform 500 iterations."""
|
59 |
+
|
60 |
+
DESCRIPTION += '\n<p>This demo is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/"> Creative Commons Attribution-ShareAlike 4.0 International License</a>.</p>'
|
61 |
+
|
62 |
+
if (SPACE_ID := os.getenv('SPACE_ID')) is not None:
|
63 |
+
DESCRIPTION += f'\n<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>'
|
64 |
+
|
65 |
+
|
66 |
+
warnings.filterwarnings("ignore")
|
67 |
+
|
68 |
+
pydiffvg.set_print_timing(False)
|
69 |
+
gamma = 1.0
|
70 |
+
|
71 |
+
|
72 |
+
def set_config(semantic_concept, word, letter, font_name, num_steps):
    """Build the configuration for one demo request.

    Loads ``code/config/base.yaml``, merges the ``demo`` experiment with its
    chain of ``parent_config`` entries, overlays the request parameters,
    writes the resulting config to ``<experiment_dir>/config.yaml`` and seeds
    the random number generators.

    Args:
        semantic_concept: concept inserted into the diffusion prompt.
        word: the word to render; must be non-empty and contain no spaces.
        letter: the part of ``word`` to optimize; must be a non-empty
            substring of ``word``.
        font_name: font identifier, stored as ``cfg.font``.
        num_steps: number of optimization iterations.

    Returns:
        The merged configuration as an ``EasyDict``.

    Raises:
        gr.Error: if ``word`` or ``letter`` is invalid, or the experiment
            (or one of its parents) is missing from the config file.
    """
    # Validate the request before touching the filesystem. An empty letter
    # must be rejected explicitly because '' is a substring of every word.
    if not word:
        raise gr.Error('word should not be empty')
    if ' ' in word:
        raise gr.Error('should be only one word')
    if not letter or letter not in word:
        raise gr.Error('letter should be in word')

    cfg_d = edict()
    cfg_d.config = "code/config/base.yaml"
    cfg_d.experiment = "demo"

    with open(cfg_d.config, 'r') as f:
        cfg_full = yaml.load(f, Loader=yaml.FullLoader)

    # Follow the parent_config pointers until an empty parent is reached.
    # An entry without the key is treated as a child of 'baseline'.
    cfg_key = cfg_d.experiment
    cfgs = [cfg_d]
    while cfg_key:
        if cfg_key not in cfg_full:
            raise gr.Error(f"experiment '{cfg_key}' not found in {cfg_d.config}")
        cfgs.append(cfg_full[cfg_key])
        cfg_key = cfgs[-1].get('parent_config', 'baseline')

    # Apply the oldest ancestor first so children override their parents.
    cfg = edict()
    for options in reversed(cfgs):
        update(cfg, options)
    del cfgs

    cfg.semantic_concept = semantic_concept
    cfg.word = word
    cfg.optimized_letter = letter
    cfg.font = font_name
    cfg.seed = 0
    # The value comes from a UI slider; range() downstream needs an int.
    cfg.num_iter = int(num_steps)

    cfg.caption = f"a {cfg.semantic_concept}. {cfg.prompt_suffix}"
    # NOTE(review): word and semantic_concept come straight from the web form
    # and end up in filesystem paths below; consider sanitizing them.
    cfg.log_dir = f"output/{cfg.experiment}_{cfg.word}"

    cfg.letter = f"{cfg.font}_{cfg.optimized_letter}_scaled"
    cfg.target = f"code/data/init/{cfg.letter}"

    # set experiment dir
    signature = f"{cfg.letter}_concept_{cfg.semantic_concept}_seed_{cfg.seed}"
    cfg.experiment_dir = osp.join(cfg.log_dir, cfg.font, signature)
    configfile = osp.join(cfg.experiment_dir, 'config.yaml')

    # create experiment dir and save config
    check_and_create_dir(configfile)
    with open(configfile, 'w') as f:
        yaml.dump(edict_2_dict(cfg), f)

    # cfg.seed is always set above, so seeding is unconditional.
    random.seed(cfg.seed)
    npr.seed(cfg.seed)
    torch.manual_seed(cfg.seed)
    torch.backends.cudnn.benchmark = False
    return cfg
|
130 |
+
|
131 |
+
|
132 |
+
def init_shapes(svg_path, trainable: Mapping[str, bool]):
    """Load ``<svg_path>.svg`` and collect its trainable parameters.

    Args:
        svg_path: path of the SVG file without the ``.svg`` extension.
        trainable: flags object; only its ``point`` attribute is read.

    Returns:
        ``(shapes, shape_groups, parameters)``. ``parameters.point`` holds the
        ``points`` of every shape and exists only when ``trainable.point`` is
        truthy.
    """
    _, _, shapes_init, shape_groups_init = pydiffvg.svg_to_scene(f'{svg_path}.svg')

    parameters = edict()

    # path points
    if trainable.point:
        for shape in shapes_init:
            shape.points.requires_grad = True
        parameters.point = [shape.points for shape in shapes_init]

    return shapes_init, shape_groups_init, parameters
|
146 |
+
|
147 |
+
|
148 |
+
def run_main_ex(semantic_concept, word, letter, font_name, num_steps):
    """Run ``run_main_app`` in example mode and return its first yielded item as a list."""
    updates = run_main_app(semantic_concept, word, letter, font_name, num_steps, 1)
    return list(next(updates))
|
150 |
+
|
151 |
+
def run_main_app(semantic_concept, word, letter, font_name, num_steps, example=0):
    """Optimize one letter of ``word`` and stream progress to the UI.

    This is a generator. Every item it yields is a 3-tuple of ``gr.update``
    objects, in the order of the ``outputs`` list wired to the Generate
    button (initial word, optimization progress, final result).

    Args:
        semantic_concept, word, letter, font_name, num_steps: forwarded
            unchanged to ``set_config``.
        example: when truthy, the intermediate yields are suppressed and only
            the final result is yielded.
    """

    def over_white(rgba):
        # Alpha-composite the rendered RGBA image over a white background
        # and keep the three colour channels.
        alpha = rgba[:, :, 3:4]
        white = torch.ones(rgba.shape[0], rgba.shape[1], 3, device=device)
        blended = alpha * rgba[:, :, :3] + white * (1 - alpha)
        return blended[:, :, :3]

    cfg = set_config(semantic_concept, word, letter, font_name, num_steps)

    pydiffvg.set_use_gpu(torch.cuda.is_available())

    print("preprocessing")
    preprocess(cfg.font, cfg.word, cfg.optimized_letter, cfg.level_of_cc)
    filename_init = os.path.join("code/data/init/", f"{cfg.font}_{cfg.word}_scaled.svg").replace(" ", "_")
    if not example:
        # Show the initial word while the optimization is being set up.
        yield gr.update(value=filename_init,visible=True),gr.update(visible=False),gr.update(visible=False)

    sds_loss = SDSLoss(cfg, device, model)

    h, w = cfg.render_size, cfg.render_size

    data_augs = get_data_augs(cfg.cut_size)

    render = pydiffvg.RenderFunction.apply

    # initialize shape
    print('initializing shape')
    shapes, shape_groups, parameters = init_shapes(svg_path=cfg.target, trainable=cfg.trainable)

    # Render the untouched letter once; the tone loss uses it as reference.
    scene_args = pydiffvg.RenderFunction.serialize_scene(w, h, shapes, shape_groups)
    img_init = over_white(render(w, h, 2, 2, 0, None, *scene_args))

    tone_loss = ToneLoss(cfg)
    tone_loss.set_image_init(img_init)

    num_iter = cfg.num_iter
    pg = [{'params': parameters["point"], 'lr': cfg.lr_base["point"]}]
    optim = torch.optim.Adam(pg, betas=(0.9, 0.9), eps=1e-6)

    conformal_loss = ConformalLoss(parameters, device, cfg.optimized_letter, shape_groups)

    def lr_factor(step):
        # Decayed learning rate expressed relative to lr_init.
        decayed = learning_rate_decay(step, cfg.lr.lr_init, cfg.lr.lr_final, num_iter,
                                      lr_delay_steps=cfg.lr.lr_delay_steps,
                                      lr_delay_mult=cfg.lr.lr_delay_mult)
        return decayed / cfg.lr.lr_init

    scheduler = LambdaLR(optim, lr_lambda=lr_factor, last_epoch=-1)  # lr.base * lrlambda_f

    print("start training")
    # training loop
    for step in tqdm(range(num_iter)):
        optim.zero_grad()

        # render image and compose it with a white background
        scene_args = pydiffvg.RenderFunction.serialize_scene(w, h, shapes, shape_groups)
        img = over_white(render(w, h, 2, 2, step, None, *scene_args))

        # Save the current shapes so the UI can display this iteration.
        filename = os.path.join(
            cfg.experiment_dir, "video-svg", f"iter{step:04d}.svg")
        check_and_create_dir(filename)
        save_svg.save_svg(filename, w, h, shapes, shape_groups)
        if not example:
            yield gr.update(visible=True),gr.update(value=filename, label=f'iters: {step} / {num_iter}', visible=True),gr.update(visible=False)

        x = img.unsqueeze(0).permute(0, 3, 1, 2)  # HWC -> NCHW
        x = x.repeat(cfg.batch_size, 1, 1, 1)
        x_aug = data_augs.forward(x)

        # Total loss: diffusion (SDS) + tone + weighted conformal term.
        loss = sds_loss(x_aug)
        loss = loss + tone_loss(x, step)
        loss = loss + cfg.loss.conformal.angeles_w * conformal_loss()

        loss.backward()
        optim.step()
        scheduler.step()

    filename = os.path.join(
        cfg.experiment_dir, "output-svg", "output.svg")
    check_and_create_dir(filename)
    save_svg.save_svg(
        filename, w, h, shapes, shape_groups)

    combine_word(cfg.word, cfg.optimized_letter, cfg.font, cfg.experiment_dir)

    image = os.path.join(cfg.experiment_dir,f"{cfg.font}_{cfg.word}_{cfg.optimized_letter}.svg")
    yield gr.update(value=filename_init,visible=True),gr.update(visible=False),gr.update(value=image,visible=True)
|
247 |
+
|
248 |
+
|
249 |
+
# Gradio UI: three text inputs and a slider on the left, three image panels
# on the right, and a Generate button that streams run_main_app's updates.
with gr.Blocks() as demo:

    gr.HTML(TITLE)
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        with gr.Column():

            semantic_concept = gr.Text(
                label='Semantic Concept',
                max_lines=1,
                placeholder=
                'Enter a semantic concept. For example: BUNNY'
            )

            word = gr.Text(
                label='Word',
                max_lines=1,
                placeholder=
                'Enter a word. For example: BUNNY'
            )

            letter = gr.Text(
                label='Letter',
                max_lines=1,
                placeholder=
                'Choose a letter in the word to optimize. For example: Y'
            )

            num_steps = gr.Slider(label='Optimization Iterations',
                                  minimum=0,
                                  maximum=500,
                                  step=10,
                                  value=500)

            # The font is fixed for now, but it must stay a component: it is
            # listed in `inputs` below, and event inputs have to be Gradio
            # components, not plain strings. The hidden text box carries the
            # value that used to be assigned as a bare string.
            # NOTE(review): the example rows below name fonts without the
            # ".ttf" extension - confirm which form preprocess() expects.
            font_name = gr.Text(value="ArefRuqaa.ttf", visible=False, label="Font Name")

            def on_select(evt: gr.SelectData):
                return evt.value

            run = gr.Button('Generate')

        with gr.Column():
            result0 = gr.Image(type="filepath", label="Initial Word").style(height=333)
            result1 = gr.Image(type="filepath", label="Optimization Process").style(height=110)
            result2 = gr.Image(type="filepath", label="Final Result",visible=False).style(height=333)

    with gr.Row():
        # examples: [semantic concept, word, letter, font, iterations]
        examples = [
            [
                "BUNNY",
                "BUNNY",
                "Y",
                "KaushanScript-Regular",
                500
            ],
            [
                "LION",
                "LION",
                "O",
                "Quicksand",
                500
            ],
            [
                "FROG",
                "FROG",
                "G",
                "IndieFlower-Regular",
                500
            ],
            [
                "CAT",
                "CAT",
                "C",
                "LuckiestGuy-Regular",
                500
            ],
        ]
        demo.queue(max_size=10, concurrency_count=2)
        # The cached examples widget is currently disabled:
        # gr.Examples(examples=examples,
        #             inputs=[semantic_concept, word, letter, font_name, num_steps],
        #             outputs=[result0, result1, result2],
        #             fn=run_main_ex,
        #             cache_examples=True)

    # inputs
    inputs = [
        semantic_concept,
        word,
        letter,
        font_name,
        num_steps
    ]

    outputs = [
        result0,
        result1,
        result2
    ]

    run.click(fn=run_main_app, inputs=inputs, outputs=outputs, queue=True)
|
366 |
+
|
367 |
+
|
368 |
+
demo.launch(share=False)
|
code/bezier.py
ADDED
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import matplotlib.pyplot as plt
|
3 |
+
from scipy.special import binom
|
4 |
+
from numpy.linalg import norm
|
5 |
+
|
6 |
+
def num_bezier(n_ctrl, degree=3):
    """Return the number of Bezier segments in a piecewise control chain.

    Args:
        n_ctrl: the number of control points, or the control points
            themselves (ndarray, list or tuple), in which case their count
            is used.
        degree: degree of each segment; consecutive segments share one
            control point.

    Returns:
        ``(n_ctrl - 1) / degree`` truncated to an int.
    """
    if isinstance(n_ctrl, (np.ndarray, list, tuple)):
        n_ctrl = len(n_ctrl)
    return int((n_ctrl - 1) / degree)
|
10 |
+
|
11 |
+
def bernstein(n, i):
    """Return the i-th Bernstein basis polynomial of degree n as a function of t."""
    coeff = binom(n, i)

    def basis(t, bi=coeff, n=n, i=i):
        # binom(n, i) * t^i * (1 - t)^(n - i)
        return bi * t**i * (1 - t)**(n - i)

    return basis
|
14 |
+
|
15 |
+
def bezier(P, t, d=0):
    '''Evaluate a Bezier curve of degree len(P)-1 at the parameters t.

    P holds one control point per row. d is the derivative order
    (0 gives positions). One row is returned per entry of t.
    '''
    order = P.shape[0] - 1
    if d > 0:
        # The derivative is itself a Bezier curve whose control points are
        # the scaled differences of consecutive control points.
        hodograph = np.diff(P, axis=0) * order
        return bezier(hodograph, t, d - 1)
    basis = np.vstack([bernstein(order, k)(t) for k in range(order + 1)])
    return (P.T @ basis).T
|
23 |
+
|
24 |
+
def cubic_bezier(P, t):
    """Evaluate the cubic Bezier curve with control points P[0..3] at t."""
    u = 1.0 - t
    return u**3*P[0] + 3*u**2*t*P[1] + 3*u*t**2*P[2] + t**3*P[3]
|
26 |
+
|
27 |
+
def bezier_piecewise(Cp, subd=100, degree=3, d=0):
    '''Sample a piecewise Bezier curve given a sequence of control points.

    Args:
        Cp: control points, one per row; consecutive segments share a point.
        subd: each segment is sampled at the first subd-1 values of
            np.linspace(0, 1, subd), i.e. without its end point.
        degree: degree of every segment.
        d: derivative order (0 gives positions).

    Returns:
        The stacked samples of all segments followed by one closing sample:
        the last control point for d == 0, or the d-th derivative at the end
        of the last segment for d > 0.
    '''
    num = num_bezier(Cp.shape[0], degree)
    # The sample parameters are the same for every segment.
    t = np.linspace(0, 1., subd)[:-1]
    X = []
    P = None
    for i in range(num):
        P = Cp[i*degree:i*degree+degree+1, :]
        X.append(bezier(P, t, d))
    if d > 0 and P is not None:
        # Close with a derivative sample; appending the position Cp[-1]
        # would mix positions into an array of derivatives.
        X.append(bezier(P, np.array([1.0]), d))
    else:
        X.append(Cp[-1])
    return np.vstack(X)
|
39 |
+
|
40 |
+
def compute_beziers(beziers, subd=100, degree=3):
    """Join a list of Bezier segments into one chain and sample it."""
    return bezier_piecewise(beziers_to_chain(beziers), subd, degree)
|
43 |
+
|
44 |
+
def plot_control_polygon(Cp, degree=3, lw=0.5, linecolor=np.ones(3)*0.1):
    """Plot the control points and control polygon of a piecewise Bezier chain.

    For cubic segments only the two handle lines (first-to-second and
    third-to-fourth control point) are drawn; for other degrees the whole
    control polygon is drawn. Control points are drawn as round markers.
    """
    for seg in range(num_bezier(len(Cp), degree)):
        start = seg * degree
        cp = Cp[start:start + degree + 1, :]
        if degree != 3:
            plt.plot(cp[:,0], cp[:,1], ':', color=linecolor, linewidth=lw)
            plt.plot(cp[:,0], cp[:,1], 'o', color=[0, 0.5, 1.])
            continue
        plt.plot(cp[0:2,0], cp[0:2, 1], ':', color=linecolor, linewidth=lw)
        plt.plot(cp[2:,0], cp[2:,1], ':', color=linecolor, linewidth=lw)
        plt.plot(cp[:,0], cp[:,1], 'o', color=[0, 0.5, 1.], markersize=4)
|
55 |
+
|
56 |
+
|
57 |
+
def chain_to_beziers(chain, degree=3):
    ''' Split a Bezier chain into a list of segments of degree+1 control
    points each (4 for the default cubic case); neighbours share a point.'''
    count = num_bezier(chain.shape[0], degree)
    return [chain[k*degree:k*degree+degree+1, :] for k in range(count)]
|
64 |
+
|
65 |
+
|
66 |
+
def beziers_to_chain(beziers):
    ''' Convert a list of Bezier curve segments to a piecewise Bezier chain.

    Every segment contributes all of its control points except the last one;
    the last point of the final segment is appended once at the end.

    Raises IndexError when beziers is empty.
    '''
    # Flatten in a single pass; sum(list_of_lists, []) re-copies the growing
    # result for every segment.
    points = [p for bez in beziers for p in bez[:-1]]
    points.append(beziers[-1][-1])
    return np.array(points)
|
74 |
+
|
75 |
+
|
76 |
+
def split_cubic(bez, t):
    """Split a cubic Bezier (4 control points) at t with de Casteljau's scheme.

    Returns the control points of the two halves as a pair of arrays; both
    halves share the split point.
    """
    def lerp(a, b):
        return (b - a) * t + a

    p1, p2, p3, p4 = bez

    # first level
    p12, p23, p34 = lerp(p1, p2), lerp(p2, p3), lerp(p3, p4)
    # second level
    p123, p234 = lerp(p12, p23), lerp(p23, p34)
    # the point on the curve at t
    p1234 = lerp(p123, p234)

    left = np.array([p1, p12, p123, p1234])
    right = np.array([p1234, p234, p34, p4])
    return left, right
|
89 |
+
|
90 |
+
|
91 |
+
def approx_arc_length(bez):
    """Approximate the arc length of a cubic Bezier with a fixed 5-term weighted sum.

    bez holds the four control points; each term is the norm of a fixed
    linear combination of them.
    """
    c0, c1, c2, c3 = bez

    # Coefficients of the two interior terms (mirrored between v1 and v3).
    k_far, k_near, k_mid, k_tail = (0.558983582205757, 0.325650248872424,
                                    0.208983582205757, 0.024349751127576)

    v0 = norm(c1-c0)*0.15
    v1 = norm(-k_far*c0 + k_near*c1 + k_mid*c2 + k_tail*c3)
    v2 = norm(c3-c0+c2-c1)*0.26666666666666666
    v3 = norm(-k_tail*c0 - k_mid*c1 - k_near*c2 + k_far*c3)
    v4 = norm(c3-c2)*.15
    return v0 + v1 + v2 + v3 + v4
|
99 |
+
|
100 |
+
|
101 |
+
def subdivide_bezier(bez, thresh):
    """Split a cubic Bezier in halves until every piece is shorter than thresh.

    Args:
        bez: the four control points of the curve.
        thresh: maximum approximate arc length of a piece; must be positive.

    Returns:
        The pieces as a list of control-point arrays, in curve order.

    Raises:
        ValueError: if thresh is not positive. No piece can have a length
            below such a threshold, so the subdivision would never end.
    """
    if not thresh > 0:
        raise ValueError(f'thresh must be positive, got {thresh!r}')

    pending = [bez]
    pieces = []
    while pending:
        current = pending.pop()
        if approx_arc_length(current) < thresh:
            pieces.append(current)
        else:
            first, second = split_cubic(current, 0.5)
            # Push the second half first so the first half is handled next,
            # which keeps the result in curve order.
            pending += [second, first]
    return pieces
|
113 |
+
|
114 |
+
def subdivide_bezier_chain(C, thresh):
    """Subdivide every segment of the chain C and return the refined chain."""
    pieces = []
    for segment in chain_to_beziers(C):
        pieces.extend(subdivide_bezier(segment, thresh))
    return beziers_to_chain(pieces)
|
120 |
+
|
121 |
+
|
122 |
+
|
code/collage.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import imageio
|
3 |
+
import numpy as np
|
4 |
+
from glob import glob
|
5 |
+
from PIL import Image, ImageSequence
|
6 |
+
|
7 |
+
if __name__ == "__main__":
    # Tile nx * ny animated GIFs into a single grayscale collage GIF.

    path = "/Users/bkhmsi/Desktop/Animal-Words/*.gif"
    save_path = os.path.join(os.path.dirname(path), "collage.gif")

    width, height = 400, 400
    nx, ny = 5, 5
    n_frames = 67
    # 10 extra frames: the first and last frame of each GIF are held 5 times.
    collage = np.ones((n_frames+10, width*nx, height*ny)).astype(np.uint8)

    excluded = ["palestine", "amin", "collage"]
    filenames = [p for p in glob(path) if os.path.basename(p)[:-4] not in excluded]
    print(f"> {len(filenames)} Files Found")
    for file in filenames:
        print(os.path.basename(file))

    assert nx*ny <= len(filenames)

    for i in range(nx):
        for j in range(ny):
            image = Image.open(filenames[i*ny+j])
            assert image.is_animated
            rows = slice(i*width, (i+1)*width)
            cols = slice(j*height, (j+1)*height)
            idx = 0
            for frame_idx in range(image.n_frames):
                image.seek(frame_idx)
                frame = image.convert('L').copy()
                is_endpoint = frame_idx == 0 or frame_idx == image.n_frames-1
                for _ in range(5 if is_endpoint else 1):
                    collage[idx, rows, cols] = np.asarray(frame)[100:500, 100:500]
                    idx += 1

    imageio.mimsave(save_path, collage)
|
code/config.py
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os.path as osp
|
3 |
+
import yaml
|
4 |
+
import random
|
5 |
+
from easydict import EasyDict as edict
|
6 |
+
import numpy.random as npr
|
7 |
+
import torch
|
8 |
+
from utils import (
|
9 |
+
edict_2_dict,
|
10 |
+
check_and_create_dir,
|
11 |
+
update)
|
12 |
+
import wandb
|
13 |
+
import warnings
|
14 |
+
warnings.filterwarnings("ignore")
|
15 |
+
|
16 |
+
|
17 |
+
def parse_args():
    """Parse the command line into the base run configuration.

    Besides the command-line options, an access token is read from a file
    named ``TOKEN`` in the current working directory.

    Returns:
        An ``EasyDict`` with the options needed before the YAML config is
        merged in (config path, experiment, seed, font, word, caption, ...).

    Raises:
        ValueError: if the word contains a space or the optimized letter is
            not part of the word.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str, default="code/config/base.yaml")
    parser.add_argument("--experiment", type=str, default="conformal_0.5_dist_pixel_100_kernel201")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument('--log_dir', metavar='DIR', default="output")
    parser.add_argument('--font', type=str, default="none", help="font name")
    # Required: the caption and, by default, the word are derived from it, so
    # a missing value would otherwise fail later with an unrelated TypeError.
    parser.add_argument('--semantic_concept', type=str, required=True,
                        help="the semantic concept to insert")
    parser.add_argument('--word', type=str, default="none", help="the text to work on")
    parser.add_argument('--prompt_suffix', type=str, default="minimal flat 2d vector. lineal color."
                                                             " trending on artstation")
    parser.add_argument('--optimized_letter', type=str, default="none", help="the letter in the word to optimize")
    parser.add_argument('--batch_size', type=int, default=1)
    parser.add_argument('--use_wandb', type=int, default=0)
    parser.add_argument('--wandb_user', type=str, default="none")

    cfg = edict()
    args = parser.parse_args()
    with open('TOKEN', 'r') as f:
        args.token = f.read().replace('\n', '')
    cfg.config = args.config
    cfg.experiment = args.experiment
    cfg.seed = args.seed
    cfg.font = args.font
    cfg.semantic_concept = args.semantic_concept
    # The word defaults to the semantic concept itself.
    cfg.word = cfg.semantic_concept if args.word == "none" else args.word
    if " " in cfg.word:
        raise ValueError('no spaces are allowed')
    if "jpeg" in args.semantic_concept:
        # A concept containing "jpeg" is used verbatim as the caption.
        cfg.caption = args.semantic_concept
    else:
        cfg.caption = f"a {args.semantic_concept}. {args.prompt_suffix}"

    cfg.log_dir = f"{args.log_dir}/{args.experiment}_{cfg.word}"
    if args.optimized_letter in cfg.word:
        cfg.optimized_letter = args.optimized_letter
    else:
        raise ValueError('letter should be in word')
    cfg.batch_size = args.batch_size
    cfg.token = args.token
    cfg.use_wandb = args.use_wandb
    cfg.wandb_user = args.wandb_user
    cfg.letter = f"{args.font}_{args.optimized_letter}_scaled"
    cfg.target = f"code/data/init/{cfg.letter}"

    return cfg
|
63 |
+
|
64 |
+
|
65 |
+
def set_config():
    """Build the full run configuration from the command line and the YAML file.

    Merges the selected experiment with its chain of ``parent_config``
    entries and the command-line options (which take precedence), writes the
    result to ``<experiment_dir>/config.yaml``, optionally starts a wandb
    run, and seeds the random number generators.

    Returns:
        The merged configuration as an ``EasyDict``.

    Raises:
        ValueError: if the merged configuration has no seed.
    """
    cfg_arg = parse_args()
    with open(cfg_arg.config, 'r') as f:
        cfg_full = yaml.load(f, Loader=yaml.FullLoader)

    # Follow the parent_config pointers until an empty parent is reached.
    # An entry without the key is treated as a child of 'baseline'.
    cfg_key = cfg_arg.experiment
    cfgs = [cfg_arg]
    while cfg_key:
        cfgs.append(cfg_full[cfg_key])
        cfg_key = cfgs[-1].get('parent_config', 'baseline')

    # allowing children configs to override their parents
    cfg = edict()
    for options in reversed(cfgs):
        update(cfg, options)
    del cfgs

    # set experiment dir
    signature = f"{cfg.letter}_concept_{cfg.semantic_concept}_seed_{cfg.seed}"
    cfg.experiment_dir = osp.join(cfg.log_dir, cfg.font, signature)
    configfile = osp.join(cfg.experiment_dir, 'config.yaml')
    print('Config:', cfg)

    # create experiment dir and save config
    check_and_create_dir(configfile)
    with open(configfile, 'w') as f:
        yaml.dump(edict_2_dict(cfg), f)

    if cfg.use_wandb:
        wandb.init(project="Word-As-Image", entity=cfg.wandb_user,
                   config=cfg, name=f"{signature}", id=wandb.util.generate_id())

    # A real exception instead of `assert False`: asserts are removed under
    # `python -O`, which would let an unseeded run continue silently.
    if cfg.seed is None:
        raise ValueError('cfg.seed must be set')
    random.seed(cfg.seed)
    npr.seed(cfg.seed)
    torch.manual_seed(cfg.seed)
    torch.backends.cudnn.benchmark = False

    return cfg
|
code/config/base.yaml
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
baseline:
|
2 |
+
parent_config: ''
|
3 |
+
save:
|
4 |
+
init: true
|
5 |
+
image: true
|
6 |
+
video: true
|
7 |
+
video_frame_freq: 1
|
8 |
+
trainable:
|
9 |
+
point: true
|
10 |
+
lr_base:
|
11 |
+
point: 1
|
12 |
+
lr:
|
13 |
+
lr_init: 0.002
|
14 |
+
lr_final: 0.0008
|
15 |
+
lr_delay_mult: 0.1
|
16 |
+
lr_delay_steps: 100
|
17 |
+
num_iter: 500
|
18 |
+
render_size: 600
|
19 |
+
cut_size: 512
|
20 |
+
level_of_cc: 0 # 0 - original number of cc / 1 - recommended / 2 - more control points
|
21 |
+
seed: 0
|
22 |
+
diffusion:
|
23 |
+
model: "runwayml/stable-diffusion-v1-5" #"stabilityai/stable-diffusion-2-1"
|
24 |
+
timesteps: 1000
|
25 |
+
guidance_scale: 100
|
26 |
+
loss:
|
27 |
+
use_sds_loss: true
|
28 |
+
tone:
|
29 |
+
use_tone_loss: false
|
30 |
+
conformal:
|
31 |
+
use_conformal_loss: false
|
32 |
+
|
33 |
+
conformal_0.5_dist_pixel_100_kernel201:
|
34 |
+
parent_config: baseline
|
35 |
+
level_of_cc: 1
|
36 |
+
loss:
|
37 |
+
tone:
|
38 |
+
use_tone_loss: true
|
39 |
+
dist_loss_weight: 100
|
40 |
+
pixel_dist_kernel_blur: 201
|
41 |
+
pixel_dist_sigma: 30
|
42 |
+
conformal:
|
43 |
+
use_conformal_loss: true
|
44 |
+
angeles_w: 0.5
|
45 |
+
|
46 |
+
Animals:
|
47 |
+
parent_config: baseline
|
48 |
+
level_of_cc: 1
|
49 |
+
num_iter: 500
|
50 |
+
loss:
|
51 |
+
tone:
|
52 |
+
use_tone_loss: true
|
53 |
+
dist_loss_weight: 100
|
54 |
+
pixel_dist_kernel_blur: 201
|
55 |
+
pixel_dist_sigma: 30
|
56 |
+
conformal:
|
57 |
+
use_conformal_loss: true
|
58 |
+
angeles_w: 0.5
|
59 |
+
|
code/data/arabic-fonts/dl-fonts.sh
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wget https://arbfonts.com//wp-content/fonts/diwany-arabic-fonts//mcs-diwany-jaly-s-u.ttf -O 01.ttf
|
2 |
+
wget https://arbfonts.com//wp-content/fonts/new-arabic-fonts//46-Diwani-Bent.ttf -O 02.ttf
|
3 |
+
wget https://arbfonts.com//wp-content/fonts/diwany-arabic-fonts//diwany-edited.ttf -O 03.ttf
|
4 |
+
wget https://arbfonts.com/wp-content/fonts/diwany-arabic-fonts//arbfonts-samt-7017.ttf -O 04.ttf
|
5 |
+
wget https://arbfonts.com//wp-content/fonts/kufi-arabic-fonts//QadasiRegular.ttf -O 05.ttf
|
6 |
+
wget https://arbfonts.com//wp-content/fonts/kufi-arabic-fonts//Spirit-Of-Doha-Black.otf -O 06.ttf
|
7 |
+
wget https://arbfonts.com//wp-content/fonts/new-arabic-fonts//AlQalam-alavi.ttf -O 07.ttf
|
8 |
+
wget https://arbfonts.com//wp-content/fonts/new-arabic-fonts//22-andlso.ttf -O 08.ttf
|
9 |
+
wget https://arbfonts.com//wp-content/fonts/new-arabic-fonts//ArefRuqaa-Bold-1.ttf -O 09.ttf
|
10 |
+
wget https://arbfonts.com//wp-content/fonts/new-arabic-fonts//ArefRuqaa-Regular-1.ttf -O 10.ttf
|
11 |
+
wget https://arbfonts.com//wp-content/fonts/new-arabic-fonts//117-Barada-Reqa.ttf -O 11.ttf
|
12 |
+
wget https://arbfonts.com/wp-content/fonts/diwany-arabic-fonts//arbfonts-diwany-thuluth.ttf -O 12.ttf
|
13 |
+
wget https://arbfonts.com//wp-content/fonts/unlimited-free-arabic-fonts//UthmanicHafs1-Ver09_2.otf -O 13.ttf
|
14 |
+
wget https://arbfonts.com//wp-content/fonts/new-arabic-fonts//ALAMERAH-Thin.ttf -O 14.ttf
|
15 |
+
wget https://arbfonts.com/font_files/horr/unicode/Boahmed%20Alhour.ttf -O 15.ttf
|
16 |
+
wget https://arbfonts.com//wp-content/fonts/misc//K-Kamran.ttf -O 16.ttf
|
17 |
+
wget https://arbfonts.com//wp-content/fonts/farsi-free-fonts//Jamil-nory.ttf -O 17.ttf
|
18 |
+
wget https://arbfonts.com//wp-content/fonts/brands-arasbic-fonts//Mobily.ttf -O 18.ttf
|
19 |
+
wget https://arbfonts.com//wp-content/fonts/new-arabic-fonts//QTSManga-Regular-1.ttf -O 19.ttf
|
20 |
+
wget https://arbfonts.com//wp-content/fonts/arabic-fonts/new//Al-Jazeera-Arabic-Regular.ttf -O 20.ttf
|
code/data/arabic-fonts/font_names.txt
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ديواني جلي
|
2 |
+
ديواني مشكل
|
3 |
+
ديواني طويل
|
4 |
+
ديواني بسيط
|
5 |
+
كوفي بسيط
|
6 |
+
كوفي منحني
|
7 |
+
فارسي بسيط
|
8 |
+
مغربي اندلس
|
9 |
+
رقعة مدبب
|
10 |
+
رقعة بسيط
|
11 |
+
رقعة سريع
|
12 |
+
ثلث ديواني
|
13 |
+
ثلث بسيط
|
14 |
+
مربع بسيط
|
15 |
+
حر مدبب
|
16 |
+
حر بسيط
|
17 |
+
حر طويل
|
18 |
+
موبايلي
|
19 |
+
منجا
|
20 |
+
الجزيرة
|
code/data/fonts/ArefRuqaa.ttf
ADDED
Binary file (111 kB). View file
|
|
code/data/fonts/Bell MT.ttf
ADDED
Binary file (84.8 kB). View file
|
|
code/data/fonts/DeliusUnicase-Regular.ttf
ADDED
Binary file (31.5 kB). View file
|
|
code/data/fonts/HobeauxRococeaux-Sherman.ttf
ADDED
Binary file (117 kB). View file
|
|
code/data/fonts/IndieFlower-Regular.ttf
ADDED
Binary file (55.4 kB). View file
|
|
code/data/fonts/JosefinSans-Light.ttf
ADDED
Binary file (59.3 kB). View file
|
|
code/data/fonts/KaushanScript-Regular.ttf
ADDED
Binary file (184 kB). View file
|
|
code/data/fonts/LuckiestGuy-Regular.ttf
ADDED
Binary file (58.3 kB). View file
|
|
code/data/fonts/Noteworthy-Bold.ttf
ADDED
Binary file (248 kB). View file
|
|
code/data/fonts/Quicksand.ttf
ADDED
Binary file (124 kB). View file
|
|
code/data/fonts/Saira-Regular.ttf
ADDED
Binary file (82.8 kB). View file
|
|
code/harfbuzz_test.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import vharfbuzz as hv
|
2 |
+
|
3 |
+
import os

# Words to render: the Arabic spelling is shaped, and the English name is
# used as the output file name.
animal_names = [
    {"english": "cat", "arabic": "قطة"},
    {"english": "Lion", "arabic": "أسد"},
    {"english": "Elephant", "arabic": "فيل"},
    {"english": "Tiger", "arabic": "نمر"},
    {"english": "Cheetah", "arabic": "فهد"},
    {"english": "Monkey", "arabic": "قرد"},
    {"english": "Dolphin", "arabic": "دلفين"},
    {"english": "Penguin", "arabic": "بطريق"},
    {"english": "Kangaroo", "arabic": "كنغر"},
    {"english": "Fox", "arabic": "ثعلب"},
    {"english": "Eagle", "arabic": "نسر"},
    {"english": "Wolf", "arabic": "ذئب"},
    {"english": "Turtle", "arabic": "سلحفاة"},
    {"english": "Panda", "arabic": "باندا"},
    {"english": "Giraffe", "arabic": "زرافة"},
    {"english": "Bear", "arabic": "دب"},
    {"english": "Owl", "arabic": "بومة"}
]

fontpath = './data/fonts/ArefRuqaa.ttf'
vhb = hv.Vharfbuzz(fontpath)

# NOTE(review): developer-specific absolute path; adjust before running elsewhere.
path_templ = "/Users/bkhmsi/Desktop/Animal-Words/correct/{}.svg"

# Create the output directory up front so the first write does not fail with
# FileNotFoundError when it does not exist yet.
os.makedirs(os.path.dirname(path_templ), exist_ok=True)

for animal in animal_names:
    txt = animal["arabic"]
    # Shape the word with kerning and ligatures enabled, then export as SVG.
    buf = vhb.shape(txt, {"features": {"kern": True, "liga": True}})
    svg = vhb.buf_to_svg(buf)
    # Explicit encoding keeps the output independent of the platform locale.
    with open(path_templ.format(animal["english"]), 'w', encoding='utf-8') as fout:
        fout.write(svg)
|
code/losses.py
ADDED
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch.nn as nn
|
2 |
+
import torchvision
|
3 |
+
from scipy.spatial import Delaunay
|
4 |
+
import torch
|
5 |
+
import numpy as np
|
6 |
+
from torch.nn import functional as nnf
|
7 |
+
from easydict import EasyDict
|
8 |
+
from shapely.geometry import Point
|
9 |
+
from shapely.geometry.polygon import Polygon
|
10 |
+
from torchvision import transforms
|
11 |
+
from PIL import Image
|
12 |
+
from transformers import CLIPProcessor, CLIPModel
|
13 |
+
|
14 |
+
from diffusers import StableDiffusionPipeline
|
15 |
+
|
16 |
+
class SDSLoss(nn.Module):
    """Score-distillation style loss computed with a Stable Diffusion pipeline.

    The input image batch is encoded to VAE latents, noised at a random
    timestep, and denoised by the UNet under classifier-free guidance. The
    weighted difference between predicted and injected noise is detached and
    multiplied with the latents, so that back-propagating the returned scalar
    pushes that difference into the latents.
    """

    def __init__(self, cfg, device):
        """Load the diffusion pipeline and CLIP, then pre-compute the conditioning.

        Args:
            cfg: configuration object; this class reads ``cfg.diffusion.model``,
                ``cfg.diffusion.timesteps``, ``cfg.diffusion.guidance_scale``,
                ``cfg.token``, ``cfg.caption`` and ``cfg.batch_size``.
            device: device the pipeline, CLIP model and schedules are moved to.
        """
        super(SDSLoss, self).__init__()
        self.cfg = cfg
        self.device = device
        self.pipe = StableDiffusionPipeline.from_pretrained(cfg.diffusion.model,
                                                            torch_dtype=torch.float16, use_auth_token=cfg.token)
        self.pipe = self.pipe.to(self.device)

        self.clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(self.device)
        self.clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

        # default scheduler: PNDMScheduler(beta_start=0.00085, beta_end=0.012,
        # beta_schedule="scaled_linear", num_train_timesteps=1000)
        self.alphas = self.pipe.scheduler.alphas_cumprod.to(self.device)
        self.sigmas = (1 - self.pipe.scheduler.alphas_cumprod).to(self.device)

        self.text_embeddings = None
        self.embed_text()

    def embed_text(self):
        """Build ``self.text_embeddings`` as [unconditional, conditional] embeddings.

        If ``cfg.caption`` does not contain ``"jpeg"`` it is treated as a text
        prompt and encoded with the pipeline's tokenizer/text encoder; otherwise
        it is opened as an image file and its CLIP image features are used for
        both halves. Each half is repeated ``cfg.batch_size`` times. The
        tokenizer and text encoder are deleted from the pipeline afterwards.
        """
        if "jpeg" not in self.cfg.caption:
            text_input = self.pipe.tokenizer(self.cfg.caption, padding="max_length",
                                             max_length=self.pipe.tokenizer.model_max_length,
                                             truncation=True, return_tensors="pt")
            uncond_input = self.pipe.tokenizer([""], padding="max_length",
                                               max_length=text_input.input_ids.shape[-1],
                                               return_tensors="pt")
            with torch.no_grad():
                text_embeddings = self.pipe.text_encoder(text_input.input_ids.to(self.device))[0]
                uncond_embeddings = self.pipe.text_encoder(uncond_input.input_ids.to(self.device))[0]
        else:
            print(f"> Reading Image {self.cfg.caption}")
            with torch.no_grad():
                image = Image.open(self.cfg.caption)
                inputs = self.clip_processor(images=image, return_tensors="pt").to(self.device)
                img_emb = self.clip_model.get_image_features(**inputs)
                text_embeddings = img_emb
                uncond_embeddings = img_emb

        print(text_embeddings.size())
        print(uncond_embeddings.size())
        # Layout after repeat_interleave: [uncond] * batch_size followed by
        # [cond] * batch_size, matching torch.cat([latents] * 2) in forward().
        self.text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
        self.text_embeddings = self.text_embeddings.repeat_interleave(self.cfg.batch_size, 0)
        del self.pipe.tokenizer
        del self.pipe.text_encoder

    def forward(self, x_aug):
        """Return the scalar loss for a batch of images.

        Args:
            x_aug: image batch; it is rescaled with ``x * 2 - 1`` before being
                passed to the VAE encoder (assumes values in [0, 1] — TODO confirm).

        Returns:
            A scalar tensor: ``(grad_z * latent_z).sum(1).mean()``.
        """
        # encode rendered image
        x = x_aug * 2. - 1.
        with torch.cuda.amp.autocast():
            init_latent_z = (self.pipe.vae.encode(x).latent_dist.sample())
        latent_z = 0.18215 * init_latent_z  # scaling_factor * init_latents

        with torch.inference_mode():
            # sample timesteps
            timestep = torch.randint(
                low=50,
                high=min(950, self.cfg.diffusion.timesteps) - 1,  # avoid highest timestep | diffusion.timesteps=1000
                size=(latent_z.shape[0],),
                device=self.device, dtype=torch.long)

            # add noise
            eps = torch.randn_like(latent_z)
            # zt = alpha_t * latent_z + sigma_t * eps
            noised_latent_zt = self.pipe.scheduler.add_noise(latent_z, eps, timestep)

            # denoise
            z_in = torch.cat([noised_latent_zt] * 2)  # expand latents for classifier free guidance
            # The timesteps are duplicated the same way so every row of z_in
            # has its own timestep, also for batch sizes larger than one.
            timestep_in = torch.cat([timestep] * 2)
            with torch.autocast(device_type="cuda", dtype=torch.float16):
                eps_t_uncond, eps_t = self.pipe.unet(
                    z_in, timestep_in, encoder_hidden_states=self.text_embeddings).sample.float().chunk(2)

            eps_t = eps_t_uncond + self.cfg.diffusion.guidance_scale * (eps_t - eps_t_uncond)

            # w = alphas[timestep]^0.5 * (1 - alphas[timestep]) = alphas[timestep]^0.5 * sigmas[timestep]
            # Reshape the per-sample weight to (B, 1, ..., 1) so it broadcasts
            # over the non-batch dimensions instead of the trailing one.
            weight = self.alphas[timestep] ** 0.5 * self.sigmas[timestep]
            weight = weight.view(-1, *([1] * (latent_z.dim() - 1)))
            grad_z = weight * (eps_t - eps)
            assert torch.isfinite(grad_z).all()
            grad_z = torch.nan_to_num(grad_z.detach().float(), 0.0, 0.0, 0.0)

        # clone() outside inference mode yields a regular tensor that may take
        # part in autograd together with latent_z.
        sds_loss = grad_z.clone() * latent_z
        del grad_z

        sds_loss = sds_loss.sum(1).mean()
        return sds_loss
|
106 |
+
|
107 |
+
|
108 |
+
class ToneLoss(nn.Module):
    """MSE between Gaussian-blurred versions of the initial and current raster.

    The loss is scaled by a step-dependent weight (see :meth:`get_scheduler`).
    :meth:`set_image_init` must be called before :meth:`forward`.
    """

    def __init__(self, cfg):
        """Read the tone-loss settings from ``cfg.loss.tone``.

        Uses ``dist_loss_weight``, ``pixel_dist_kernel_blur`` (square kernel
        size) and ``pixel_dist_sigma``.
        """
        super(ToneLoss, self).__init__()
        self.dist_loss_weight = cfg.loss.tone.dist_loss_weight
        self.im_init = None
        # Set by set_image_init(); initialised here so forward() can detect
        # a missing reference image.
        self.init_blurred = None
        self.cfg = cfg
        self.mse_loss = nn.MSELoss()
        self.blurrer = torchvision.transforms.GaussianBlur(kernel_size=(cfg.loss.tone.pixel_dist_kernel_blur,
                                                                        cfg.loss.tone.pixel_dist_kernel_blur), sigma=(cfg.loss.tone.pixel_dist_sigma))

    def set_image_init(self, im_init):
        """Store the reference image (HWC -> 1CHW) and its blurred version."""
        self.im_init = im_init.permute(2, 0, 1).unsqueeze(0)
        self.init_blurred = self.blurrer(self.im_init)

    def get_scheduler(self, step=None):
        """Return the loss weight for ``step``.

        With a step, the base weight is multiplied by a Gaussian bump centred
        at step 300: ``exp(-(1/5) * ((step - 300) / 20) ** 2)``. Without a
        step, the constant base weight is returned.
        """
        if step is not None:
            return self.dist_loss_weight * np.exp(-(1/5)*((step-300)/(20)) ** 2)
        else:
            return self.dist_loss_weight

    def forward(self, cur_raster, step=None):
        """Return the weighted tone loss of ``cur_raster`` against the reference.

        Raises:
            RuntimeError: if :meth:`set_image_init` has not been called yet.
        """
        if self.init_blurred is None:
            raise RuntimeError("ToneLoss.set_image_init() must be called before forward()")
        blurred_cur = self.blurrer(cur_raster)
        return self.mse_loss(self.init_blurred.detach(), blurred_cur) * self.get_scheduler(step)
|
132 |
+
|
133 |
+
|
134 |
+
class ConformalLoss:
    """Angle-preservation loss over a Delaunay triangulation of the path points.

    At construction time the points of all paths are triangulated and, for
    every character of ``target_letter``, the triangles whose centroid lies
    inside that letter's polygon are kept. Calling the instance returns the
    sum over letters of the MSE between the current triangle angles and the
    angles recorded by the last :meth:`reset`.
    """

    def __init__(self, parameters: "EasyDict", device: torch.device, target_letter: str, shape_groups):
        """
        Args:
            parameters: object whose ``point`` attribute is a list of per-path
                point tensors.
            device: device the face index tensors are placed on.
            target_letter: characters to build a face set for, paired
                position-wise with ``shape_groups``.
            shape_groups: groups exposing ``shape_ids`` (indices into
                ``parameters.point``).
        """
        self.parameters = parameters
        self.target_letter = target_letter
        self.shape_groups = shape_groups
        self.faces = self.init_faces(device)
        self.faces_roll_a = [torch.roll(face, 1, 1) for face in self.faces]

        with torch.no_grad():
            self.angles = []
            self.reset()

    def get_angles(self, points: torch.Tensor) -> list:
        """Return, per face set, the angles between consecutive triangle edges.

        Edges are normalised by ``length + 1e-1`` before the dot product, so
        the cosine is slightly damped.
        """
        angles_ = []
        for face, face_rolled in zip(self.faces, self.faces_roll_a):
            triangles = points[face]
            triangles_roll_a = points[face_rolled]
            edges = triangles_roll_a - triangles
            length = edges.norm(dim=-1)
            edges = edges / (length + 1e-1)[:, :, None]
            edges_roll = torch.roll(edges, 1, 1)
            cosine = torch.einsum('ned,ned->ne', edges, edges_roll)
            angles_.append(torch.arccos(cosine))
        return angles_

    def get_letter_inds(self, letter_to_insert):
        """Return ``(first_shape_id, last_shape_id, n_shapes)`` for a letter.

        The first group whose paired character equals ``letter_to_insert`` is
        used.

        Raises:
            ValueError: if no group is paired with ``letter_to_insert``.
        """
        # NOTE(review): for a letter occurring twice in target_letter this
        # always returns the first occurrence's group — confirm that is intended.
        for group, l in zip(self.shape_groups, self.target_letter):
            if l == letter_to_insert:
                letter_inds = group.shape_ids
                return letter_inds[0], letter_inds[-1], len(letter_inds)
        raise ValueError(
            f"letter {letter_to_insert!r} has no matching shape group in {self.target_letter!r}")

    def reset(self):
        """Record the angles of the current (detached) points as the reference."""
        points = torch.cat([point.clone().detach() for point in self.parameters.point])
        self.angles = self.get_angles(points)

    def init_faces(self, device: torch.device) -> list:
        """Return one int64 tensor of triangle vertex indices per target letter."""
        if not self.target_letter:
            return []

        # The point sets and their triangulation do not depend on the letter,
        # so they are computed once instead of once per character.
        point_sets = [point.clone().detach().cpu().numpy() for point in self.parameters.point]
        all_points = np.concatenate(point_sets)
        simplices = Delaunay(all_points).simplices

        faces_ = []
        for c in self.target_letter:
            start_ind, end_ind, shapes_per_letter = self.get_letter_inds(c)
            print(c, start_ind, end_ind)
            holes = []
            if shapes_per_letter > 1:
                # NOTE(review): the slice excludes end_ind, i.e. the letter's
                # last shape is not treated as a hole — confirm this is intended.
                holes = point_sets[start_ind+1:end_ind]
            poly = Polygon(point_sets[start_ind], holes=holes)
            poly = poly.buffer(0)
            # Keep the triangles whose centroid falls inside the letter polygon.
            is_intersect = np.array(
                [poly.contains(Point(all_points[face].mean(0))) for face in simplices], dtype=np.bool_)
            faces_.append(torch.from_numpy(simplices[is_intersect]).to(device, dtype=torch.int64))
        return faces_

    def __call__(self) -> torch.Tensor:
        """Return the summed MSE between current and reference angles."""
        loss_angles = 0
        points = torch.cat(self.parameters.point)
        for current, reference in zip(self.get_angles(points), self.angles):
            loss_angles += (nnf.mse_loss(current, reference))
        return loss_angles
|
195 |
+
|
196 |
+
|
197 |
+
|
198 |
+
|
code/main.py
ADDED
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Mapping
|
2 |
+
import os
|
3 |
+
from tqdm import tqdm
|
4 |
+
from easydict import EasyDict as edict
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
import torch
|
7 |
+
from torch.optim.lr_scheduler import LambdaLR
|
8 |
+
import pydiffvg
|
9 |
+
import save_svg
|
10 |
+
from losses import SDSLoss, ToneLoss, ConformalLoss
|
11 |
+
from config import set_config
|
12 |
+
from utils import (
|
13 |
+
check_and_create_dir,
|
14 |
+
get_data_augs,
|
15 |
+
save_image,
|
16 |
+
preprocess,
|
17 |
+
learning_rate_decay,
|
18 |
+
combine_word,
|
19 |
+
create_video)
|
20 |
+
import wandb
|
21 |
+
import warnings
|
22 |
+
warnings.filterwarnings("ignore")
|
23 |
+
|
24 |
+
pydiffvg.set_print_timing(False)
|
25 |
+
gamma = 1.0
|
26 |
+
|
27 |
+
|
28 |
+
def init_shapes(svg_path, trainable: Mapping[str, bool]):
    """Load ``<svg_path>.svg`` and collect its trainable parameters.

    When ``trainable.point`` is truthy, every path's ``points`` tensor is
    flagged with ``requires_grad`` and gathered in ``parameters.point``;
    otherwise ``parameters`` is returned empty.

    Returns:
        ``(shapes, shape_groups, parameters)``.
    """
    # The canvas size reported by the parser is not needed here.
    _, _, shapes_init, shape_groups_init = pydiffvg.svg_to_scene(f'{svg_path}.svg')

    parameters = edict()

    # path points
    if trainable.point:
        for path in shapes_init:
            path.points.requires_grad = True
        parameters.point = [path.points for path in shapes_init]

    return shapes_init, shape_groups_init, parameters
|
43 |
+
|
44 |
+
|
45 |
+
if __name__ == "__main__":
    # Optimise the control points of the target letter so that the rendered
    # image minimises the enabled losses (SDS / tone / conformal), saving
    # intermediate frames and the final SVG/PNG into cfg.experiment_dir.

    cfg = set_config()

    # At least one loss term is needed, otherwise there is nothing to
    # back-propagate through.
    if not (cfg.loss.use_sds_loss or cfg.loss.tone.use_tone_loss
            or cfg.loss.conformal.use_conformal_loss):
        raise ValueError("no loss is enabled in the configuration")

    # use GPU if available
    pydiffvg.set_use_gpu(torch.cuda.is_available())
    device = pydiffvg.get_device()

    print("preprocessing")
    preprocess(cfg.font, cfg.word, cfg.optimized_letter, cfg.level_of_cc)

    if cfg.loss.use_sds_loss:
        sds_loss = SDSLoss(cfg, device)

    h, w = cfg.render_size, cfg.render_size

    data_augs = get_data_augs(cfg.cut_size)

    render = pydiffvg.RenderFunction.apply

    # initialize shape
    print('initializing shape')
    shapes, shape_groups, parameters = init_shapes(svg_path=cfg.target, trainable=cfg.trainable)

    scene_args = pydiffvg.RenderFunction.serialize_scene(w, h, shapes, shape_groups)
    img_init = render(w, h, 2, 2, 0, None, *scene_args)
    # compose the initial render with a white background
    img_init = img_init[:, :, 3:4] * img_init[:, :, :3] + \
               torch.ones(img_init.shape[0], img_init.shape[1], 3, device=device) * (1 - img_init[:, :, 3:4])
    img_init = img_init[:, :, :3]
    if cfg.use_wandb:
        plt.imshow(img_init.detach().cpu())
        wandb.log({"init": wandb.Image(plt)}, step=0)
        plt.close()

    if cfg.loss.tone.use_tone_loss:
        tone_loss = ToneLoss(cfg)
        tone_loss.set_image_init(img_init)

    if cfg.save.init:
        print('saving init')
        filename = os.path.join(
            cfg.experiment_dir, "svg-init", "init.svg")
        check_and_create_dir(filename)
        save_svg.save_svg(filename, w, h, shapes, shape_groups)

    num_iter = cfg.num_iter
    pg = [{'params': parameters["point"], 'lr': cfg.lr_base["point"]}]
    optim = torch.optim.Adam(pg, betas=(0.9, 0.9), eps=1e-6)

    if cfg.loss.conformal.use_conformal_loss:
        conformal_loss = ConformalLoss(parameters, device, cfg.optimized_letter, shape_groups)

    lr_lambda = lambda step: learning_rate_decay(step, cfg.lr.lr_init, cfg.lr.lr_final, num_iter,
                                                 lr_delay_steps=cfg.lr.lr_delay_steps,
                                                 lr_delay_mult=cfg.lr.lr_delay_mult) / cfg.lr.lr_init

    scheduler = LambdaLR(optim, lr_lambda=lr_lambda, last_epoch=-1)  # lr.base * lrlambda_f

    print("start training")
    # training loop
    t_range = tqdm(range(num_iter))
    for step in t_range:
        if cfg.use_wandb:
            wandb.log({"learning_rate": optim.param_groups[0]['lr']}, step=step)
        optim.zero_grad()

        # render image
        scene_args = pydiffvg.RenderFunction.serialize_scene(w, h, shapes, shape_groups)
        img = render(w, h, 2, 2, step, None, *scene_args)

        # compose image with white background
        img = img[:, :, 3:4] * img[:, :, :3] + torch.ones(img.shape[0], img.shape[1], 3, device=device) * (1 - img[:, :, 3:4])
        img = img[:, :, :3]

        if cfg.save.video and (step % cfg.save.video_frame_freq == 0 or step == num_iter - 1):
            save_image(img, os.path.join(cfg.experiment_dir, "video-png", f"iter{step:04d}.png"), gamma)
            filename = os.path.join(
                cfg.experiment_dir, "video-svg", f"iter{step:04d}.svg")
            check_and_create_dir(filename)
            save_svg.save_svg(
                filename, w, h, shapes, shape_groups)
            if cfg.use_wandb:
                plt.imshow(img.detach().cpu())
                wandb.log({"img": wandb.Image(plt)}, step=step)
                plt.close()

        x = img.unsqueeze(0).permute(0, 3, 1, 2)  # HWC -> NCHW
        x = x.repeat(cfg.batch_size, 1, 1, 1)
        x_aug = data_augs.forward(x)

        # Accumulate only the enabled loss terms: sds_loss exists only when
        # cfg.loss.use_sds_loss is set, so it must not be called unconditionally.
        loss = torch.zeros((), device=device)

        if cfg.loss.use_sds_loss:
            # compute diffusion loss per pixel
            sds_loss_res = sds_loss(x_aug)
            if cfg.use_wandb:
                wandb.log({"sds_loss": sds_loss_res.item()}, step=step)
            loss = loss + sds_loss_res

        if cfg.loss.tone.use_tone_loss:
            tone_loss_res = tone_loss(x, step)
            if cfg.use_wandb:
                wandb.log({"dist_loss": tone_loss_res}, step=step)
            loss = loss + tone_loss_res

        if cfg.loss.conformal.use_conformal_loss:
            loss_angles = conformal_loss()
            loss_angles = cfg.loss.conformal.angeles_w * loss_angles
            if cfg.use_wandb:
                wandb.log({"loss_angles": loss_angles}, step=step)
            loss = loss + loss_angles

        t_range.set_postfix({'loss': loss.item()})
        loss.backward()
        optim.step()
        scheduler.step()

    filename = os.path.join(
        cfg.experiment_dir, "output-svg", "output.svg")
    check_and_create_dir(filename)
    save_svg.save_svg(
        filename, w, h, shapes, shape_groups)

    combine_word(cfg.word, cfg.optimized_letter, cfg.font, cfg.experiment_dir)

    if cfg.save.image:
        filename = os.path.join(
            cfg.experiment_dir, "output-png", "output.png")
        check_and_create_dir(filename)
        imshow = img.detach().cpu()
        pydiffvg.imwrite(imshow, filename, gamma=gamma)
        if cfg.use_wandb:
            plt.imshow(img.detach().cpu())
            wandb.log({"img": wandb.Image(plt)}, step=step)
            plt.close()

    if cfg.save.video:
        print("saving video")
        create_video(cfg.num_iter, cfg.experiment_dir, cfg.save.video_frame_freq)

    if cfg.use_wandb:
        wandb.finish()
|
code/save_svg.py
ADDED
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import pydiffvg
|
3 |
+
import xml.etree.ElementTree as etree
|
4 |
+
from xml.dom import minidom
|
5 |
+
def prettify(elem):
    """Serialise an ElementTree element to an indented XML string.

    The element is dumped as UTF-8 bytes, re-parsed with minidom and
    pretty-printed from there.
    """
    raw_xml = etree.tostring(elem, 'utf-8')
    return minidom.parseString(raw_xml).toprettyxml(indent=" ")
|
11 |
+
def save_svg(filename, width, height, shapes, shape_groups, use_gamma = False, background=None):
    """Write a pydiffvg scene to ``filename`` as an SVG document.

    Every shape group becomes a single ``<path>`` element whose ``d``
    attribute concatenates the sub-paths of all ``pydiffvg.Path`` shapes in
    the group. Stroke width is always written as 0.

    Args:
        filename: output file path.
        width, height: canvas size written to the root element; also used to
            normalise gradient coordinates.
        shapes: list of shapes, indexed by the ids stored in each group.
        shape_groups: groups providing ``shape_ids``, ``fill_color`` and
            ``stroke_color``.
        use_gamma: if true, add a gamma (exponent 1/2.2) filter to the scene.
        background: optional value written to the root ``style`` attribute.

    Raises:
        ValueError: if a path segment has a control-point count other than
            0, 1 or 2.
    """
    root = etree.Element('svg')
    root.set('version', '1.1')
    root.set('xmlns', 'http://www.w3.org/2000/svg')
    root.set('width', str(width))
    root.set('height', str(height))
    if background is not None:
        print(f"setting background to {background}")
        root.set('style', str(background))
    defs = etree.SubElement(root, 'defs')
    g = etree.SubElement(root, 'g')
    if use_gamma:
        f = etree.SubElement(defs, 'filter')
        f.set('id', 'gamma')
        f.set('x', '0')
        f.set('y', '0')
        f.set('width', '100%')
        f.set('height', '100%')
        gamma = etree.SubElement(f, 'feComponentTransfer')
        gamma.set('color-interpolation-filters', 'sRGB')
        # The same gamma transfer function is applied to all four channels.
        for channel in ('feFuncR', 'feFuncG', 'feFuncB', 'feFuncA'):
            func = etree.SubElement(gamma, channel)
            func.set('type', 'gamma')
            func.set('amplitude', str(1))
            func.set('exponent', str(1/2.2))
        g.set('style', 'filter:url(#gamma)')

    def add_stops(color, gradient):
        # Emit one <stop> per gradient offset.
        offsets = gradient.offsets.data.cpu().numpy()
        for k in range(offsets.shape[0]):
            stop = etree.SubElement(color, 'stop')
            stop.set('offset', str(offsets[k]))
            c = gradient.stop_colors[k, :]
            stop.set('stop-color', 'rgb({}, {}, {})'.format(
                int(255 * c[0]), int(255 * c[1]), int(255 * c[2])))
            stop.set('stop-opacity', '{}'.format(c[3]))

    def add_color(shape_color, name):
        # Register a gradient definition under `name`; solid colours need none.
        if isinstance(shape_color, pydiffvg.LinearGradient):
            color = etree.SubElement(defs, 'linearGradient')
            color.set('id', name)
            color.set('x1', str(shape_color.begin[0].item()/width))
            color.set('y1', str(shape_color.begin[1].item()/height))
            color.set('x2', str(shape_color.end[0].item()/width))
            color.set('y2', str(shape_color.end[1].item()/height))
            add_stops(color, shape_color)
        elif isinstance(shape_color, pydiffvg.RadialGradient):
            color = etree.SubElement(defs, 'radialGradient')
            color.set('id', name)
            color.set('cx', str(shape_color.center[0].item()/width))
            color.set('cy', str(shape_color.center[1].item()/height))
            # this only support width=height
            color.set('r', str(shape_color.radius[0].item()/width))
            add_stops(color, shape_color)

    # Store color
    for i, shape_group in enumerate(shape_groups):
        if shape_group.fill_color is not None:
            add_color(shape_group.fill_color, 'shape_{}_fill'.format(i))
        if shape_group.stroke_color is not None:
            add_color(shape_group.stroke_color, 'shape_{}_stroke'.format(i))

    gradient_types = (pydiffvg.LinearGradient, pydiffvg.RadialGradient)
    for i, shape_group in enumerate(shape_groups):
        for k, shape_id in enumerate(shape_group.shape_ids):
            shape = shapes[shape_id]
            # NOTE(review): only Path shapes are serialised; other shape types
            # are skipped — confirm this matches the intended behaviour.
            if not isinstance(shape, pydiffvg.Path):
                continue

            if k == 0:
                # One <path> node per group; later shapes append sub-paths.
                shape_node = etree.SubElement(g, 'path')
                path_str = ''
            num_segments = shape.num_control_points.shape[0]
            num_control_points = shape.num_control_points.data.cpu().numpy()
            points = shape.points.data.cpu().numpy()
            num_points = shape.points.shape[0]
            path_str += 'M {} {}'.format(points[0, 0], points[0, 1])
            point_id = 1
            for seg in range(num_segments):
                # End points are taken modulo num_points so the last segment
                # can wrap around to the first point.
                if num_control_points[seg] == 0:
                    p = point_id % num_points
                    path_str += ' L {} {}'.format(
                        points[p, 0], points[p, 1])
                    point_id += 1
                elif num_control_points[seg] == 1:
                    p1 = (point_id + 1) % num_points
                    path_str += ' Q {} {} {} {}'.format(
                        points[point_id, 0], points[point_id, 1],
                        points[p1, 0], points[p1, 1])
                    point_id += 2
                elif num_control_points[seg] == 2:
                    p2 = (point_id + 2) % num_points
                    path_str += ' C {} {} {} {} {} {}'.format(
                        points[point_id, 0], points[point_id, 1],
                        points[point_id + 1, 0], points[point_id + 1, 1],
                        points[p2, 0], points[p2, 1])
                    point_id += 3
                else:
                    raise ValueError(
                        'unsupported number of control points: {}'.format(num_control_points[seg]))

            shape_node.set('stroke-width', str(0))  # no strokes
            if shape_group.fill_color is not None:
                if isinstance(shape_group.fill_color, gradient_types):
                    shape_node.set('fill', 'url(#shape_{}_fill)'.format(i))
                else:
                    c = shape_group.fill_color.data.cpu().numpy()
                    shape_node.set('fill', 'rgb({}, {}, {})'.format(
                        int(255 * c[0]), int(255 * c[1]), int(255 * c[2])))
                    shape_node.set('opacity', str(c[3]))
            else:
                shape_node.set('fill', 'none')
            if shape_group.stroke_color is not None:
                # Both gradient kinds reference the definition registered by
                # add_color(); only solid colours are written inline.
                if isinstance(shape_group.stroke_color, gradient_types):
                    shape_node.set('stroke', 'url(#shape_{}_stroke)'.format(i))
                else:
                    c = shape_group.stroke_color.data.cpu().numpy()
                    shape_node.set('stroke', 'rgb({}, {}, {})'.format(
                        int(255 * c[0]), int(255 * c[1]), int(255 * c[2])))
                    shape_node.set('stroke-opacity', str(c[3]))
                shape_node.set('stroke-linecap', 'round')
                shape_node.set('stroke-linejoin', 'round')

            shape_node.set('d', path_str)

    with open(filename, "w", encoding="utf-8") as f:
        f.write(prettify(root))
|
code/ttf.py
ADDED
@@ -0,0 +1,409 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from importlib import reload
|
2 |
+
import os
|
3 |
+
import numpy as np
|
4 |
+
import bezier
|
5 |
+
import freetype as ft
|
6 |
+
import pydiffvg
|
7 |
+
import torch
|
8 |
+
import save_svg
|
9 |
+
import vharfbuzz as hb
|
10 |
+
from svgpathtools import svgstr2paths
|
11 |
+
import xml.etree.ElementTree as ET
|
12 |
+
|
13 |
+
|
14 |
+
device = torch.device("cuda" if (
|
15 |
+
torch.cuda.is_available() and torch.cuda.device_count() > 0) else "cpu")
|
16 |
+
|
17 |
+
reload(bezier)
|
18 |
+
|
19 |
+
def fix_single_svg(svg_path, all_word=False):
    """Rescale and centre an SVG on a 600x600 canvas, saving ``*_scaled.svg``.

    The scene is scaled towards a target size of 360 (along the width when
    ``all_word`` is set and the canvas is wider than tall, otherwise along
    the height), flipped vertically, and centred on the canvas. The result is
    written next to the input with ``_scaled`` inserted before the last four
    characters of ``svg_path``.
    """
    target_h_letter = 360
    target_canvas_width, target_canvas_height = 600, 600

    canvas_width, canvas_height, shapes, shape_groups = pydiffvg.svg_to_scene(svg_path)
    letter_w, letter_h = canvas_width, canvas_height

    if all_word and letter_w > letter_h:
        # Wide word: fit the width, snap the height to a whole pixel.
        scale_canvas_w = target_h_letter / letter_w
        scale_canvas_h = int(letter_h * scale_canvas_w) / letter_h
    else:
        # Fit the height, snap the width to a whole pixel.
        scale_canvas_h = target_h_letter / letter_h
        scale_canvas_w = int(letter_w * scale_canvas_h) / letter_w

    for path in shapes:
        path.points[:, 0] = path.points[:, 0] * scale_canvas_w
        path.points[:, 1] = -(path.points[:, 1] * scale_canvas_h + target_h_letter)

    # Bounding box over all shapes after scaling and flipping.
    w_min = min(torch.min(path.points[:, 0]) for path in shapes)
    w_max = max(torch.max(path.points[:, 0]) for path in shapes)
    h_min = min(torch.min(path.points[:, 1]) for path in shapes)
    h_max = max(torch.max(path.points[:, 1]) for path in shapes)

    half_w = target_canvas_width/2
    half_h = target_canvas_height/2
    center_x = int(w_min + (w_max - w_min) / 2)
    center_y = int(h_min + (h_max - h_min) / 2)
    for path in shapes:
        path.points[:, 0] = path.points[:, 0] + half_w - center_x
        path.points[:, 1] = path.points[:, 1] + half_h - center_y

    output_path = f"{svg_path[:-4]}_scaled.svg"
    save_svg.save_svg(output_path, target_canvas_width, target_canvas_height, shapes, shape_groups)
|
57 |
+
|
58 |
+
def normalize_letter_size(dest_path, font, txt, chars=None):
    """Normalise the whole-word SVG previously written for ``txt``.

    Builds ``{dest_path}/{fontname}_{txt}.svg`` (with every space in the path
    replaced by an underscore), where ``fontname`` is the font file's base name
    without extension, and passes it to ``fix_single_svg`` with
    ``all_word=True``.

    Args:
        dest_path: directory that contains the SVG.
        font: path of the font file; only its base name is used.
        txt: the word whose SVG is normalised.
        chars: not used by this function. It is optional so that three-argument
            calls are accepted instead of raising TypeError.
    """
    fontname = os.path.splitext(os.path.basename(font))[0]
    fname = f"{dest_path}/{fontname}_{txt}.svg".replace(" ", "_")
    fix_single_svg(fname, all_word=True)
|
68 |
+
|
69 |
+
|
70 |
+
def glyph_to_cubics(face, x=0, y=0):
    """Convert the outline of ``face.glyph`` into chains of cubic Bezier points.

    ``face.glyph.outline.decompose`` drives four callbacks. Every contour
    becomes one array: the start point followed by three points per segment.
    Line and conic (quadratic) segments are raised to cubic form.

    Args:
        face: object exposing ``glyph.outline.decompose``.
        x: horizontal offset added to every point.
        y: vertical offset; points are stored as ``-p.y - y``.

    Returns:
        A list of float arrays, one per contour.
    """
    contours = []

    def to_point(p):
        # Flipping here since freetype has y-up
        return np.array([x + p.x, - p.y - y])

    def current_point():
        return contours[-1][-1]

    def on_move(a, chains):
        # A move starts a new contour.
        chains.append([to_point(a)])

    def on_line(a, chains):
        start, end = current_point(), to_point(a)
        samples = [start + (end - start) * t for t in np.linspace(0, 1, 4)]
        # Drop the first sample: it duplicates the point already in the chain.
        chains[-1] += samples[1:]

    def on_conic(a, b, chains):
        start, ctrl, end = current_point(), to_point(a), to_point(b)
        # Quadratic -> cubic degree elevation.
        chains[-1] += [start + (2 / 3) * (ctrl - start),
                       end + (2 / 3) * (ctrl - end),
                       end]

    def on_cubic(a, b, c, chains):
        chains[-1] += [to_point(a), to_point(b), to_point(c)]

    face.glyph.outline.decompose(contours, move_to=on_move, line_to=on_line, conic_to=on_conic, cubic_to=on_cubic)
    return [np.array(chain).astype(float) for chain in contours]
|
104 |
+
|
105 |
+
# def handle_ligature(glyph_infos, glyph_positions):
|
106 |
+
# combined_advance = sum(pos.x_advance for pos in glyph_positions)
|
107 |
+
# first_x_offset = glyph_positions[0].x_offset
|
108 |
+
|
109 |
+
# combined_advance = x_adv_1 + x_adv_2
|
110 |
+
|
111 |
+
|
112 |
+
|
113 |
+
|
114 |
+
# # Adjust the x_offset values based on the difference between the first glyph's x_offset and the combined_advance
|
115 |
+
# for pos in glyph_positions:
|
116 |
+
# pos.x_offset += combined_advance - pos.x_advance - first_x_offset
|
117 |
+
|
118 |
+
# # Render the ligature using the adjusted glyph positions
|
119 |
+
# render_glyphs(glyph_infos, glyph_positions)
|
120 |
+
|
121 |
+
|
122 |
+
def font_string_to_beziers(font, txt, size=30, spacing=1.0, merge=True, target_control=None):
    """Shape ``txt`` and convert each shaped glyph outline to cubic Bezier chains.

    The text is shaped with ``hb.Vharfbuzz`` (``kern`` and ``liga`` enabled).
    Glyphs are visited in reverse buffer order; each outline is loaded through
    freetype by glyph id and converted with ``glyph_to_cubics`` at the current
    pen position, after which the pen moves left by the glyph's ``x_advance``.

    Args:
        font: path of the font file, passed to both Vharfbuzz and freetype.
        txt: the string to shape.
        size: nominal size; ``face.set_char_size`` receives ``64 * size``.
        spacing: unused, kept for signature compatibility.
        merge: if true, return one flat list of chains; otherwise one list of
            chains per glyph.
        target_control: optional mapping to a minimum number of control points.
            Keys may be the glyph label (``"<char>_<n>"``) or the bare
            character. Chains are subdivided until the minimum is reached.

    Returns:
        ``(beziers, chars)`` where ``chars`` holds one ``"<char>_<n>"`` label
        per glyph, ``n`` counting glyphs that share the same cluster.
    """
    print(font)

    vhb = hb.Vharfbuzz(font)
    buf = vhb.shape(txt, {"features": {"kern": True, "liga": True}})
    buf.guess_segment_properties()

    # Visit glyphs in reverse buffer order while the pen moves leftwards.
    glyph_infos = buf.glyph_infos[::-1]
    glyph_positions = buf.glyph_positions[::-1]
    glyph_count = {info.cluster: 0 for info in glyph_infos}

    face = ft.Face(font)
    face.set_char_size(64 * size)

    x, y = 0, 0
    beziers, chars = [], []

    for info, pos in zip(glyph_infos, glyph_positions):
        index = info.cluster
        char = txt[index]
        label = f"{char}_{glyph_count[index]}"
        chars.append(label)
        glyph_count[index] += 1

        face.load_glyph(info.codepoint, flags=ft.FT_LOAD_DEFAULT | ft.FT_LOAD_NO_BITMAP)
        bez = glyph_to_cubics(face, x, y)

        # Check number of control points if desired. The label carries a
        # "_<n>" suffix, so fall back to the bare character; looking up only
        # the label never matched character-keyed tables.
        target = None
        if target_control is not None:
            if label in target_control:
                target = target_control[label]
            elif char in target_control:
                target = target_control[char]

        if target is not None:
            nctrl = np.sum([len(C) for C in bez])
            while nctrl < target:
                lengths = [bezier.approx_arc_length(b)
                           for C in bez
                           for b in bezier.chain_to_beziers(C)]
                if not lengths:
                    # Nothing to subdivide (glyph without curves).
                    break
                thresh = np.max(lengths) * 0.5
                bez = [bezier.subdivide_bezier_chain(C, thresh) for C in bez]
                nctrl = np.sum([len(C) for C in bez])
                print(nctrl)

        if merge:
            beziers += bez
        else:
            beziers.append(bez)

        print(f"C: {txt[index]}/{index} | X: {x}: {pos.x_advance}/{pos.x_offset} | Y: {y}: {pos.y_advance}/{pos.y_offset}")

        x -= pos.x_advance

    return beziers, chars
|
247 |
+
|
248 |
+
|
249 |
+
def bezier_chain_to_commands(C, closed=True):
    """Serialise a Bezier chain as SVG path commands.

    Emits one ``M`` command for the chain's first point, then one ``C`` command
    per curve returned by ``bezier.chain_to_beziers``. When ``closed`` is true
    the final curve command is terminated with ``z``.

    Args:
        C: indexable chain of 2D points.
        closed: whether to close the path after the last curve.

    Returns:
        The command string; every command is followed by a space.
    """
    curves = bezier.chain_to_beziers(C)
    last_index = len(curves) - 1
    pieces = ['M %f %f ' % (C[0][0], C[0][1])]
    for idx, curve in enumerate(curves):
        if closed and idx == last_index:
            pieces.append('C %f %f %f %f %f %fz ' % (*curve[1], *curve[2], *curve[3]))
        else:
            pieces.append('C %f %f %f %f %f %f ' % (*curve[1], *curve[2], *curve[3]))
    return ''.join(pieces)
|
259 |
+
|
260 |
+
|
261 |
+
def count_cp(file_name, font_name):
    """Count the control points in an SVG file and print the total.

    The file is loaded with ``pydiffvg.svg_to_scene``; the count is the sum of
    the first dimension of every shape's ``points``.

    Args:
        file_name: path of the SVG file.
        font_name: unused.

    Returns:
        The total number of points.
    """
    _, _, shapes, _ = pydiffvg.svg_to_scene(file_name)
    total_points = sum(shape.points.shape[0] for shape in shapes)
    print(f"TOTAL CP: [{total_points}]")
    return total_points
|
268 |
+
|
269 |
+
|
270 |
+
def write_letter_svg(c, header, fontname, beziers, subdivision_thresh, dest_path):
    """Write one glyph's Bezier chains as a single-path SVG file.

    Args:
        c: glyph label used in the file name.
        header: SVG header text (up to and including the opening ``<svg>``).
        fontname: font name used in the file name.
        beziers: iterable of Bezier chains making up the glyph.
        subdivision_thresh: if not None, each chain is first passed through
            ``bezier.subdivide_bezier_chain`` with this threshold.
        dest_path: directory to write into.

    Returns:
        ``(fname, path)``: the written file name (spaces replaced by
        underscores) and the ``<g><path .../>`` fragment, whose ``<g>`` is
        left open for the caller to close.
    """
    commands = []
    for C in beziers:
        if subdivision_thresh is not None:
            print('subd')
            C = bezier.subdivide_bezier_chain(C, subdivision_thresh)
        commands.append(bezier_chain_to_commands(C, True))

    path = '<g><path d="' + ''.join(commands) + '"/>\n'
    svg = header + path + '</g></svg>\n'

    fname = f"{dest_path}/{fontname}_{c}.svg".replace(" ", "_")
    # The context manager closes the handle even if the write fails.
    with open(fname, 'w', encoding='utf-8') as f:
        f.write(svg)
    return fname, path
|
289 |
+
|
290 |
+
def write_letter_svg_hb(vhb, c, dest_path, fontname):
    """Shape ``c`` with ``vhb`` and write the resulting SVG to disk.

    Args:
        vhb: shaper exposing ``shape(text, params)`` and ``buf_to_svg(buf)``.
        c: text to shape; also used in the file name.
        dest_path: directory to write into.
        fontname: font name used in the file name.

    Returns:
        The written file name, ``{dest_path}/{fontname}_{c}.svg`` with spaces
        replaced by underscores.
    """
    buf = vhb.shape(c, {"features": {"kern": True, "liga": True}})
    svg = vhb.buf_to_svg(buf)

    fname = f"{dest_path}/{fontname}_{c}.svg".replace(" ", "_")
    # The context manager closes the handle even if the write fails.
    with open(fname, 'w', encoding='utf-8') as f:
        f.write(svg)
    return fname
|
300 |
+
|
301 |
+
def font_string_to_svgs(dest_path, font, txt, size=30, spacing=1.0, target_control=None, subdivision_thresh=None):
    """Write one SVG per shaped glyph of ``txt`` plus one SVG for the whole word.

    Per-glyph files are built from the Bezier chains returned by
    ``font_string_to_beziers`` and share a header whose size and viewBox are
    the bounding box of all glyph points. The whole-word file,
    ``{dest_path}/{fontname}_{txt}.svg``, is the HarfBuzz rendering from
    ``vhb.buf_to_svg``, not a concatenation of the per-glyph paths.

    Args:
        dest_path: output directory, created (with parents) if missing.
        font: path of the font file.
        txt: the string to render.
        size: forwarded to ``font_string_to_beziers``.
        spacing: forwarded to ``font_string_to_beziers``.
        target_control: forwarded to ``font_string_to_beziers``.
        subdivision_thresh: forwarded to ``write_letter_svg``.

    Returns:
        The glyph labels returned by ``font_string_to_beziers``.
    """
    fontname = os.path.splitext(os.path.basename(font))[0]
    glyph_beziers, chars = font_string_to_beziers(font, txt, size, spacing, merge=False, target_control=target_control)
    # makedirs also creates missing parents, which a plain mkdir does not.
    os.makedirs(dest_path, exist_ok=True)

    # Compute bounding box over every point of every glyph.
    points = np.vstack(sum(glyph_beziers, []))
    lt = np.min(points, axis=0)
    rb = np.max(points, axis=0)
    extent = rb - lt

    header = (
        '<?xml version="1.0" encoding="utf-8"?>\n'
        '<svg xmlns="http://www.w3.org/2000/svg" xmlns:ev="http://www.w3.org/2001/xml-events" '
        'xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" baseProfile="full" '
    )
    header += 'width="%.1f" height="%.1f"' % (extent[0], extent[1])
    header += ' viewBox="%.1f %.1f %.1f %.1f"' % (lt[0], lt[1], extent[0], extent[1])
    header += '>\n<defs/>\n'

    print(f"Len Glyph Bezier: {len(glyph_beziers)} | Chars: {len(chars)}")
    for c, beziers in zip(chars, glyph_beziers):
        print(f"==== {c} ====")
        fname, _ = write_letter_svg(c, header, fontname, beziers, subdivision_thresh, dest_path)

        num_cp = count_cp(fname, fontname)
        print(num_cp)
        print(font, c)

    vhb = hb.Vharfbuzz(font)
    buf = vhb.shape(txt, {"features": {"kern": True, "liga": True}})
    svg = vhb.buf_to_svg(buf)

    # Save global svg
    fname = f"{dest_path}/{fontname}_{txt}.svg".replace(" ", "_")
    with open(fname, 'w', encoding='utf-8') as f:
        f.write(svg)
    return chars
|
346 |
+
|
347 |
+
def font_string_to_svgs_hb(dest_path, font, txt, size=30, spacing=1.0, target_control=None, subdivision_thresh=None):
    """Shape ``txt`` with HarfBuzz and write the whole-word SVG.

    The file is written to ``{dest_path}/{fontname}_{txt}.svg`` (spaces
    replaced by underscores), where ``fontname`` is the font file's base name
    without extension.

    Args:
        dest_path: output directory, created (with parents) if missing.
        font: path of the font file.
        txt: the string to render.
        size: unused, kept for signature compatibility.
        spacing: unused, kept for signature compatibility.
        target_control: unused, kept for signature compatibility.
        subdivision_thresh: unused, kept for signature compatibility.

    Returns:
        None.
    """
    fontname = os.path.splitext(os.path.basename(font))[0]
    # makedirs also creates missing parents, which a plain mkdir does not.
    os.makedirs(dest_path, exist_ok=True)

    vhb = hb.Vharfbuzz(font)
    # Shape once: the buffer from an earlier extra shaping pass was overwritten
    # before it was ever used.
    buf = vhb.shape(txt, {"features": {"kern": True, "liga": True}})
    svg = vhb.buf_to_svg(buf)

    # Save global svg
    fname = f"{dest_path}/{fontname}_{txt}.svg".replace(" ", "_")
    with open(fname, 'w', encoding='utf-8') as f:
        f.write(svg)
    return None
|
367 |
+
|
368 |
+
if __name__ == '__main__':
    # Script entry point: render the word "BUNNY" for every listed font and
    # normalise the resulting whole-word SVG.

    fonts = ["KaushanScript-Regular"]
    level_of_cc = 1

    if level_of_cc == 0:
        target_cp = None
    else:
        # Minimum control points per upper-case letter; the lower-case letters
        # use the same values.
        upper_cp = {"A": 120, "B": 120, "C": 100, "D": 100,
                    "E": 120, "F": 120, "G": 120, "H": 120,
                    "I": 35, "J": 80, "K": 100, "L": 80,
                    "M": 100, "N": 100, "O": 100, "P": 120,
                    "Q": 120, "R": 130, "S": 110, "T": 90,
                    "U": 100, "V": 100, "W": 100, "X": 130,
                    "Y": 120, "Z": 120}
        target_cp = {**upper_cp, **{k.lower(): v for k, v in upper_cp.items()}}
        target_cp = {k: v * level_of_cc for k, v in target_cp.items()}

    for f in fonts:
        print(f"======= {f} =======")
        font_path = f"data/fonts/{f}.ttf"
        output_path = "data/init"
        txt = "BUNNY"
        subdivision_thresh = None
        chars = font_string_to_svgs(output_path, font_path, txt, target_control=target_cp,
                                    subdivision_thresh=subdivision_thresh)
        # normalize_letter_size declares a fourth `chars` parameter; omitting
        # it raised TypeError.
        normalize_letter_size(output_path, font_path, txt, chars)

    print("DONE")
|
406 |
+
|
407 |
+
|
408 |
+
|
409 |
+
|
code/utils.py
ADDED
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import collections.abc
|
2 |
+
import os
|
3 |
+
import os.path as osp
|
4 |
+
from torch import nn
|
5 |
+
import kornia.augmentation as K
|
6 |
+
import pydiffvg
|
7 |
+
import save_svg
|
8 |
+
import cv2
|
9 |
+
from ttf import font_string_to_svgs, font_string_to_svgs_hb, normalize_letter_size
|
10 |
+
import torch
|
11 |
+
import numpy as np
|
12 |
+
|
13 |
+
|
14 |
+
def edict_2_dict(x):
    """Recursively copy nested dict/list structures into plain dicts and lists.

    Any ``dict`` instance (including subclasses) becomes a plain ``dict`` and
    any ``list`` instance becomes a plain ``list``, with their contents
    converted the same way. Every other value is returned unchanged.
    """
    if isinstance(x, dict):
        return {key: edict_2_dict(x[key]) for key in x}
    if isinstance(x, list):
        return [edict_2_dict(item) for item in x]
    return x
|
27 |
+
|
28 |
+
|
29 |
+
def check_and_create_dir(path):
    """Ensure the parent directory of ``path`` exists.

    Args:
        path: a file path. Only its directory part is created; a path with no
            directory part (a bare file name) needs nothing and is a no-op.
    """
    pathdir = osp.split(path)[0]
    if pathdir:
        # exist_ok avoids failing when the directory appears between a check
        # and the creation.
        os.makedirs(pathdir, exist_ok=True)
|
35 |
+
|
36 |
+
|
37 |
+
def update(d, u):
    """Recursively merge mapping ``u`` into ``d`` in place and return ``d``.

    Values of ``u`` that are mappings are merged into the corresponding entry
    of ``d`` (an empty dict when the key is missing); every other value
    overwrites the entry.

    https://stackoverflow.com/questions/3232943/update-value-of-a-nested-dictionary-of-varying-depth
    """
    for key, incoming in u.items():
        if not isinstance(incoming, collections.abc.Mapping):
            d[key] = incoming
            continue
        d[key] = update(d.get(key, {}), incoming)
    return d
|
45 |
+
|
46 |
+
|
47 |
+
def preprocess(font, word, letter, level_of_cc=1):
    """Generate and normalise the initial SVG for ``word`` (and for ``letter``).

    Fonts whose name starts with ``0``, ``1`` or ``2`` are read from
    ``code/data/arabic-fonts``; all others from ``code/data/fonts``. Output
    goes to ``code/data/init``.

    Args:
        font: font name (file base name without ``.ttf``).
        word: the word to render.
        letter: the letter(s) to optimise; rendered separately only when it
            holds more than one character.
        level_of_cc: multiplier on the per-letter control-point targets; ``0``
            disables the targets.
    """
    target_cp = None
    if level_of_cc != 0:
        # Per-letter control-point targets; lower case mirrors upper case.
        per_letter = {"A": 120, "B": 120, "C": 100, "D": 100,
                      "E": 120, "F": 120, "G": 120, "H": 120,
                      "I": 35, "J": 80, "K": 100, "L": 80,
                      "M": 100, "N": 100, "O": 100, "P": 120,
                      "Q": 120, "R": 130, "S": 110, "T": 90,
                      "U": 100, "V": 100, "W": 100, "X": 130,
                      "Y": 120, "Z": 120}
        per_letter.update({k.lower(): v for k, v in list(per_letter.items())})
        target_cp = {k: v * level_of_cc for k, v in per_letter.items()}

    print(f"======= {font} =======")
    if font[0] in ['0', '1', '2']:
        font_path = f"code/data/arabic-fonts/{font}.ttf"
    else:
        font_path = f"code/data/fonts/{font}.ttf"

    init_path = "code/data/init"
    chars = font_string_to_svgs_hb(init_path, font_path, word, target_control=target_cp,
                                   subdivision_thresh=None)
    normalize_letter_size(init_path, font_path, word, chars)

    # optimize two adjacent letters
    if len(letter) > 1:
        font_string_to_svgs_hb(init_path, font_path, letter, target_control=target_cp,
                               subdivision_thresh=None)
        normalize_letter_size(init_path, font_path, letter, chars)

    print("Done preprocess")
|
89 |
+
|
90 |
+
def get_data_augs(cut_size):
    """Build the augmentation pipeline as an ``nn.Sequential``.

    It chains a random perspective warp (distortion scale 0.5, probability
    0.7) with a random ``cut_size`` x ``cut_size`` crop that pads by
    reflection when needed.
    """
    return nn.Sequential(
        K.RandomPerspective(distortion_scale=0.5, p=0.7),
        K.RandomCrop(size=(cut_size, cut_size), pad_if_needed=True, padding_mode='reflect', p=1.0),
    )
|
95 |
+
|
96 |
+
|
97 |
+
'''pytorch adaptation of https://github.com/google/mipnerf'''
|
98 |
+
def learning_rate_decay(step,
                        lr_init,
                        lr_final,
                        max_steps,
                        lr_delay_steps=0,
                        lr_delay_mult=1):
    """Log-linear learning-rate schedule with an optional warm-up.

    The base rate interpolates between ``lr_init`` (at ``step=0``) and
    ``lr_final`` (at ``step=max_steps``) linearly in log space, with the
    progress clipped to [0, 1]. When ``lr_delay_steps > 0`` the base rate is
    multiplied by a factor that starts at ``lr_delay_mult`` and follows a
    quarter sine wave up to 1 at ``step=lr_delay_steps``.

    Args:
        step: int, the current optimization step.
        lr_init: float, the initial learning rate.
        lr_final: float, the final learning rate.
        max_steps: int, the number of steps during optimization.
        lr_delay_steps: int, the number of steps over which the rate is eased in.
        lr_delay_mult: float, the multiplier applied at the start of the delay.

    Returns:
        The learning rate for ``step``.
    """
    delay_rate = 1.
    if lr_delay_steps > 0:
        # A kind of reverse cosine decay.
        warmup = np.clip(step / lr_delay_steps, 0, 1)
        delay_rate = lr_delay_mult + (1 - lr_delay_mult) * np.sin(
            0.5 * np.pi * warmup)

    progress = np.clip(step / max_steps, 0, 1)
    log_lr = np.log(lr_init) * (1 - progress) + np.log(lr_final) * progress
    return delay_rate * np.exp(log_lr)
|
130 |
+
|
131 |
+
|
132 |
+
|
133 |
+
def save_image(img, filename, gamma=1):
    """Write ``img`` to ``filename`` via ``pydiffvg.imwrite``.

    The parent directory is created first; the image is detached and moved to
    the CPU before writing.
    """
    check_and_create_dir(filename)
    pydiffvg.imwrite(img.detach().cpu(), filename, gamma=gamma)
|
137 |
+
|
138 |
+
|
139 |
+
def get_letter_ids(letter, word, shape_groups):
    """Return the ``shape_ids`` of the first group paired with ``letter``.

    ``shape_groups`` and ``word`` are walked in lockstep. The result is the
    ``shape_ids`` of the first group whose paired character equals ``letter``,
    or ``None`` when there is no match.
    """
    matching_ids = (
        group.shape_ids
        for group, ch in zip(shape_groups, word)
        if ch == letter
    )
    return next(matching_ids, None)
|
143 |
+
|
144 |
+
|
145 |
+
def combine_word(word, letter, font, experiment_dir):
    """Paste the optimised letter shapes back into the scaled word and render it.

    Loads ``./code/data/init/{font}_{word}_scaled.svg`` and
    ``{experiment_dir}/output-svg/output.svg``. The output shapes are scaled
    with a near-uniform factor so they fit the bounding box the original
    letter(s) occupy in the word, centred along the looser axis, and then
    substituted for those letter shapes. The combined scene is written as
    ``{experiment_dir}/{font}_{word}_{letter}.svg`` and rendered over a white
    background to the matching ``.png``.

    Args:
        word: the full word.
        letter: the letter(s) that were optimised.
        font: font name used in the file names.
        experiment_dir: directory with the optimisation output; also receives
            the combined files.
    """
    word_svg_scaled = f"./code/data/init/{font}_{word}_scaled.svg"
    _, _, shapes_word, shape_groups_word = pydiffvg.svg_to_scene(word_svg_scaled)

    letter_ids = []
    for l in letter:
        letter_ids += get_letter_ids(l, word, shape_groups_word)

    # Bounding box of the original letter shapes inside the word.
    w_min = min(torch.min(shapes_word[ids].points[:, 0]) for ids in letter_ids)
    w_max = max(torch.max(shapes_word[ids].points[:, 0]) for ids in letter_ids)
    h_min = min(torch.min(shapes_word[ids].points[:, 1]) for ids in letter_ids)
    h_max = max(torch.max(shapes_word[ids].points[:, 1]) for ids in letter_ids)

    c_w = (-w_min + w_max) / 2
    c_h = (-h_min + h_max) / 2

    svg_result = os.path.join(experiment_dir, "output-svg", "output.svg")
    canvas_width, canvas_height, shapes, _ = pydiffvg.svg_to_scene(svg_result)

    # Bounding box of the optimised shapes.
    out_w_min = min(torch.min(p.points[:, 0]) for p in shapes)
    out_w_max = max(torch.max(p.points[:, 0]) for p in shapes)
    out_h_min = min(torch.min(p.points[:, 1]) for p in shapes)
    out_h_max = max(torch.max(p.points[:, 1]) for p in shapes)

    out_c_w = (-out_w_min + out_w_max) / 2
    out_c_h = (-out_h_min + out_h_max) / 2

    scale_canvas_w = (w_max - w_min) / (out_w_max - out_w_min)
    scale_canvas_h = (h_max - h_min) / (out_h_max - out_h_min)

    # Decide the branch ONCE. The scales are modified below, so re-evaluating
    # the comparison later could pick the other branch, whose shift variable
    # would then never have been assigned.
    fit_height = bool(scale_canvas_h > scale_canvas_w)
    if fit_height:
        wsize = int((out_w_max - out_w_min) * scale_canvas_h)
        scale_canvas_w = wsize / (out_w_max - out_w_min)
        shift_w = -out_c_w * scale_canvas_w + c_w
    else:
        hsize = int((out_h_max - out_h_min) * scale_canvas_w)
        scale_canvas_h = hsize / (out_h_max - out_h_min)
        shift_h = -out_c_h * scale_canvas_h + c_h

    for p in shapes:
        p.points[:, 0] = p.points[:, 0] * scale_canvas_w
        p.points[:, 1] = p.points[:, 1] * scale_canvas_h
        if fit_height:
            p.points[:, 0] = p.points[:, 0] - out_w_min * scale_canvas_w + w_min + shift_w
            p.points[:, 1] = p.points[:, 1] - out_h_min * scale_canvas_h + h_min
        else:
            p.points[:, 0] = p.points[:, 0] - out_w_min * scale_canvas_w + w_min
            p.points[:, 1] = p.points[:, 1] - out_h_min * scale_canvas_h + h_min + shift_h

    # Substitute the optimised shapes for the original letter shapes.
    for j, s in enumerate(letter_ids):
        shapes_word[s] = shapes[j]

    save_svg.save_svg(
        f"{experiment_dir}/{font}_{word}_{letter}.svg", canvas_width, canvas_height, shapes_word,
        shape_groups_word)

    render = pydiffvg.RenderFunction.apply
    scene_args = pydiffvg.RenderFunction.serialize_scene(canvas_width, canvas_height, shapes_word, shape_groups_word)
    img = render(canvas_width, canvas_height, 2, 2, 0, None, *scene_args)
    # Composite over white on the device the render lives on; a hard-coded
    # "cuda:0" fails on CPU-only hosts.
    img = img[:, :, 3:4] * img[:, :, :3] + \
        torch.ones(img.shape[0], img.shape[1], 3, device=img.device) * (1 - img[:, :, 3:4])
    img = img[:, :, :3]
    save_image(img, f"{experiment_dir}/{font}_{word}_{letter}.png")
|
208 |
+
|
209 |
+
|
210 |
+
def create_video(num_iter, experiment_dir, video_frame_freq):
    """Assemble saved iteration frames into ``{experiment_dir}/video.mp4``.

    Frames are read from ``{experiment_dir}/video-png/iterNNNN.png`` for every
    iteration that is a multiple of ``video_frame_freq``, plus the final
    iteration. The video is written with the ``mp4v`` codec at 30 fps and a
    600x600 frame size.

    Args:
        num_iter: total number of iterations.
        experiment_dir: directory holding ``video-png`` and receiving the video.
        video_frame_freq: sampling period of the saved frames.
    """
    frame_dir = os.path.join(experiment_dir, "video-png")
    frames = [
        cv2.imread(os.path.join(frame_dir, f"iter{step:04d}.png"))
        for step in range(0, num_iter)
        if step % video_frame_freq == 0 or step == num_iter - 1
    ]

    video_name = os.path.join(experiment_dir, "video.mp4")
    check_and_create_dir(video_name)
    writer = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*'mp4v'), 30.0, (600, 600))
    for frame in frames:
        writer.write(frame)
    writer.release()
|
diffvg
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Subproject commit adb04d04ee63e82d6569a2fa178ba0dd49115561
|
packages.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
python3-dev
|
requirements.txt
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
--extra-index-url https://download.pytorch.org/whl/cu113
|
2 |
+
torch==1.12.1+cu113
|
3 |
+
torchvision==0.13.1+cu113
|
4 |
+
|
5 |
+
cmake
|
6 |
+
numpy
|
7 |
+
scikit-image
|
8 |
+
ffmpeg
|
9 |
+
svgwrite
|
10 |
+
svgpathtools
|
11 |
+
cssutils
|
12 |
+
numba
|
13 |
+
torch-tools
|
14 |
+
scikit-fmm
|
15 |
+
easydict
|
16 |
+
visdom
|
17 |
+
opencv-python==4.5.4.60
|
18 |
+
|
19 |
+
diffusers==0.8
|
20 |
+
transformers
|
21 |
+
scipy
|
22 |
+
ftfy
|
23 |
+
accelerate
|
24 |
+
|
25 |
+
vharfbuzz
|
26 |
+
freetype-py
|
27 |
+
shapely
|
28 |
+
kornia==0.6.8
|