()})
+ : p_min(p_min), p_max(p_max) {}
+ Vector2f p_min, p_max;
+};
+
+// Returns the smallest box that contains both `box` and the point `p`.
+DEVICE
+inline
+AABB merge(const AABB &box, const Vector2f &p) {
+    // Lower corner moves down/left, upper corner moves up/right, per component.
+    auto lower = Vector2f{min(p.x, box.p_min.x), min(p.y, box.p_min.y)};
+    auto upper = Vector2f{max(p.x, box.p_max.x), max(p.y, box.p_max.y)};
+    return AABB{lower, upper};
+}
+
+// Returns the smallest box that contains both input boxes.
+DEVICE
+inline
+AABB merge(const AABB &box0, const AABB &box1) {
+    // Component-wise minimum of the lower corners, maximum of the upper corners.
+    auto lower = Vector2f{min(box0.p_min.x, box1.p_min.x),
+                          min(box0.p_min.y, box1.p_min.y)};
+    auto upper = Vector2f{max(box0.p_max.x, box1.p_max.x),
+                          max(box0.p_max.y, box1.p_max.y)};
+    return AABB{lower, upper};
+}
+
+// True when `p` lies within `box`; points exactly on the boundary count as inside.
+DEVICE
+inline
+bool inside(const AABB &box, const Vector2f &p) {
+    const bool in_x = p.x >= box.p_min.x && p.x <= box.p_max.x;
+    const bool in_y = p.y >= box.p_min.y && p.y <= box.p_max.y;
+    return in_x && in_y;
+}
+
+// True when `p` lies within `box` after the box is grown by `radius` on every side
+// (boundary inclusive).
+DEVICE
+inline
+bool inside(const AABB &box, const Vector2f &p, float radius) {
+    const bool in_x = p.x >= box.p_min.x - radius && p.x <= box.p_max.x + radius;
+    const bool in_y = p.y >= box.p_min.y - radius && p.y <= box.p_max.y + radius;
+    return in_x && in_y;
+}
+
+// Returns a copy of `box` pushed outwards by `width` along both axes
+// (a negative `width` shrinks it).
+DEVICE
+inline
+AABB enlarge(const AABB &box, float width) {
+    auto lower = Vector2f{box.p_min.x - width, box.p_min.y - width};
+    auto upper = Vector2f{box.p_max.x + width, box.p_max.y + width};
+    return AABB{lower, upper};
+}
+
+// Transforms the four corners of `box` with `xform` and returns the axis-aligned
+// box obtained by merging them, one after another, into a default-constructed AABB.
+DEVICE
+inline
+AABB transform(const Matrix3x3f &xform, const AABB &box) {
+    // Corner order matches the original sequence of merges.
+    const Vector2f corners[4] = {
+        Vector2f{box.p_min.x, box.p_min.y},
+        Vector2f{box.p_min.x, box.p_max.y},
+        Vector2f{box.p_max.x, box.p_min.y},
+        Vector2f{box.p_max.x, box.p_max.y}
+    };
+    AABB bounds = AABB();
+    for (int i = 0; i < 4; i++) {
+        bounds = merge(bounds, xform_pt(xform, corners[i]));
+    }
+    return bounds;
+}
+
+// Conservative proximity test: true when `pt` falls inside `box` grown by `r`
+// on every side. The grown region has square corners, so a point near a corner
+// can be accepted even though its Euclidean distance to the box exceeds `r`.
+DEVICE
+inline
+bool within_distance(const AABB &box, const Vector2f &pt, float r) {
+    const bool near_x = pt.x >= box.p_min.x - r && pt.x <= box.p_max.x + r;
+    const bool near_y = pt.y >= box.p_min.y - r && pt.y <= box.p_max.y + r;
+    return near_x && near_y;
+}
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d869df383fe6caf91cb40fe535d96cb1caa5ea4
--- /dev/null
+++ b/app.py
@@ -0,0 +1,375 @@
+import os
+os.system('python setup.py install --user')
+import argparse
+import csv
+import numpy as np
+import sys
+sys.path.append("/home/user/.local/lib/python3.8/site-packages/diffvg-0.0.1-py3.8-linux-x86_64.egg")
+print(sys.path)
+from pathlib import Path
+
+import gradio as gr
+
+import torch
+import yaml
+from PIL import Image
+from subprocess import call
+import torch
+import cv2
+import matplotlib.pyplot as plt
+import random
+import argparse
+import math
+import errno
+from tqdm import tqdm
+import yaml
+from easydict import EasyDict as edict
+
+
+def run_cmd(command):
+ try:
+ print(command)
+ call(command, shell=True)
+ except KeyboardInterrupt:
+ print("Process interrupted")
+ sys.exit(1)
+# run_cmd("gcc --version")
+# run_cmd("pwd")
+# run_cmd("ls")
+# run_cmd("git submodule update --init --recursive")
+# run_cmd("python setup.py install --user")
+# run_cmd("pip3 list")
+# import pydiffvg
+#
+# print("Sccuessfuly import diffvg ")
+# run_cmd("pwd")
+# run_cmd("ls")
+# run_cmd("git submodule update --init --recursive")
+# run_cmd("python setup.py install --user")
+
+# run_cmd("python main.py --config config/base.yaml --experiment experiment_5x1 --signature smile --target figures/smile.png --log_dir log/")
+from main import main_func
+
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--debug', action='store_true', default=False)
+ parser.add_argument("--config", default="config/base.yaml", type=str)
+ parser.add_argument("--experiment", type=str)
+ parser.add_argument("--seed", type=int)
+ parser.add_argument("--target", type=str, help="target image path")
+ parser.add_argument('--log_dir', metavar='DIR', default="log/")
+ parser.add_argument('--initial', type=str, default="random", choices=['random', 'circle'])
+ parser.add_argument('--signature', default="demo", nargs='+', type=str)
+ parser.add_argument('--seginit', nargs='+', type=str)
+ parser.add_argument("--num_segments", type=int, default=4)
+ # parser.add_argument("--num_paths", type=str, default="1,1,1")
+ # parser.add_argument("--num_iter", type=int, default=500)
+ # parser.add_argument('--free', action='store_true')
+ # Please ensure that image resolution is divisible by pool_size; otherwise the performance would drop a lot.
+ # parser.add_argument('--pool_size', type=int, default=40, help="the pooled image size for next path initialization")
+ # parser.add_argument('--save_loss', action='store_true')
+ # parser.add_argument('--save_init', action='store_true')
+ # parser.add_argument('--save_image', action='store_true')
+ # parser.add_argument('--save_video', action='store_true')
+ # parser.add_argument('--print_weight', action='store_true')
+ # parser.add_argument('--circle_init_radius', type=float)
+ cfg = edict()
+ args = parser.parse_args()
+ cfg.debug = args.debug
+ cfg.config = args.config
+ cfg.experiment = args.experiment
+ cfg.seed = args.seed
+ cfg.target = args.target
+ cfg.log_dir = args.log_dir
+ cfg.initial = args.initial
+ cfg.signature = args.signature
+ # set cfg num_segments in command
+ cfg.num_segments = args.num_segments
+ if args.seginit is not None:
+ cfg.seginit = edict()
+ cfg.seginit.type = args.seginit[0]
+ if cfg.seginit.type == 'circle':
+ cfg.seginit.radius = float(args.seginit[1])
+ return cfg
+
+
+def app_experiment_change(experiment_id):
+ if experiment_id == "add [1] total 1 path for demonstration":
+ return "experiment_1x1"
+ if experiment_id == "add [1, 1, 1, 1, 1] total 5 paths one by one":
+ return "experiment_5x1"
+ elif experiment_id == "add [1, 1, 1, 1, 1, 1, 1, 1] total 8 paths one by one":
+ return "experiment_8x1"
+ elif experiment_id == "add [1,2,4,8,16,32, ...] total 128 paths":
+ return "experiment_exp2_128"
+ elif experiment_id == "add [1,2,4,8,16,32, ...] total 256 paths":
+ return "experiment_exp2_256"
+
+
+# Parsed once at import time; run_live uses this object as its default cfg_arg.
+cfg_arg = parse_args()
+# Random 224x224x3 array and a placeholder string.
+# NOTE(review): no reader of temp_image / temp_text / temp_input is visible in this file — confirm they are still needed.
+temp_image = np.random.rand(224,224,3)
+temp_text = "start"
+temp_input = np.random.rand(224,224,3)
+def run_live(img, experiment_id, num_iter, cfg_arg=cfg_arg):
+ experiment = app_experiment_change(experiment_id)
+ cfg_arg.target = img
+ cfg_arg.experiment = experiment
+ img, text = main_func(img, experiment_id, num_iter, cfg_arg=cfg_arg)
+ return img, text
+
+
+
+
+
+
+
+
+
+# ROOT_PATH = sys.path[0] # 根目录
+# # 模型路径
+# model_path = "ultralytics/yolov5"
+# # 模型名称临时变量
+# model_name_tmp = ""
+# # 设备临时变量
+# device_tmp = ""
+# # 文件后缀
+# suffix_list = [".csv", ".yaml"]
+# def parse_args(known=False):
+# parser = argparse.ArgumentParser(description="Gradio LIVE")
+# parser.add_argument(
+# "--model_name", "-mn", default="yolov5s", type=str, help="model name"
+# )
+# parser.add_argument(
+# "--model_cfg",
+# "-mc",
+# default="./model_config/model_name_p5_all.yaml",
+# type=str,
+# help="model config",
+# )
+# parser.add_argument(
+# "--cls_name",
+# "-cls",
+# default="./cls_name/cls_name.yaml",
+# type=str,
+# help="cls name",
+# )
+# parser.add_argument(
+# "--nms_conf",
+# "-conf",
+# default=0.5,
+# type=float,
+# help="model NMS confidence threshold",
+# )
+# parser.add_argument(
+# "--nms_iou", "-iou", default=0.45, type=float, help="model NMS IoU threshold"
+# )
+#
+# parser.add_argument(
+# "--label_dnt_show",
+# "-lds",
+# action="store_false",
+# default=True,
+# help="label show",
+# )
+# parser.add_argument(
+# "--device",
+# "-dev",
+# default="cpu",
+# type=str,
+# help="cuda or cpu, hugging face only cpu",
+# )
+# parser.add_argument(
+# "--inference_size", "-isz", default=640, type=int, help="model inference size"
+# )
+#
+# args = parser.parse_known_args()[0] if known else parser.parse_args()
+# return args
+# # 模型加载
+# def model_loading(model_name, device):
+#
+# # 加载本地模型
+# model = torch.hub.load(model_path, model_name, force_reload=True, device=device)
+#
+# return model
+# # 检测信息
+# def export_json(results, model, img_size):
+#
+# return [
+# [
+# {
+# "id": int(i),
+# "class": int(result[i][5]),
+# "class_name": model.model.names[int(result[i][5])],
+# "normalized_box": {
+# "x0": round(result[i][:4].tolist()[0], 6),
+# "y0": round(result[i][:4].tolist()[1], 6),
+# "x1": round(result[i][:4].tolist()[2], 6),
+# "y1": round(result[i][:4].tolist()[3], 6),
+# },
+# "confidence": round(float(result[i][4]), 2),
+# "fps": round(1000 / float(results.t[1]), 2),
+# "width": img_size[0],
+# "height": img_size[1],
+# }
+# for i in range(len(result))
+# ]
+# for result in results.xyxyn
+# ]
+# def yolo_det(img, experiment_id, device=None, model_name=None, inference_size=None, conf=None, iou=None, label_opt=None, model_cls=None):
+#
+# global model, model_name_tmp, device_tmp
+#
+# if model_name_tmp != model_name:
+# # 模型判断,避免反复加载
+# model_name_tmp = model_name
+# model = model_loading(model_name_tmp, device)
+# elif device_tmp != device:
+# device_tmp = device
+# model = model_loading(model_name_tmp, device)
+#
+# # -----------模型调参-----------
+# model.conf = conf # NMS 置信度阈值
+# model.iou = iou # NMS IOU阈值
+# model.max_det = 1000 # 最大检测框数
+# model.classes = model_cls # 模型类别
+#
+# results = model(img, size=inference_size) # 检测
+# results.render(labels=label_opt) # 渲染
+#
+# det_img = Image.fromarray(results.imgs[0]) # 检测图片
+#
+# det_json = export_json(results, model, img.size)[0] # 检测信息
+#
+# return det_img, det_json
+
+
+# def run_cmd(command):
+# try:
+# print(command)
+# call(command, shell=True)
+# except KeyboardInterrupt:
+# print("Process interrupted")
+# sys.exit(1)
+#
+# run_cmd("gcc --version")
+# run_cmd("pwd")
+# run_cmd("ls")
+# run_cmd("git submodule update --init --recursive")
+# run_cmd("python setup.py install --user")
+# run_cmd("ls")
+# run_cmd("python main.py --config config/base.yaml --experiment experiment_5x1 --signature smile --target figures/smile.png --log_dir log/")
+
+
+
+
+
+
+# # yaml文件解析
+# def yaml_parse(file_path):
+# return yaml.safe_load(open(file_path, "r", encoding="utf-8").read())
+#
+#
+# # yaml csv 文件解析
+# def yaml_csv(file_path, file_tag):
+# file_suffix = Path(file_path).suffix
+# if file_suffix == suffix_list[0]:
+# # 模型名称
+# file_names = [i[0] for i in list(csv.reader(open(file_path)))] # csv版
+# elif file_suffix == suffix_list[1]:
+# # 模型名称
+# file_names = yaml_parse(file_path).get(file_tag) # yaml版
+# else:
+# print(f"{file_path}格式不正确!程序退出!")
+# sys.exit()
+#
+# return file_names
+
+
+def main(args):
+ gr.close_all()
+ # -------------------Inputs-------------------
+ inputs_iteration = gr.inputs.Slider(
+ label="Optimization Iteration",
+ default=500, maximum=600, minimum=100, step=100)
+ inputs_img = gr.inputs.Image(type="pil", label="Input Image", shape=[160, 160])
+ experiment_id = gr.inputs.Radio(
+ choices=[
+ "add [1] total 1 path for demonstration",
+ "add [1, 1, 1, 1, 1] total 5 paths one by one",
+ "add [1, 1, 1, 1, 1, 1, 1, 1] total 8 paths one by one",
+ "add [1,2,4,8,16,32, ...] total 128 paths",
+ "add [1,2,4,8,16,32, ...] total 256 paths"], type="value", default="add [1, 1, 1, 1, 1] total 5 paths one by one", label="Path Adding Scheduler"
+ )
+
+ # inputs
+ inputs = [
+
+ inputs_img, # input image
+ experiment_id, # path adding scheduler
+ inputs_iteration, # input iteration
+
+ ]
+ # outputs
+ outputs = gr.outputs.Image(type="numpy", label="Vectorized Image")
+ outputs02 = gr.outputs.File(label="Generated SVG output")
+
+ # title
+ title = "LIVE: Towards Layer-wise Image Vectorization"
+ # description
+ description = "(CVPR 2022 Oral Presentation)
" \
+ "Without GPUs, LIVE will cost longer time.
" \
+ "For efficiency, we rescale input to 160x160 (smaller size and fewer iterations will decrease the reconstructions).
"
+
+ # examples
+ examples = [
+ [
+ "./examples/1.png",
+ "add [1] total 1 path for demonstration",
+ 100,
+ ],
+ [
+ "./examples/2.png",
+ "add [1, 1, 1, 1, 1] total 5 paths one by one",
+ 300,
+ ],
+ [
+ "./examples/3.jpg",
+ "add [1,2,4,8,16,32, ...] total 128 paths",
+ 300,
+ ],
+ [
+ "./examples/4.png",
+ "add [1,2,4,8,16,32, ...] total 256 paths",
+ 300,
+ ],
+ [
+ "./examples/5.png",
+ "add [1, 1, 1, 1, 1] total 5 paths one by one",
+ 300,
+ ],
+ ]
+
+ # Interface
+ gr.Interface(
+ fn=run_live,
+ inputs=inputs,
+ outputs=[outputs, outputs02],
+ title=title,
+ description=description,
+ examples=examples,
+ theme="seafoam",
+ # live=True, # 实时变更输出
+ flagging_dir="log" # 输出目录
+ # ).launch(inbrowser=True, auth=['admin', 'admin'])
+ ).launch(
+ inbrowser=True, # 自动打开默认浏览器
+ show_tips=True, # 自动显示gradio最新功能
+ enable_queue=True
+ # favicon_path="./icon/logo.ico",
+ )
+
+
+if __name__ == "__main__":
+ # Script entry point: parse the command line and start the Gradio UI.
+ # NOTE(review): parse_args() already ran at module level to build cfg_arg,
+ # so the command line is parsed twice; main() does not read `args`.
+ args = parse_args()
+ main(args)
diff --git a/atomic.cpp b/atomic.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9c642b9b84357a10f2155d28324517f36d00b0cb
--- /dev/null
+++ b/atomic.cpp
@@ -0,0 +1,27 @@
+//A hacky solution to get around the Ellipse include
+
+#ifdef WIN32
+// <windows.h> supplies InterlockedCompareExchange(64), LONG and LONG64;
+// <cstdint> supplies int64_t.
+#include <windows.h>
+#include <cstdint>
+
+// Atomically adds `source` to `target` with a compare-and-swap retry loop on
+// the 32-bit pattern of the float. Returns the value `target` held immediately
+// before the successful update.
+float win_atomic_add(float &target, float source) {
+    union { int i; float f; } old_val;
+    union { int i; float f; } new_val;
+    do {
+        // Snapshot the current value and compute the desired sum ...
+        old_val.f = target;
+        new_val.f = old_val.f + (float)source;
+        // ... then publish it only if nobody changed `target` in between.
+    } while (InterlockedCompareExchange((LONG*)&target, (LONG)new_val.i, (LONG)old_val.i) != old_val.i);
+    return old_val.f;
+}
+
+// 64-bit counterpart of the float overload: same retry loop on the bit pattern
+// of the double. Returns the value `target` held before the update.
+double win_atomic_add(double &target, double source) {
+    union { int64_t i; double f; } old_val;
+    union { int64_t i; double f; } new_val;
+    do {
+        old_val.f = target;
+        new_val.f = old_val.f + (double)source;
+    } while (InterlockedCompareExchange64((LONG64*)&target, (LONG64)new_val.i, (LONG64)old_val.i) != old_val.i);
+    return old_val.f;
+}
+
+#endif
\ No newline at end of file
diff --git a/atomic.h b/atomic.h
new file mode 100644
index 0000000000000000000000000000000000000000..c721722df23f17097c67b79b05b57eecd12c5912
--- /dev/null
+++ b/atomic.h
@@ -0,0 +1,139 @@
+#pragma once
+
+#include "diffvg.h"
+#include "vector.h"
+#include "matrix.h"
+
+// https://stackoverflow.com/questions/39274472/error-function-atomicadddouble-double-has-already-been-defined
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
+// Double-precision atomicAdd for device builds targeting architectures below
+// 600, emulated with a 64-bit atomicCAS loop on the value's bit pattern.
+// Returns the value stored at `address` before the addition.
+static inline DEVICE double atomicAdd(double *address, double val) {
+    unsigned long long int* slot = (unsigned long long int*)address;
+    unsigned long long int observed = *slot;
+    // Adding zero: return the current value without touching memory.
+    if (val == 0.0) {
+        return __longlong_as_double(observed);
+    }
+    unsigned long long int expected;
+    do {
+        expected = observed;
+        // atomicCAS returns what was actually stored; a mismatch means
+        // another thread got in first, so retry from that value.
+        observed = atomicCAS(slot, expected,
+                             __double_as_longlong(val + __longlong_as_double(expected)));
+    } while (expected != observed);
+    return __longlong_as_double(observed);
+}
+#endif
+
+#ifndef WIN32
+    // Atomically adds `source` to `target` and returns the previous value.
+    // Device code forwards to CUDA's atomicAdd; host code uses a
+    // compare-exchange retry loop built on the compiler's __atomic builtin.
+    template <typename T0, typename T1>
+    DEVICE
+    inline T0 atomic_add_(T0 &target, T1 source) {
+    #ifdef __CUDA_ARCH__
+        return atomicAdd(&target, (T0)source);
+    #else
+        T0 old_val;
+        T0 new_val;
+        do {
+            old_val = target;
+            new_val = old_val + source;
+            // The builtin takes integer memory-order constants. __ATOMIC_SEQ_CST
+            // needs no <atomic> include and, unlike
+            // std::memory_order::memory_order_seq_cst, still compiles under
+            // C++20, where memory_order is a scoped enum.
+        } while (!__atomic_compare_exchange(&target, &old_val, &new_val, true,
+                                            __ATOMIC_SEQ_CST,
+                                            __ATOMIC_SEQ_CST));
+        return old_val;
+    #endif
+    }
+
+    // Single-precision atomic add; returns the value held before the addition.
+    DEVICE
+    inline
+    float atomic_add(float &target, float source) {
+        return atomic_add_<float, float>(target, source);
+    }
+    // Double-precision atomic add; returns the value held before the addition.
+    DEVICE
+    inline
+    double atomic_add(double &target, double source) {
+        return atomic_add_<double, double>(target, source);
+    }
+#else
+ // Host-side atomic adds for WIN32 builds; each returns the value `target`
+ // held before the addition. The definitions live in atomic.cpp.
+ float win_atomic_add(float &target, float source);
+ double win_atomic_add(double &target, double source);
+    // Single-precision atomic add for WIN32 builds: host code goes through
+    // win_atomic_add, device code through CUDA's atomicAdd.
+    DEVICE
+    static float atomic_add(float &target, float source) {
+    #ifndef __CUDA_ARCH__
+        return win_atomic_add(target, source);
+    #else
+        return atomicAdd(&target, source);
+    #endif
+    }
+    // Double-precision atomic add for WIN32 builds: host code goes through
+    // win_atomic_add, device code through CUDA's atomicAdd.
+    DEVICE
+    static double atomic_add(double &target, double source) {
+    #ifndef __CUDA_ARCH__
+        return win_atomic_add(target, source);
+    #else
+        return atomicAdd(&target, source);
+    #endif
+    }
+#endif
+
+// Pointer convenience overload: atomically adds `source`, converted to T0,
+// to `*target` and returns whatever the reference overload returns.
+template <typename T0, typename T1>
+DEVICE
+inline T0 atomic_add(T0 *target, T1 source) {
+    return atomic_add(*target, (T0)source);
+}
+
+// Atomically adds each component of `source` to the matching component of
+// `target`. Each component is updated atomically on its own; the vector as a
+// whole is not, and the returned copy of `target` is an ordinary read.
+// NOTE(review): the template parameter/argument lists were missing from this
+// declaration and have been reconstructed — confirm against TVector2 in vector.h.
+template <typename T0, typename T1>
+DEVICE
+inline TVector2<T0> atomic_add(TVector2<T0> &target, const TVector2<T1> &source) {
+    atomic_add(target[0], source[0]);
+    atomic_add(target[1], source[1]);
+    return target;
+}
+
+// Atomically accumulates a 2-vector into target[0..1], converting each
+// component to T0. Components are updated one at a time.
+// NOTE(review): the template parameter/argument lists were missing from this
+// declaration and have been reconstructed — confirm against TVector2 in vector.h.
+template <typename T0, typename T1>
+DEVICE
+inline void atomic_add(T0 *target, const TVector2<T1> &source) {
+    atomic_add(target[0], (T0)source[0]);
+    atomic_add(target[1], (T0)source[1]);
+}
+
+// Atomically adds each component of `source` to the matching component of
+// `target`. Each component is updated atomically on its own; the vector as a
+// whole is not, and the returned copy of `target` is an ordinary read.
+// NOTE(review): the template parameter/argument lists were missing from this
+// declaration and have been reconstructed — confirm against TVector3 in vector.h.
+template <typename T0, typename T1>
+DEVICE
+inline TVector3<T0> atomic_add(TVector3<T0> &target, const TVector3<T1> &source) {
+    atomic_add(target[0], source[0]);
+    atomic_add(target[1], source[1]);
+    atomic_add(target[2], source[2]);
+    return target;
+}
+
+// Atomically accumulates a 3-vector into target[0..2], converting each
+// component to T0. Components are updated one at a time.
+// NOTE(review): the template parameter/argument lists were missing from this
+// declaration and have been reconstructed — confirm against TVector3 in vector.h.
+template <typename T0, typename T1>
+DEVICE
+inline void atomic_add(T0 *target, const TVector3<T1> &source) {
+    atomic_add(target[0], (T0)source[0]);
+    atomic_add(target[1], (T0)source[1]);
+    atomic_add(target[2], (T0)source[2]);
+}
+
+// Atomically adds each component of `source` to the matching component of
+// `target`. Each component is updated atomically on its own; the vector as a
+// whole is not, and the returned copy of `target` is an ordinary read.
+// NOTE(review): the template parameter/argument lists were missing from this
+// declaration and have been reconstructed — confirm against TVector4 in vector.h.
+template <typename T0, typename T1>
+DEVICE
+inline TVector4<T0> atomic_add(TVector4<T0> &target, const TVector4<T1> &source) {
+    atomic_add(target[0], source[0]);
+    atomic_add(target[1], source[1]);
+    atomic_add(target[2], source[2]);
+    atomic_add(target[3], source[3]);
+    return target;
+}
+
+// Atomically accumulates a 4-vector into target[0..3], converting each
+// component to T0. Components are updated one at a time.
+// NOTE(review): the template parameter/argument lists were missing from this
+// declaration and have been reconstructed — confirm against TVector4 in vector.h.
+template <typename T0, typename T1>
+DEVICE
+inline void atomic_add(T0 *target, const TVector4<T1> &source) {
+    atomic_add(target[0], (T0)source[0]);
+    atomic_add(target[1], (T0)source[1]);
+    atomic_add(target[2], (T0)source[2]);
+    atomic_add(target[3], (T0)source[3]);
+}
+
+// Atomically accumulates a 3x3 matrix into nine consecutive entries of
+// `target`, laid out row by row (entry (i, j) goes to target[3 * i + j]).
+// Entries are updated one at a time.
+// NOTE(review): the template parameter/argument lists were missing from this
+// declaration and have been reconstructed — confirm against TMatrix3x3 in matrix.h.
+template <typename T0, typename T1>
+DEVICE
+inline void atomic_add(T0 *target, const TMatrix3x3<T1> &source) {
+    for (int i = 0; i < 3; i++) {
+        for (int j = 0; j < 3; j++) {
+            atomic_add(target[3 * i + j], (T0)source(i, j));
+        }
+    }
+}
+
diff --git a/cdf.h b/cdf.h
new file mode 100644
index 0000000000000000000000000000000000000000..48a64f897f2c230e3e0b5595de401dd644b8b777
--- /dev/null
+++ b/cdf.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include "diffvg.h"
+
+// Picks an entry of a discrete distribution from its cumulative table.
+//
+//   cdf         - cumulative values, one per entry (assumed non-decreasing —
+//                 TODO confirm with callers).
+//   num_entries - number of entries in `cdf`.
+//   u           - the sample value to look up.
+//   updated_u   - optional output: `u` re-expressed relative to the selected
+//                 entry's own interval, (u - cdf[lb-1]) / (cdf[lb] - cdf[lb-1]),
+//                 or u / cdf[0] for the first entry.
+//                 NOTE(review): this divides by the interval width, so a
+//                 zero-width entry would produce inf/NaN.
+//
+// Returns the first index whose cdf value exceeds `u`, searched over the first
+// num_entries - 1 entries and clamped to [0, num_entries - 1].
+//
+// Declared inline so this header can be included from several translation
+// units without producing duplicate definitions.
+DEVICE
+inline
+int sample(const float *cdf, int num_entries, float u, float *updated_u = nullptr) {
+    // Binary search the cdf
+    auto lb = 0;
+    auto len = num_entries - 1 - lb;
+    while (len > 0) {
+        auto half_len = len / 2;
+        auto mid = lb + half_len;
+        assert(mid >= 0 && mid < num_entries);
+        if (u < cdf[mid]) {
+            // Answer is at or before mid: keep the left half.
+            len = half_len;
+        } else {
+            // Answer is strictly after mid: keep the right half.
+            lb = mid + 1;
+            len = len - half_len - 1;
+        }
+    }
+    lb = clamp(lb, 0, num_entries - 1);
+    if (updated_u != nullptr) {
+        if (lb > 0) {
+            *updated_u = (u - cdf[lb - 1]) / (cdf[lb] - cdf[lb - 1]);
+        } else {
+            *updated_u = u / cdf[lb];
+        }
+    }
+    return lb;
+}
diff --git a/cls_name/cls_name.csv b/cls_name/cls_name.csv
new file mode 100644
index 0000000000000000000000000000000000000000..612e83beac9dfb3045b412a503a0efa8524c46bd
--- /dev/null
+++ b/cls_name/cls_name.csv
@@ -0,0 +1,80 @@
+人
+自行车
+汽车
+摩托车
+飞机
+公交车
+火车
+卡车
+船
+红绿灯
+消防栓
+停止标志
+停车收费表
+长凳
+鸟
+猫
+狗
+马
+羊
+牛
+象
+熊
+斑马
+长颈鹿
+背包
+雨伞
+手提包
+领带
+手提箱
+飞盘
+滑雪板
+单板滑雪
+运动球
+风筝
+棒球棒
+棒球手套
+滑板
+冲浪板
+网球拍
+瓶子
+红酒杯
+杯子
+叉子
+刀
+勺
+碗
+香蕉
+苹果
+三明治
+橙子
+西兰花
+胡萝卜
+热狗
+比萨
+甜甜圈
+蛋糕
+椅子
+长椅
+盆栽
+床
+餐桌
+马桶
+电视
+笔记本电脑
+鼠标
+遥控器
+键盘
+手机
+微波炉
+烤箱
+烤面包机
+洗碗槽
+冰箱
+书
+时钟
+花瓶
+剪刀
+泰迪熊
+吹风机
+牙刷
\ No newline at end of file
diff --git a/cls_name/cls_name.yaml b/cls_name/cls_name.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e03abc7379e74582534b4ad085939bdc70d43057
--- /dev/null
+++ b/cls_name/cls_name.yaml
@@ -0,0 +1,7 @@
+model_cls_name: ['人', '自行车', '汽车', '摩托车', '飞机', '公交车', '火车', '卡车', '船', '红绿灯', '消防栓', '停止标志',
+ '停车收费表', '长凳', '鸟', '猫', '狗', '马', '羊', '牛', '象', '熊', '斑马', '长颈鹿', '背包', '雨伞', '手提包', '领带',
+ '手提箱', '飞盘', '滑雪板', '单板滑雪', '运动球', '风筝', '棒球棒', '棒球手套', '滑板', '冲浪板', '网球拍', '瓶子', '红酒杯',
+ '杯子', '叉子', '刀', '勺', '碗', '香蕉', '苹果', '三明治', '橙子', '西兰花', '胡萝卜', '热狗', '比萨', '甜甜圈', '蛋糕',
+ '椅子', '长椅', '盆栽', '床', '餐桌', '马桶', '电视', '笔记本电脑', '鼠标', '遥控器', '键盘', '手机', '微波炉', '烤箱',
+ '烤面包机', '洗碗槽', '冰箱', '书', '时钟', '花瓶', '剪刀', '泰迪熊', '吹风机', '牙刷'
+ ]
\ No newline at end of file
diff --git a/cmake/FindTensorFlow.cmake b/cmake/FindTensorFlow.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..b251b10538f69f3dce42370e840f167ea24fc4fc
--- /dev/null
+++ b/cmake/FindTensorFlow.cmake
@@ -0,0 +1,34 @@
+# https://github.com/PatWie/tensorflow-cmake/blob/master/cmake/modules/FindTensorFlow.cmake
+
+# Ask the Python interpreter on PATH for TensorFlow's version, C++11 ABI flag,
+# include directory and library directory (one per output line). The snippet
+# exits with status 1 when tensorflow cannot be imported.
+execute_process(
+  COMMAND python -c "exec(\"try:\\n import tensorflow as tf; print(tf.__version__); print(tf.__cxx11_abi_flag__);print(tf.sysconfig.get_include()); print(tf.sysconfig.get_lib())\\nexcept ImportError:\\n exit(1)\")"
+  OUTPUT_VARIABLE TF_INFORMATION_STRING
+  OUTPUT_STRIP_TRAILING_WHITESPACE
+  RESULT_VARIABLE retcode)
+
+if("${retcode}" STREQUAL "0")
+  # Turn the newline-separated output into a CMake list. The input is quoted so
+  # that an empty output still passes an <input> argument to string(REPLACE).
+  string(REPLACE "\n" ";" TF_INFORMATION_LIST "${TF_INFORMATION_STRING}")
+  list(GET TF_INFORMATION_LIST 0 TF_DETECTED_VERSION)
+  list(GET TF_INFORMATION_LIST 1 TF_DETECTED_ABI)
+  list(GET TF_INFORMATION_LIST 2 TF_DETECTED_INCLUDE_DIR)
+  list(GET TF_INFORMATION_LIST 3 TF_DETECTED_LIBRARY_DIR)
+  if(WIN32)
+    find_library(TF_DETECTED_LIBRARY NAMES _pywrap_tensorflow_internal PATHS
+                 ${TF_DETECTED_LIBRARY_DIR}/python)
+  else()
+    # For some reason my tensorflow doesn't have a .so file
+    list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES .so.1)
+    list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES .so.2)
+    find_library(TF_DETECTED_LIBRARY NAMES tensorflow_framework PATHS
+                 ${TF_DETECTED_LIBRARY_DIR})
+  endif()
+  set(TensorFlow_VERSION ${TF_DETECTED_VERSION})
+  set(TensorFlow_ABI ${TF_DETECTED_ABI})
+  set(TensorFlow_INCLUDE_DIR ${TF_DETECTED_INCLUDE_DIR})
+  set(TensorFlow_LIBRARY ${TF_DETECTED_LIBRARY})
+  if(TensorFlow_LIBRARY AND TensorFlow_INCLUDE_DIR)
+    set(TensorFlow_FOUND TRUE)
+  else()
+    set(TensorFlow_FOUND FALSE)
+  endif()
+else()
+  # The probe failed (python missing or tensorflow not importable): report
+  # "not found" explicitly instead of leaving the variable undefined.
+  set(TensorFlow_FOUND FALSE)
+endif()
diff --git a/cmake/FindThrust.cmake b/cmake/FindThrust.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..61eef297b996496f4222d6afb570fb5aa960781d
--- /dev/null
+++ b/cmake/FindThrust.cmake
@@ -0,0 +1,40 @@
+##=============================================================================
+##
+## Copyright (c) Kitware, Inc.
+## All rights reserved.
+## See LICENSE.txt for details.
+##
+## This software is distributed WITHOUT ANY WARRANTY; without even
+## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+## PURPOSE. See the above copyright notice for more information.
+##
+## Copyright 2012 Sandia Corporation.
+## Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+## the U.S. Government retains certain rights in this software.
+##
+##=============================================================================
+
+#
+# FindThrust
+#
+# This module finds the Thrust header files and extracts their version. It
+# sets the following variables.
+#
+# THRUST_INCLUDE_DIR - Include directory for thrust header files. (All header
+# files will actually be in the thrust subdirectory.)
+# THRUST_VERSION - Version of thrust in the form "major.minor.patch".
+#
+
+# Locate the directory that contains thrust/version.h, checking the hinted
+# locations first. As written, only THRUST_INCLUDE_DIR and THRUST_FOUND are
+# set here; THRUST_VERSION is not assigned by this module.
+find_path(THRUST_INCLUDE_DIR
+  NAMES thrust/version.h
+  HINTS
+    /usr/include/cuda
+    /usr/local/include
+    /usr/local/cuda/include
+    ${CUDA_INCLUDE_DIRS}
+    ./thrust
+    ../thrust
+)
+
+# THRUST_FOUND is only ever set to TRUE; it stays undefined when the header is missing.
+if (THRUST_INCLUDE_DIR)
+  set(THRUST_FOUND TRUE)
+endif ()
\ No newline at end of file
diff --git a/color.cpp b/color.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2a2e8abcee1dacefeaeb0268359737aec178bace
--- /dev/null
+++ b/color.cpp
@@ -0,0 +1,25 @@
+#include "color.h"
+
+// Copies this gradient's stop data into caller-provided buffers:
+// `num_stops` floats into `stop_offsets` and 4 * `num_stops` floats (rgba per
+// stop) into `stop_colors`. The destination buffers must be at least that large.
+// NOTE(review): the <float> template argument of ptr was missing and has been
+// restored based on the float* results of get() — confirm against ptr.h.
+void LinearGradient::copy_to(ptr<float> stop_offsets,
+                             ptr<float> stop_colors) const {
+    float *o = stop_offsets.get();
+    float *c = stop_colors.get();
+    for (int i = 0; i < num_stops; i++) {
+        o[i] = this->stop_offsets[i];
+    }
+    for (int i = 0; i < 4 * num_stops; i++) {
+        c[i] = this->stop_colors[i];
+    }
+}
+
+// Copies this gradient's stop data into caller-provided buffers:
+// `num_stops` floats into `stop_offsets` and 4 * `num_stops` floats (rgba per
+// stop) into `stop_colors`. The destination buffers must be at least that large.
+// NOTE(review): the <float> template argument of ptr was missing and has been
+// restored based on the float* results of get() — confirm against ptr.h.
+void RadialGradient::copy_to(ptr<float> stop_offsets,
+                             ptr<float> stop_colors) const {
+    float *o = stop_offsets.get();
+    float *c = stop_colors.get();
+    for (int i = 0; i < num_stops; i++) {
+        o[i] = this->stop_offsets[i];
+    }
+    for (int i = 0; i < 4 * num_stops; i++) {
+        c[i] = this->stop_colors[i];
+    }
+}
diff --git a/color.h b/color.h
new file mode 100644
index 0000000000000000000000000000000000000000..c787105636d42b4706110500982d0ce576eda47e
--- /dev/null
+++ b/color.h
@@ -0,0 +1,63 @@
+#pragma once
+
+#include "diffvg.h"
+#include "vector.h"
+#include "ptr.h"
+
+// Tag naming the kind of color description; the enumerators match the
+// Constant, LinearGradient and RadialGradient structs declared in this header.
+enum class ColorType {
+ Constant,
+ LinearGradient,
+ RadialGradient
+};
+
+// A single uniform color.
+struct Constant {
+    Vector4f color; // four components; presumably rgba like the gradient stops — confirm
+
+    // Returns a ptr wrapper around this object.
+    // NOTE(review): the template argument of ptr was missing; <void> is a
+    // reconstruction — confirm against ptr.h and the callers of get_ptr().
+    ptr<void> get_ptr() {
+        return ptr<void>(this);
+    }
+};
+
+// Linear gradient described by two end points and a table of color stops.
+// The struct stores the raw stop pointers handed to the constructor; nothing in
+// this header copies or frees them.
+// NOTE(review): the template arguments of ptr were missing. <float> is
+// restored from the float* members they initialise; <void> on get_ptr() is a
+// reconstruction — confirm against ptr.h.
+struct LinearGradient {
+    LinearGradient(const Vector2f &begin,
+                   const Vector2f &end,
+                   int num_stops,
+                   ptr<float> stop_offsets,
+                   ptr<float> stop_colors)
+        : begin(begin), end(end), num_stops(num_stops),
+          stop_offsets(stop_offsets.get()), stop_colors(stop_colors.get()) {}
+
+    // Returns a ptr wrapper around this object.
+    ptr<void> get_ptr() {
+        return ptr<void>(this);
+    }
+
+    // Copies num_stops offsets and 4 * num_stops color floats into the
+    // caller-provided buffers (defined in color.cpp).
+    void copy_to(ptr<float> stop_offsets,
+                 ptr<float> stop_colors) const;
+
+    Vector2f begin, end;
+    int num_stops;
+    float *stop_offsets; // num_stops entries
+    float *stop_colors; // rgba, 4 floats per stop
+};
+
+// Radial gradient described by a center, a per-axis radius and a table of
+// color stops. The struct stores the raw stop pointers handed to the
+// constructor; nothing in this header copies or frees them.
+// NOTE(review): the template arguments of ptr were missing. <float> is
+// restored from the float* members they initialise; <void> on get_ptr() is a
+// reconstruction — confirm against ptr.h.
+struct RadialGradient {
+    RadialGradient(const Vector2f &center,
+                   const Vector2f &radius,
+                   int num_stops,
+                   ptr<float> stop_offsets,
+                   ptr<float> stop_colors)
+        : center(center), radius(radius), num_stops(num_stops),
+          stop_offsets(stop_offsets.get()), stop_colors(stop_colors.get()) {}
+
+    // Returns a ptr wrapper around this object.
+    ptr<void> get_ptr() {
+        return ptr<void>(this);
+    }
+
+    // Copies num_stops offsets and 4 * num_stops color floats into the
+    // caller-provided buffers (defined in color.cpp).
+    void copy_to(ptr<float> stop_offsets,
+                 ptr<float> stop_colors) const;
+
+    Vector2f center, radius;
+    int num_stops;
+    float *stop_offsets; // num_stops entries
+    float *stop_colors; // rgba, 4 floats per stop
+};
diff --git a/compute_distance.h b/compute_distance.h
new file mode 100644
index 0000000000000000000000000000000000000000..c125641a9d720bd16be1428e205bd6c07c726bc5
--- /dev/null
+++ b/compute_distance.h
@@ -0,0 +1,949 @@
+#pragma once
+
+#include "diffvg.h"
+#include "edge_query.h"
+#include "scene.h"
+#include "shape.h"
+#include "solve.h"
+#include "vector.h"
+
+#include
+
+// Identifies where on a Path the closest point was found; filled in by the
+// Path overload of closest_point.
+struct ClosestPointPathInfo {
+ // Index of the path segment (taken from the BVH leaf's child0).
+ int base_point_id;
+ // Index of the segment's first point in the path's point array.
+ int point_id;
+ // Curve parameter of the closest point on that segment (0 and 1 are the segment's end points).
+ float t_root;
+};
+
+// Writes to `*result` the point on the circle's outline that lies in the
+// direction of `pt` as seen from the center.
+// NOTE(review): this overload always returns false, unlike the Path overload,
+// which returns whether a point was found — confirm callers expect that.
+DEVICE
+inline
+bool closest_point(const Circle &circle, const Vector2f &pt,
+                   Vector2f *result) {
+    auto direction = normalize(pt - circle.center);
+    *result = circle.center + circle.radius * direction;
+    return false;
+}
+
+// Finds the point on `path` closest to `pt`, considering only candidates
+// strictly closer than `max_radius`.
+//
+//   path       - the path; segments are lines, quadratic or cubic Beziers
+//                depending on num_control_points (0, 1 or 2).
+//   bvh_nodes  - BVH over the path's segments. The traversal starts at node
+//                2 * num_base_points - 2; a node with child1 < 0 is a leaf
+//                whose child0 is the segment index and -child1 - 1 the index
+//                of the segment's first point.
+//   pt         - query point.
+//   max_radius - search radius; it also seeds the pruning distance.
+//   path_info  - output: segment, first point index and curve parameter of the
+//                closest point (written only when something is found).
+//   result     - output: the closest point, or (0, 0) when nothing is found.
+//
+// Returns true when a point closer than max_radius was found.
+DEVICE
+inline
+bool closest_point(const Path &path, const BVHNode *bvh_nodes, const Vector2f &pt, float max_radius,
+                   ClosestPointPathInfo *path_info,
+                   Vector2f *result) {
+    auto min_dist = max_radius;
+    auto ret_pt = Vector2f{0, 0};
+    auto found = false;
+    auto num_segments = path.num_base_points;
+    constexpr auto max_bvh_size = 128;
+    int bvh_stack[max_bvh_size];
+    auto stack_size = 0;
+    bvh_stack[stack_size++] = 2 * num_segments - 2;
+    while (stack_size > 0) {
+        const BVHNode &node = bvh_nodes[bvh_stack[--stack_size]];
+        if (node.child1 < 0) {
+            // leaf
+            auto base_point_id = node.child0;
+            auto point_id = - node.child1 - 1;
+            assert(base_point_id < num_segments);
+            assert(point_id < path.num_points);
+            auto dist = 0.f;
+            auto closest_pt = Vector2f{0, 0};
+            auto t_root = 0.f;
+            if (path.num_control_points[base_point_id] == 0) {
+                // Straight line
+                auto i0 = point_id;
+                auto i1 = (point_id + 1) % path.num_points;
+                auto p0 = Vector2f{path.points[2 * i0], path.points[2 * i0 + 1]};
+                auto p1 = Vector2f{path.points[2 * i1], path.points[2 * i1 + 1]};
+                // project pt to line
+                auto t = dot(pt - p0, p1 - p0) / dot(p1 - p0, p1 - p0);
+                if (t < 0) {
+                    dist = distance(p0, pt);
+                    closest_pt = p0;
+                    t_root = 0;
+                } else if (t > 1) {
+                    dist = distance(p1, pt);
+                    closest_pt = p1;
+                    t_root = 1;
+                } else {
+                    dist = distance(p0 + t * (p1 - p0), pt);
+                    closest_pt = p0 + t * (p1 - p0);
+                    t_root = t;
+                }
+            } else if (path.num_control_points[base_point_id] == 1) {
+                // Quadratic Bezier curve
+                auto i0 = point_id;
+                auto i1 = point_id + 1;
+                auto i2 = (point_id + 2) % path.num_points;
+                auto p0 = Vector2f{path.points[2 * i0], path.points[2 * i0 + 1]};
+                auto p1 = Vector2f{path.points[2 * i1], path.points[2 * i1 + 1]};
+                auto p2 = Vector2f{path.points[2 * i2], path.points[2 * i2 + 1]};
+                if (path.use_distance_approx) {
+                    closest_pt = quadratic_closest_pt_approx(p0, p1, p2, pt, &t_root);
+                    dist = distance(closest_pt, pt);
+                } else {
+                    auto eval = [&](float t) -> Vector2f {
+                        auto tt = 1 - t;
+                        return (tt*tt)*p0 + (2*tt*t)*p1 + (t*t)*p2;
+                    };
+                    // Start from the better of the two end points, then try
+                    // the interior stationary points.
+                    auto pt0 = eval(0);
+                    auto pt1 = eval(1);
+                    auto dist0 = distance(pt0, pt);
+                    auto dist1 = distance(pt1, pt);
+                    {
+                        dist = dist0;
+                        closest_pt = pt0;
+                        t_root = 0;
+                    }
+                    if (dist1 < dist) {
+                        dist = dist1;
+                        closest_pt = pt1;
+                        t_root = 1;
+                    }
+                    // The curve is (1-t)^2p0 + 2(1-t)tp1 + t^2p2
+                    // = (p0-2p1+p2)t^2+(-2p0+2p1)t+p0 = q
+                    // Want to solve (q - pt) dot q' = 0
+                    // q' = (p0-2p1+p2)t + (-p0+p1)
+                    // Expanding (p0-2p1+p2)^2 t^3 +
+                    //           3(p0-2p1+p2)(-p0+p1) t^2 +
+                    //           (2(-p0+p1)^2+(p0-2p1+p2)(p0-pt))t +
+                    //           (-p0+p1)(p0-pt) = 0
+                    auto A = sum((p0-2*p1+p2)*(p0-2*p1+p2));
+                    auto B = sum(3*(p0-2*p1+p2)*(-p0+p1));
+                    auto C = sum(2*(-p0+p1)*(-p0+p1)+(p0-2*p1+p2)*(p0-pt));
+                    auto D = sum((-p0+p1)*(p0-pt));
+                    float t[3];
+                    int num_sol = solve_cubic(A, B, C, D, t);
+                    for (int j = 0; j < num_sol; j++) {
+                        if (t[j] >= 0 && t[j] <= 1) {
+                            auto p = eval(t[j]);
+                            auto distp = distance(p, pt);
+                            if (distp < dist) {
+                                dist = distp;
+                                closest_pt = p;
+                                t_root = t[j];
+                            }
+                        }
+                    }
+                }
+            } else if (path.num_control_points[base_point_id] == 2) {
+                // Cubic Bezier curve
+                auto i0 = point_id;
+                auto i1 = point_id + 1;
+                auto i2 = point_id + 2;
+                auto i3 = (point_id + 3) % path.num_points;
+                auto p0 = Vector2f{path.points[2 * i0], path.points[2 * i0 + 1]};
+                auto p1 = Vector2f{path.points[2 * i1], path.points[2 * i1 + 1]};
+                auto p2 = Vector2f{path.points[2 * i2], path.points[2 * i2 + 1]};
+                auto p3 = Vector2f{path.points[2 * i3], path.points[2 * i3 + 1]};
+                auto eval = [&](float t) -> Vector2f {
+                    auto tt = 1 - t;
+                    return (tt*tt*tt)*p0 + (3*tt*tt*t)*p1 + (3*tt*t*t)*p2 + (t*t*t)*p3;
+                };
+                // Start from the better of the two end points, then try the
+                // interior stationary points.
+                auto pt0 = eval(0);
+                auto pt1 = eval(1);
+                auto dist0 = distance(pt0, pt);
+                auto dist1 = distance(pt1, pt);
+                {
+                    dist = dist0;
+                    closest_pt = pt0;
+                    t_root = 0;
+                }
+                if (dist1 < dist) {
+                    dist = dist1;
+                    closest_pt = pt1;
+                    t_root = 1;
+                }
+                // The curve is (1 - t)^3 p0 + 3 * (1 - t)^2 t p1 + 3 * (1 - t) t^2 p2 + t^3 p3
+                // = (-p0+3p1-3p2+p3) t^3 + (3p0-6p1+3p2) t^2 + (-3p0+3p1) t + p0
+                // Want to solve (q - pt) dot q' = 0
+                // q' = 3*(-p0+3p1-3p2+p3)t^2 + 2*(3p0-6p1+3p2)t + (-3p0+3p1)
+                // Expanding
+                // 3*(-p0+3p1-3p2+p3)^2 t^5
+                // 5*(-p0+3p1-3p2+p3)(3p0-6p1+3p2) t^4
+                // 4*(-p0+3p1-3p2+p3)(-3p0+3p1) + 2*(3p0-6p1+3p2)^2 t^3
+                // 3*(3p0-6p1+3p2)(-3p0+3p1) + 3*(-p0+3p1-3p2+p3)(p0-pt) t^2
+                // (-3p0+3p1)^2+2(p0-pt)(3p0-6p1+3p2) t
+                // (p0-pt)(-3p0+3p1)
+                double A = 3*sum((-p0+3*p1-3*p2+p3)*(-p0+3*p1-3*p2+p3));
+                double B = 5*sum((-p0+3*p1-3*p2+p3)*(3*p0-6*p1+3*p2));
+                double C = 4*sum((-p0+3*p1-3*p2+p3)*(-3*p0+3*p1)) + 2*sum((3*p0-6*p1+3*p2)*(3*p0-6*p1+3*p2));
+                double D = 3*(sum((3*p0-6*p1+3*p2)*(-3*p0+3*p1)) + sum((-p0+3*p1-3*p2+p3)*(p0-pt)));
+                double E = sum((-3*p0+3*p1)*(-3*p0+3*p1)) + 2*sum((p0-pt)*(3*p0-6*p1+3*p2));
+                double F = sum((p0-pt)*(-3*p0+3*p1));
+                // normalize the polynomial
+                // NOTE(review): A is zero when -p0+3p1-3p2+p3 vanishes, which
+                // makes these divisions produce inf/NaN — confirm such
+                // segments cannot reach this branch.
+                B /= A;
+                C /= A;
+                D /= A;
+                E /= A;
+                F /= A;
+                // Isolator Polynomials:
+                // https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.133.2233&rep=rep1&type=pdf
+                //                                      x/5 + B/25
+                //                                    /-----------------------------------------------------
+                // 5x^4 + 4B x^3 + 3C x^2 + 2D x + E / x^5 + B x^4 + C x^3 + D x^2 + E x + F
+                //                                     x^5 + 4B/5 x^4 + 3C/5 x^3 + 2D/5 x^2 + E/5 x
+                //                                     ----------------------------------------------------
+                //                                           B/5 x^4 + 2C/5 x^3 + 3D/5 x^2 + 4E/5 x + F
+                //                                           B/5 x^4 + 4B^2/25 x^3 + 3BC/25 x^2 + 2BD/25 x + BE/25
+                //                                           ----------------------------------------------------
+                //                                           (2C/5 - 4B^2/25)x^3 + (3D/5-3BC/25)x^2 + (4E/5-2BD/25) + (F-BE/25)
+                auto p1A = ((2 / 5.f) * C - (4 / 25.f) * B * B);
+                auto p1B = ((3 / 5.f) * D - (3 / 25.f) * B * C);
+                auto p1C = ((4 / 5.f) * E - (2 / 25.f) * B * D);
+                auto p1D = F - B * E / 25.f;
+                // auto q1A = 1 / 5.f;
+                // auto q1B = B / 25.f;
+                // x/5 + B/25 = 0
+                // x = -B/5
+                auto q_root = -B/5.f;
+                double p_roots[3];
+                int num_sol = solve_cubic(p1A, p1B, p1C, p1D, p_roots);
+                float intervals[4];
+                // Always store q_root. The slot is counted in num_intervals and
+                // takes part in the sort below, so leaving it unset whenever
+                // q_root fell outside [0, 1] meant reading an uninitialized
+                // value. Out-of-range boundaries are harmless: negative ones
+                // are skipped and ones above 1 are clamped in the loop below.
+                intervals[0] = (float)q_root;
+                for (int j = 0; j < num_sol; j++) {
+                    intervals[j + 1] = p_roots[j];
+                }
+                auto num_intervals = 1 + num_sol;
+                // sort intervals
+                for (int j = 1; j < num_intervals; j++) {
+                    for (int k = j; k > 0 && intervals[k - 1] > intervals[k]; k--) {
+                        auto tmp = intervals[k];
+                        intervals[k] = intervals[k - 1];
+                        intervals[k - 1] = tmp;
+                    }
+                }
+                auto eval_polynomial = [&] (double t) {
+                    return t*t*t*t*t+
+                           B*t*t*t*t+
+                           C*t*t*t+
+                           D*t*t+
+                           E*t+
+                           F;
+                };
+                auto eval_polynomial_deriv = [&] (double t) {
+                    return 5*t*t*t*t+
+                           4*B*t*t*t+
+                           3*C*t*t+
+                           2*D*t+
+                           E;
+                };
+                // Walk the sub-intervals of [0, 1] delimited by the isolator
+                // roots and look for one root of the quintic in each.
+                auto lower_bound = 0.f;
+                for (int j = 0; j < num_intervals + 1; j++) {
+                    if (j < num_intervals && intervals[j] < 0.f) {
+                        continue;
+                    }
+                    auto upper_bound = j < num_intervals ?
+                        min(intervals[j], 1.f) : 1.f;
+                    auto lb = lower_bound;
+                    auto ub = upper_bound;
+                    auto lb_eval = eval_polynomial(lb);
+                    auto ub_eval = eval_polynomial(ub);
+                    if (lb_eval * ub_eval > 0) {
+                        // Doesn't have root
+                        continue;
+                    }
+                    if (lb_eval > ub_eval) {
+                        swap_(lb, ub);
+                    }
+                    auto t = 0.5f * (lb + ub);
+                    auto num_iter = 20;
+                    for (int it = 0; it < num_iter; it++) {
+                        // Fall back to the midpoint whenever the Newton step
+                        // left the bracket.
+                        if (!(t >= lb && t <= ub)) {
+                            t = 0.5f * (lb + ub);
+                        }
+                        auto value = eval_polynomial(t);
+                        if (fabs(value) < 1e-5f || it == num_iter - 1) {
+                            break;
+                        }
+                        // The derivative may not be entirely accurate,
+                        // but the bisection is going to handle this
+                        if (value > 0.f) {
+                            ub = t;
+                        } else {
+                            lb = t;
+                        }
+                        auto derivative = eval_polynomial_deriv(t);
+                        t -= value / derivative;
+                    }
+                    auto p = eval(t);
+                    auto distp = distance(p, pt);
+                    if (distp < dist) {
+                        dist = distp;
+                        closest_pt = p;
+                        t_root = t;
+                    }
+                    if (upper_bound >= 1.f) {
+                        break;
+                    }
+                    lower_bound = upper_bound;
+                }
+            } else {
+                assert(false);
+            }
+            // Keep this segment's candidate only if it beats the best so far;
+            // min_dist also tightens the pruning test for later nodes.
+            if (dist < min_dist) {
+                min_dist = dist;
+                ret_pt = closest_pt;
+                path_info->base_point_id = base_point_id;
+                path_info->point_id = point_id;
+                path_info->t_root = t_root;
+                found = true;
+            }
+        } else {
+            // Inner node: descend only into children whose box could hold a
+            // point closer than the current best.
+            assert(node.child0 >= 0 && node.child1 >= 0);
+            const AABB &b0 = bvh_nodes[node.child0].box;
+            if (within_distance(b0, pt, min_dist)) {
+                bvh_stack[stack_size++] = node.child0;
+            }
+            const AABB &b1 = bvh_nodes[node.child1].box;
+            if (within_distance(b1, pt, min_dist)) {
+                bvh_stack[stack_size++] = node.child1;
+            }
+            assert(stack_size <= max_bvh_size);
+        }
+    }
+    if (found) {
+        assert(path_info->base_point_id < num_segments);
+    }
+    *result = ret_pt;
+    return found;
+}
+
+// Closest point on the outline of an axis-aligned rectangle.
+//
+// Each of the four edges is handled as a segment: pt is projected onto the
+// edge's supporting line, the projection is clamped to the segment, and the
+// nearest candidate over all four edges is kept.
+//
+// rect:   rectangle described by its p_min / p_max corners.
+// pt:     query point, expressed in the same space as rect.
+// result: receives the closest point on the outline.
+// Returns true unconditionally.
+DEVICE
+inline
+bool closest_point(const Rect &rect, const Vector2f &pt,
+                   Vector2f *result) {
+    auto min_dist = 0.f;
+    auto closest_pt = Vector2f{0, 0};
+    // Considers the edge [p0, p1]; `first` forces the candidate to be taken
+    // so that min_dist / closest_pt are seeded by the first edge.
+    auto update = [&](const Vector2f &p0, const Vector2f &p1, bool first) {
+        // project pt to line
+        auto t = dot(pt - p0, p1 - p0) / dot(p1 - p0, p1 - p0);
+        auto candidate = p0;
+        if (t < 0) {
+            // projection falls before the start of the edge
+            candidate = p0;
+        } else if (t > 1) {
+            // projection falls past the end of the edge
+            candidate = p1;
+        } else {
+            // projection lies on the edge: the closest point is the
+            // projected point itself, not the edge's start point
+            candidate = p0 + t * (p1 - p0);
+        }
+        auto d = distance(candidate, pt);
+        if (first || d < min_dist) {
+            min_dist = d;
+            closest_pt = candidate;
+        }
+    };
+    auto left_top = rect.p_min;
+    auto right_top = Vector2f{rect.p_max.x, rect.p_min.y};
+    auto left_bottom = Vector2f{rect.p_min.x, rect.p_max.y};
+    auto right_bottom = rect.p_max;
+    update(left_top, left_bottom, true);
+    update(left_top, right_top, false);
+    update(right_top, right_bottom, false);
+    update(left_bottom, right_bottom, false);
+    *result = closest_pt;
+    return true;
+}
+
+// Dispatches the closest-point query to the overload matching shape.type.
+//
+// bvh_nodes, max_radius and path_info are only forwarded for paths; circles
+// and rectangles ignore them. Ellipses are not implemented: they trip an
+// assertion and report failure.
+// Returns whatever the selected overload returns, false otherwise.
+DEVICE
+inline
+bool closest_point(const Shape &shape, const BVHNode *bvh_nodes, const Vector2f &pt, float max_radius,
+                   ClosestPointPathInfo *path_info,
+                   Vector2f *result) {
+    const auto kind = shape.type;
+    if (kind == ShapeType::Circle) {
+        const auto &circle = *(const Circle *)shape.ptr;
+        return closest_point(circle, pt, result);
+    }
+    if (kind == ShapeType::Path) {
+        const auto &path = *(const Path *)shape.ptr;
+        return closest_point(path, bvh_nodes, pt, max_radius, path_info, result);
+    }
+    if (kind == ShapeType::Rect) {
+        const auto &rect = *(const Rect *)shape.ptr;
+        return closest_point(rect, pt, result);
+    }
+    // Ellipse (see
+    // https://www.geometrictools.com/Documentation/DistancePointEllipseEllipsoid.pdf)
+    // and any unrecognized shape type end up here.
+    assert(false);
+    return false;
+}
+
+// Distance from a canvas-space point to the nearest shape of a shape group.
+//
+// The group's BVH is walked with an explicit stack. A child node is visited
+// only when the query point (in the group's local space) lies inside the
+// child's box grown by max_radius. Every leaf shape is queried with
+// closest_point(); the hit is mapped back to canvas space and the nearest one
+// (measured in canvas space) is kept.
+//
+// min_shape_id, closest_pt_ and path_info are optional outputs (may be null)
+// and are only written when a shape is found.
+// result always receives the smallest distance found, or max_radius when no
+// shape was found.
+// Returns true when at least one shape produced a closest point.
+DEVICE
+inline
+bool compute_distance(const SceneData &scene,
+                      int shape_group_id,
+                      const Vector2f &pt,
+                      float max_radius,
+                      int *min_shape_id,
+                      Vector2f *closest_pt_,
+                      ClosestPointPathInfo *path_info,
+                      float *result) {
+    const ShapeGroup &group = scene.shape_groups[shape_group_id];
+    const auto &bvh_nodes = scene.shape_groups_bvh_nodes[shape_group_id];
+    // pt is in canvas space, the BVH and the shapes live in local space
+    const auto query = xform_pt(group.canvas_to_shape, pt);
+
+    constexpr auto max_bvh_stack_size = 64;
+    int bvh_stack[max_bvh_stack_size];
+    auto stack_size = 0;
+    // traversal starts at node 2 * num_shapes - 2
+    bvh_stack[stack_size++] = 2 * group.num_shapes - 2;
+
+    auto min_dist = max_radius;
+    auto found = false;
+
+    while (stack_size > 0) {
+        const BVHNode &node = bvh_nodes[bvh_stack[--stack_size]];
+        if (node.child1 >= 0) {
+            // interior node: schedule the children whose grown box contains the query
+            assert(node.child0 >= 0 && node.child1 >= 0);
+            if (inside(bvh_nodes[node.child0].box, query, max_radius)) {
+                bvh_stack[stack_size++] = node.child0;
+            }
+            if (inside(bvh_nodes[node.child1].box, query, max_radius)) {
+                bvh_stack[stack_size++] = node.child1;
+            }
+            assert(stack_size <= max_bvh_stack_size);
+            continue;
+        }
+
+        // leaf: child0 holds the shape index
+        const auto shape_id = node.child0;
+        ClosestPointPathInfo leaf_path_info{-1, -1};
+        auto leaf_closest_pt = Vector2f{0, 0};
+        if (!closest_point(scene.shapes[shape_id], scene.path_bvhs[shape_id], query, max_radius, &leaf_path_info, &leaf_closest_pt)) {
+            continue;
+        }
+        const auto canvas_closest_pt = xform_pt(group.shape_to_canvas, leaf_closest_pt);
+        const auto dist = distance(canvas_closest_pt, pt);
+        if (found && !(dist < min_dist)) {
+            // an earlier shape is at least as close
+            continue;
+        }
+        found = true;
+        min_dist = dist;
+        if (min_shape_id != nullptr) {
+            *min_shape_id = shape_id;
+        }
+        if (closest_pt_ != nullptr) {
+            *closest_pt_ = canvas_closest_pt;
+        }
+        if (path_info != nullptr) {
+            *path_info = leaf_path_info;
+        }
+    }
+
+    *result = min_dist;
+    return found;
+}
+
+
+// Backward pass of closest_point() for a Circle.
+//
+// The forward computation, per the comment in the body, is
+//   closest = circle.center + circle.radius * normalize(pt - circle.center)
+// d_closest_pt is the incoming gradient with respect to that closest point.
+// Gradients are accumulated into d_circle.center and d_circle.radius through
+// atomic_add.
+//
+// NOTE(review): d_pt is never written in this overload, although the forward
+// expression depends on pt -- confirm this is intended.
+DEVICE
+inline
+void d_closest_point(const Circle &circle,
+ const Vector2f &pt,
+ const Vector2f &d_closest_pt,
+ Circle &d_circle,
+ Vector2f &d_pt) {
+ // return circle.center + circle.radius * normalize(pt - circle.center);
+ // NOTE(review): center enters the forward expression once directly and once
+ // through normalize(pt - center) with a negative sign; the product form
+ // below should be checked against the definition of d_normalize, which is
+ // not visible here.
+ auto d_center = d_closest_pt *
+ (1 + d_normalize(pt - circle.center, circle.radius * d_closest_pt));
+ // presumably this atomic_add overload adds both components starting at
+ // center.x -- TODO confirm against atomic.h
+ atomic_add(&d_circle.center.x, d_center);
+ // d closest / d radius = normalize(pt - center)
+ atomic_add(&d_circle.radius, dot(d_closest_pt, normalize(pt - circle.center)));
+}
+
+// Backward pass of closest_point() for a Path.
+//
+// d_closest_pt is the incoming gradient with respect to the closest point the
+// forward pass found on the segment recorded in path_info. The gradient with
+// respect to the segment's points is accumulated into d_path.points through
+// atomic_add; for curved segments the gradient with respect to the query
+// point is accumulated into d_pt.
+//
+// path_info.base_point_id indexes path.num_control_points (0 = straight line,
+// 1 = quadratic Bezier, 2 = cubic Bezier), path_info.point_id is the index of
+// the segment's first point in path.points, and path_info.t_root is the curve
+// parameter of the closest point (only read for curved segments).
+//
+// For a curved segment with t strictly between the end points, t is a root of
+// the polynomial (q(t) - pt) . q'(t). Its dependence on the control points and
+// on pt is differentiated with the implicit function theorem:
+//   dt/dX = -1 / poly'(t) * dpoly/dX
+// Any other segment type trips an assertion.
+DEVICE
+inline
+void d_closest_point(const Path &path,
+                     const Vector2f &pt,
+                     const Vector2f &d_closest_pt,
+                     const ClosestPointPathInfo &path_info,
+                     Path &d_path,
+                     Vector2f &d_pt) {
+    auto base_point_id = path_info.base_point_id;
+    auto point_id = path_info.point_id;
+    auto min_t_root = path_info.t_root;
+
+    if (path.num_control_points[base_point_id] == 0) {
+        // Straight line
+        auto i0 = point_id;
+        auto i1 = (point_id + 1) % path.num_points;
+        auto p0 = Vector2f{path.points[2 * i0], path.points[2 * i0 + 1]};
+        auto p1 = Vector2f{path.points[2 * i1], path.points[2 * i1 + 1]};
+        // project pt to line
+        auto t = dot(pt - p0, p1 - p0) / dot(p1 - p0, p1 - p0);
+        auto d_p0 = Vector2f{0, 0};
+        auto d_p1 = Vector2f{0, 0};
+        if (t < 0) {
+            d_p0 += d_closest_pt;
+        } else if (t > 1) {
+            d_p1 += d_closest_pt;
+        } else {
+            auto d_p = d_closest_pt;
+            // p = p0 + t * (p1 - p0)
+            // NOTE(review): t is treated as a constant here, i.e. its
+            // dependence on p0, p1 and pt is not propagated in this branch.
+            d_p0 += d_p * (1 - t);
+            d_p1 += d_p * t;
+        }
+        atomic_add(d_path.points + 2 * i0, d_p0);
+        atomic_add(d_path.points + 2 * i1, d_p1);
+    } else if (path.num_control_points[base_point_id] == 1) {
+        // Quadratic Bezier curve
+        auto i0 = point_id;
+        auto i1 = point_id + 1;
+        auto i2 = (point_id + 2) % path.num_points;
+        auto p0 = Vector2f{path.points[2 * i0], path.points[2 * i0 + 1]};
+        auto p1 = Vector2f{path.points[2 * i1], path.points[2 * i1 + 1]};
+        auto p2 = Vector2f{path.points[2 * i2], path.points[2 * i2 + 1]};
+        // eval(t) = (1-t)^2 p0 + 2(1-t)t p1 + t^2 p2
+        auto d_p0 = Vector2f{0, 0};
+        auto d_p1 = Vector2f{0, 0};
+        auto d_p2 = Vector2f{0, 0};
+        auto t = min_t_root;
+        if (t == 0) {
+            // closest_pt = p0
+            d_p0 += d_closest_pt;
+        } else if (t == 1) {
+            // closest_pt = p2
+            d_p2 += d_closest_pt;
+        } else {
+            // The curve is (1-t)^2p0 + 2(1-t)tp1 + t^2p2
+            // = (p0-2p1+p2)t^2+(-2p0+2p1)t+p0 = q
+            // Want to solve (q - pt) dot q' = 0
+            // q' = (p0-2p1+p2)t + (-p0+p1)
+            // Expanding (p0-2p1+p2)^2 t^3 +
+            //           3(p0-2p1+p2)(-p0+p1) t^2 +
+            //           (2(-p0+p1)^2+(p0-2p1+p2)(p0-pt))t +
+            //           (-p0+p1)(p0-pt) = 0
+            auto A = sum((p0-2*p1+p2)*(p0-2*p1+p2));
+            auto B = sum(3*(p0-2*p1+p2)*(-p0+p1));
+            auto C = sum(2*(-p0+p1)*(-p0+p1)+(p0-2*p1+p2)*(p0-pt));
+            // auto D = sum((-p0+p1)*(p0-pt));
+            auto d_p = d_closest_pt;
+            // p = eval(t)
+            auto tt = 1 - t;
+            // (tt*tt)*p0 + (2*tt*t)*p1 + (t*t)*p2
+            auto d_tt = 2 * tt * dot(d_p, p0) + 2 * t * dot(d_p, p1);
+            auto d_t = -d_tt + 2 * tt * dot(d_p, p1) + 2 * t * dot(d_p, p2);
+            // Accumulate into the function-level d_p0/d_p1/d_p2: those are
+            // the values written to d_path below. Declaring fresh locals here
+            // would leave the written gradients at zero.
+            d_p0 += d_p * tt * tt;
+            d_p1 += 2 * d_p * tt * t;
+            d_p2 += d_p * t * t;
+            // implicit function theorem: dt/dA = -1/(p'(t)) * dp/dA
+            auto poly_deriv_t = 3 * A * t * t + 2 * B * t + C;
+            if (fabs(poly_deriv_t) > 1e-6f) {
+                auto d_A = - (d_t / poly_deriv_t) * t * t * t;
+                auto d_B = - (d_t / poly_deriv_t) * t * t;
+                auto d_C = - (d_t / poly_deriv_t) * t;
+                auto d_D = - (d_t / poly_deriv_t);
+                // A = sum((p0-2*p1+p2)*(p0-2*p1+p2))
+                // B = sum(3*(p0-2*p1+p2)*(-p0+p1))
+                // C = sum(2*(-p0+p1)*(-p0+p1)+(p0-2*p1+p2)*(p0-pt))
+                // D = sum((-p0+p1)*(p0-pt))
+                // dD/dp0 = -(p0-pt) + (-p0+p1), a plain product rule with no
+                // extra factor.
+                d_p0 += 2*d_A*(p0-2*p1+p2)+
+                        3*d_B*((-p0+p1)-(p0-2*p1+p2))+
+                        2*d_C*(-2*(-p0+p1))+
+                        d_C*((p0-pt)+(p0-2*p1+p2))+
+                        d_D*(-(p0-pt)+(-p0+p1));
+                d_p1 += (-2)*2*d_A*(p0-2*p1+p2)+
+                        3*d_B*(-2*(-p0+p1)+(p0-2*p1+p2))+
+                        2*d_C*(2*(-p0+p1))+
+                        d_C*((-2)*(p0-pt))+
+                        d_D*(p0-pt);
+                d_p2 += 2*d_A*(p0-2*p1+p2)+
+                        3*d_B*(-p0+p1)+
+                        d_C*(p0-pt);
+                d_pt += d_C*(-(p0-2*p1+p2))+
+                        d_D*(-(-p0+p1));
+            }
+        }
+        atomic_add(d_path.points + 2 * i0, d_p0);
+        atomic_add(d_path.points + 2 * i1, d_p1);
+        atomic_add(d_path.points + 2 * i2, d_p2);
+    } else if (path.num_control_points[base_point_id] == 2) {
+        // Cubic Bezier curve
+        auto i0 = point_id;
+        auto i1 = point_id + 1;
+        auto i2 = point_id + 2;
+        auto i3 = (point_id + 3) % path.num_points;
+        auto p0 = Vector2f{path.points[2 * i0], path.points[2 * i0 + 1]};
+        auto p1 = Vector2f{path.points[2 * i1], path.points[2 * i1 + 1]};
+        auto p2 = Vector2f{path.points[2 * i2], path.points[2 * i2 + 1]};
+        auto p3 = Vector2f{path.points[2 * i3], path.points[2 * i3 + 1]};
+        // eval(t) = (1-t)^3 p0 + 3(1-t)^2 t p1 + 3(1-t) t^2 p2 + t^3 p3
+        auto d_p0 = Vector2f{0, 0};
+        auto d_p1 = Vector2f{0, 0};
+        auto d_p2 = Vector2f{0, 0};
+        auto d_p3 = Vector2f{0, 0};
+        auto t = min_t_root;
+        if (t == 0) {
+            // closest_pt = p0
+            d_p0 += d_closest_pt;
+        } else if (t == 1) {
+            // closest_pt = p3
+            d_p3 += d_closest_pt;
+        } else {
+            // The curve is (1 - t)^3 p0 + 3 * (1 - t)^2 t p1 + 3 * (1 - t) t^2 p2 + t^3 p3
+            // = (-p0+3p1-3p2+p3) t^3 + (3p0-6p1+3p2) t^2 + (-3p0+3p1) t + p0
+            // Want to solve (q - pt) dot q' = 0
+            // q' = 3*(-p0+3p1-3p2+p3)t^2 + 2*(3p0-6p1+3p2)t + (-3p0+3p1)
+            // Expanding
+            // 3*(-p0+3p1-3p2+p3)^2 t^5
+            // 5*(-p0+3p1-3p2+p3)(3p0-6p1+3p2) t^4
+            // 4*(-p0+3p1-3p2+p3)(-3p0+3p1) + 2*(3p0-6p1+3p2)^2 t^3
+            // 3*(3p0-6p1+3p2)(-3p0+3p1) + 3*(-p0+3p1-3p2+p3)(p0-pt) t^2
+            // (-3p0+3p1)^2+2(p0-pt)(3p0-6p1+3p2) t
+            // (p0-pt)(-3p0+3p1)
+            double A = 3*sum((-p0+3*p1-3*p2+p3)*(-p0+3*p1-3*p2+p3));
+            double B = 5*sum((-p0+3*p1-3*p2+p3)*(3*p0-6*p1+3*p2));
+            double C = 4*sum((-p0+3*p1-3*p2+p3)*(-3*p0+3*p1)) + 2*sum((3*p0-6*p1+3*p2)*(3*p0-6*p1+3*p2));
+            double D = 3*(sum((3*p0-6*p1+3*p2)*(-3*p0+3*p1)) + sum((-p0+3*p1-3*p2+p3)*(p0-pt)));
+            double E = sum((-3*p0+3*p1)*(-3*p0+3*p1)) + 2*sum((p0-pt)*(3*p0-6*p1+3*p2));
+            double F = sum((p0-pt)*(-3*p0+3*p1));
+            // normalize the polynomial so that the leading coefficient is 1
+            B /= A;
+            C /= A;
+            D /= A;
+            E /= A;
+            F /= A;
+            // derivative of t^5 + B t^4 + C t^3 + D t^2 + E t + F
+            auto eval_polynomial_deriv = [&] (double t) {
+                return 5*t*t*t*t+
+                       4*B*t*t*t+
+                       3*C*t*t+
+                       2*D*t+
+                       E;
+            };
+
+            // auto p = eval(t);
+            auto d_p = d_closest_pt;
+            // (tt*tt*tt)*p0 + (3*tt*tt*t)*p1 + (3*tt*t*t)*p2 + (t*t*t)*p3
+            auto tt = 1 - t;
+            auto d_tt = 3 * tt * tt * dot(d_p, p0) +
+                        6 * tt * t * dot(d_p, p1) +
+                        3 * t * t * dot(d_p, p2);
+            auto d_t = -d_tt +
+                       3 * tt * tt * dot(d_p, p1) +
+                       6 * tt * t * dot(d_p, p2) +
+                       3 * t * t * dot(d_p, p3);
+            d_p0 += d_p * (tt * tt * tt);
+            d_p1 += d_p * (3 * tt * tt * t);
+            d_p2 += d_p * (3 * tt * t * t);
+            d_p3 += d_p * (t * t * t);
+            // implicit function theorem: dt/dA = -1/(p'(t)) * dp/dA
+            auto poly_deriv_t = eval_polynomial_deriv(t);
+            if (fabs(poly_deriv_t) > 1e-10f) {
+                auto d_B = -(d_t / poly_deriv_t) * t * t * t * t;
+                auto d_C = -(d_t / poly_deriv_t) * t * t * t;
+                auto d_D = -(d_t / poly_deriv_t) * t * t;
+                auto d_E = -(d_t / poly_deriv_t) * t;
+                auto d_F = -(d_t / poly_deriv_t);
+                // Undo the normalization. With X = X' / A:
+                //   dX/dA = -X / A and dX/dX' = 1 / A
+                // B = B' / A
+                // C = C' / A
+                // D = D' / A
+                // E = E' / A
+                // F = F' / A
+                auto d_A = -d_B * B / A
+                           -d_C * C / A
+                           -d_D * D / A
+                           -d_E * E / A
+                           -d_F * F / A;
+                d_B /= A;
+                d_C /= A;
+                d_D /= A;
+                d_E /= A;
+                d_F /= A;
+                // A = 3*sum((-p0+3*p1-3*p2+p3)*(-p0+3*p1-3*p2+p3))
+                d_p0 += d_A * 3 * (-1) * 2 * (-p0+3*p1-3*p2+p3);
+                d_p1 += d_A * 3 * 3 * 2 * (-p0+3*p1-3*p2+p3);
+                d_p2 += d_A * 3 * (-3) * 2 * (-p0+3*p1-3*p2+p3);
+                d_p3 += d_A * 3 * 1 * 2 * (-p0+3*p1-3*p2+p3);
+                // B = 5*sum((-p0+3*p1-3*p2+p3)*(3*p0-6*p1+3*p2))
+                d_p0 += d_B * 5 * ((-1) * (3*p0-6*p1+3*p2) + 3 * (-p0+3*p1-3*p2+p3));
+                d_p1 += d_B * 5 * (3 * (3*p0-6*p1+3*p2) + (-6) * (-p0+3*p1-3*p2+p3));
+                d_p2 += d_B * 5 * ((-3) * (3*p0-6*p1+3*p2) + 3 * (-p0+3*p1-3*p2+p3));
+                d_p3 += d_B * 5 * (3*p0-6*p1+3*p2);
+                // C = 4*sum((-p0+3*p1-3*p2+p3)*(-3*p0+3*p1)) + 2*sum((3*p0-6*p1+3*p2)*(3*p0-6*p1+3*p2))
+                d_p0 += d_C * 4 * ((-1) * (-3*p0+3*p1) + (-3) * (-p0+3*p1-3*p2+p3)) +
+                        d_C * 2 * (3 * 2 * (3*p0-6*p1+3*p2));
+                d_p1 += d_C * 4 * (3 * (-3*p0+3*p1) + 3 * (-p0+3*p1-3*p2+p3)) +
+                        d_C * 2 * ((-6) * 2 * (3*p0-6*p1+3*p2));
+                d_p2 += d_C * 4 * ((-3) * (-3*p0+3*p1)) +
+                        d_C * 2 * (3 * 2 * (3*p0-6*p1+3*p2));
+                d_p3 += d_C * 4 * (-3*p0+3*p1);
+                // D = 3*(sum((3*p0-6*p1+3*p2)*(-3*p0+3*p1)) + sum((-p0+3*p1-3*p2+p3)*(p0-pt)))
+                d_p0 += d_D * 3 * (3 * (-3*p0+3*p1) + (-3) * (3*p0-6*p1+3*p2)) +
+                        d_D * 3 * ((-1) * (p0-pt) + 1 * (-p0+3*p1-3*p2+p3));
+                d_p1 += d_D * 3 * ((-6) * (-3*p0+3*p1) + (3) * (3*p0-6*p1+3*p2)) +
+                        d_D * 3 * (3 * (p0-pt));
+                d_p2 += d_D * 3 * (3 * (-3*p0+3*p1)) +
+                        d_D * 3 * ((-3) * (p0-pt));
+                // p3 enters D through (-p0+3*p1-3*p2+p3) with coefficient 1
+                d_p3 += d_D * 3 * (p0-pt);
+                d_pt += d_D * 3 * ((-1) * (-p0+3*p1-3*p2+p3));
+                // E = sum((-3*p0+3*p1)*(-3*p0+3*p1)) + 2*sum((p0-pt)*(3*p0-6*p1+3*p2))
+                d_p0 += d_E * ((-3) * 2 * (-3*p0+3*p1)) +
+                        d_E * 2 * (1 * (3*p0-6*p1+3*p2) + 3 * (p0-pt));
+                d_p1 += d_E * ( 3 * 2 * (-3*p0+3*p1)) +
+                        d_E * 2 * ((-6) * (p0-pt));
+                d_p2 += d_E * 2 * ( 3 * (p0-pt));
+                d_pt += d_E * 2 * ((-1) * (3*p0-6*p1+3*p2));
+                // F = sum((p0-pt)*(-3*p0+3*p1))
+                d_p0 += d_F * (1 * (-3*p0+3*p1)) +
+                        d_F * ((-3) * (p0-pt));
+                d_p1 += d_F * (3 * (p0-pt));
+                d_pt += d_F * ((-1) * (-3*p0+3*p1));
+            }
+        }
+        atomic_add(d_path.points + 2 * i0, d_p0);
+        atomic_add(d_path.points + 2 * i1, d_p1);
+        atomic_add(d_path.points + 2 * i2, d_p2);
+        atomic_add(d_path.points + 2 * i3, d_p3);
+    } else {
+        assert(false);
+    }
+}
+
+// Backward pass of closest_point() for a Rect.
+//
+// Finds the edge nearest to pt (ties go to the earlier edge in the order
+// left, top, right, bottom), back-propagates d_closest_pt through the
+// projection of pt onto that edge, and scatters the resulting corner
+// gradients onto d_rect.p_min / d_rect.p_max through atomic_add. When the
+// projection falls inside the edge, the gradient with respect to pt is
+// accumulated into d_pt as well.
+DEVICE
+inline
+void d_closest_point(const Rect &rect,
+                     const Vector2f &pt,
+                     const Vector2f &d_closest_pt,
+                     Rect &d_rect,
+                     Vector2f &d_pt) {
+    const auto left_top = rect.p_min;
+    const auto right_top = Vector2f{rect.p_max.x, rect.p_min.y};
+    const auto left_bottom = Vector2f{rect.p_min.x, rect.p_max.y};
+    const auto right_bottom = rect.p_max;
+
+    // Distance from pt to the segment [a, b].
+    auto segment_distance = [&](const Vector2f &a, const Vector2f &b) -> float {
+        // project pt to line
+        auto s = dot(pt - a, b - a) / dot(b - a, b - a);
+        if (s < 0) {
+            return distance(a, pt);
+        }
+        if (s > 1) {
+            return distance(b, pt);
+        }
+        return distance(a + s * (b - a), pt);
+    };
+    // edge ids: 0 = left, 1 = top, 2 = right, 3 = bottom
+    const float edge_dist[4] = {
+        segment_distance(left_top, left_bottom),
+        segment_distance(left_top, right_top),
+        segment_distance(right_top, right_bottom),
+        segment_distance(left_bottom, right_bottom)
+    };
+    int min_id = 0;
+    for (int edge = 1; edge < 4; edge++) {
+        if (edge_dist[edge] < edge_dist[min_id]) {
+            min_id = edge;
+        }
+    }
+
+    // Back-propagates d_closest_pt through the closest point on [a, b],
+    // accumulating into d_a, d_b and the enclosing d_pt.
+    auto backprop_edge = [&](const Vector2f &a, const Vector2f &b,
+                             Vector2f &d_a, Vector2f &d_b) {
+        // project pt to line
+        auto s = dot(pt - a, b - a) / dot(b - a, b - a);
+        if (s < 0) {
+            // closest point is a
+            d_a += d_closest_pt;
+            return;
+        }
+        if (s > 1) {
+            // closest point is b
+            d_b += d_closest_pt;
+            return;
+        }
+        // p = a + s * (b - a)
+        d_a += d_closest_pt * (1 - s);
+        d_b += d_closest_pt * s;
+        auto d_s = sum(d_closest_pt * (b - a));
+        // s = numerator / denominator
+        auto d_numerator = d_s / dot(b - a, b - a);
+        auto d_denominator = d_s * (-s) / dot(b - a, b - a);
+        // numerator = dot(pt - a, b - a)
+        d_pt += (b - a) * d_numerator;
+        d_b += (pt - a) * d_numerator;
+        d_a += ((a - b) + (a - pt)) * d_numerator;
+        // denominator = dot(b - a, b - a)
+        d_b += 2 * (b - a) * d_denominator;
+        d_a += 2 * (a - b) * d_denominator;
+    };
+
+    auto d_left_top = Vector2f{0, 0};
+    auto d_right_top = Vector2f{0, 0};
+    auto d_left_bottom = Vector2f{0, 0};
+    auto d_right_bottom = Vector2f{0, 0};
+    switch (min_id) {
+        case 0:
+            backprop_edge(left_top, left_bottom, d_left_top, d_left_bottom);
+            break;
+        case 1:
+            backprop_edge(left_top, right_top, d_left_top, d_right_top);
+            break;
+        case 2:
+            backprop_edge(right_top, right_bottom, d_right_top, d_right_bottom);
+            break;
+        default:
+            assert(min_id == 3);
+            backprop_edge(left_bottom, right_bottom, d_left_bottom, d_right_bottom);
+            break;
+    }
+
+    // Scatter the corner gradients back onto the two stored corners:
+    //   left_top     = rect.p_min
+    //   right_top    = Vector2f{rect.p_max.x, rect.p_min.y}
+    //   left_bottom  = Vector2f{rect.p_min.x, rect.p_max.y}
+    //   right_bottom = rect.p_max
+    auto d_p_min = Vector2f{0, 0};
+    d_p_min += d_left_top;
+    d_p_min.y += d_right_top.y;
+    d_p_min.x += d_left_bottom.x;
+    auto d_p_max = Vector2f{0, 0};
+    d_p_max.x += d_right_top.x;
+    d_p_max.y += d_left_bottom.y;
+    d_p_max += d_right_bottom;
+    atomic_add(d_rect.p_min, d_p_min);
+    atomic_add(d_rect.p_max, d_p_max);
+}
+
+// Dispatches the closest-point backward pass to the overload matching
+// shape.type. d_shape.ptr is reinterpreted as the same concrete type as
+// shape.ptr and receives the accumulated gradients. path_info is only
+// forwarded for paths. Ellipses are not implemented and trip an assertion;
+// any other shape type is ignored.
+DEVICE
+inline
+void d_closest_point(const Shape &shape,
+                     const Vector2f &pt,
+                     const Vector2f &d_closest_pt,
+                     const ClosestPointPathInfo &path_info,
+                     Shape &d_shape,
+                     Vector2f &d_pt) {
+    const auto kind = shape.type;
+    if (kind == ShapeType::Circle) {
+        const auto &circle = *(const Circle *)shape.ptr;
+        auto &d_circle = *(Circle *)d_shape.ptr;
+        d_closest_point(circle, pt, d_closest_pt, d_circle, d_pt);
+    } else if (kind == ShapeType::Ellipse) {
+        // https://www.geometrictools.com/Documentation/DistancePointEllipseEllipsoid.pdf
+        assert(false);
+    } else if (kind == ShapeType::Path) {
+        const auto &path = *(const Path *)shape.ptr;
+        auto &d_path = *(Path *)d_shape.ptr;
+        d_closest_point(path, pt, d_closest_pt, path_info, d_path, d_pt);
+    } else if (kind == ShapeType::Rect) {
+        const auto &rect = *(const Rect *)shape.ptr;
+        auto &d_rect = *(Rect *)d_shape.ptr;
+        d_closest_point(rect, pt, d_closest_pt, d_rect, d_pt);
+    }
+}
+
+// Backward pass of compute_distance() for a single shape.
+//
+// The forward computation being differentiated is
+//   local_pt         = xform_pt(canvas_to_shape, pt)
+//   local_closest_pt = closest_point(shape, local_pt)
+//   closest_pt       = xform_pt(shape_to_canvas, local_closest_pt)
+//   dist             = distance(closest_pt, pt)
+// and d_dist is the incoming gradient with respect to dist.
+//
+// Gradients are accumulated through atomic_add into d_shape_to_canvas, into
+// d_shape (via d_closest_point) and, when d_translation is non-null, into
+// d_translation (which receives -d_pt).
+// Nothing is accumulated when pt and closest_pt (almost) coincide.
+DEVICE
+inline
+void d_compute_distance(const Matrix3x3f &canvas_to_shape,
+                        const Matrix3x3f &shape_to_canvas,
+                        const Shape &shape,
+                        const Vector2f &pt,
+                        const Vector2f &closest_pt,
+                        const ClosestPointPathInfo &path_info,
+                        float d_dist,
+                        Matrix3x3f &d_shape_to_canvas,
+                        Shape &d_shape,
+                        float *d_translation) {
+    // The derivative at distance=0 is undefined
+    const bool coincident = distance_squared(pt, closest_pt) < 1e-10f;
+    if (coincident) {
+        return;
+    }
+    assert(isfinite(d_dist));
+
+    // Both points are in canvas space; bring them into the shape's local space.
+    auto shape_space_pt = xform_pt(canvas_to_shape, pt);
+    auto shape_space_closest_pt = xform_pt(canvas_to_shape, closest_pt);
+
+    // dist = distance(closest_pt, pt)
+    auto d_pt = Vector2f{0, 0};
+    auto d_closest_pt = Vector2f{0, 0};
+    d_distance(closest_pt, pt, d_dist, d_closest_pt, d_pt);
+    assert(isfinite(d_pt));
+    assert(isfinite(d_closest_pt));
+
+    // closest_pt = xform_pt(shape_to_canvas, local_closest_pt)
+    auto d_local_closest_pt = Vector2f{0, 0};
+    auto grad_shape_to_canvas = Matrix3x3f();
+    d_xform_pt(shape_to_canvas, shape_space_closest_pt, d_closest_pt,
+               grad_shape_to_canvas, d_local_closest_pt);
+    assert(isfinite(d_local_closest_pt));
+
+    // local_closest_pt = closest_point(shape, local_pt)
+    auto d_local_pt = Vector2f{0, 0};
+    d_closest_point(shape, shape_space_pt, d_local_closest_pt, path_info, d_shape, d_local_pt);
+    assert(isfinite(d_local_pt));
+
+    // local_pt = xform_pt(canvas_to_shape, pt)
+    auto grad_canvas_to_shape = Matrix3x3f();
+    d_xform_pt(canvas_to_shape, pt, d_local_pt, grad_canvas_to_shape, d_pt);
+
+    // Fold the canvas_to_shape gradient into the shape_to_canvas gradient
+    // (presumably canvas_to_shape is the inverse of shape_to_canvas, which is
+    // what this formula assumes -- confirm against the caller):
+    // http://jack.valmadre.net/notes/2016/09/04/back-prop-differentials/#back-propagation-using-differentials
+    auto c2s_transposed = transpose(canvas_to_shape);
+    grad_shape_to_canvas += -c2s_transposed * grad_canvas_to_shape * c2s_transposed;
+    atomic_add(&d_shape_to_canvas(0, 0), grad_shape_to_canvas);
+
+    if (d_translation != nullptr) {
+        atomic_add(d_translation, -d_pt);
+    }
+}
diff --git a/config/base.yaml b/config/base.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..35941a460665c58db80aad4db1aeaf99119a0b8f
--- /dev/null
+++ b/config/base.yaml
@@ -0,0 +1,91 @@
+
+# Shared settings.
+# NOTE(review): presumably each experiment_* section below is merged on top of
+# this one by the config loader -- confirm against the code that reads it.
+default:
+ use_ycrcb: False
+ seginit:
+ type: circle
+ radius: 5
+ save:
+ init: false
+ image: false
+ output: true
+ video: false
+ loss: false
+ trainable:
+ bg: False
+ record: True
+ stroke: False
+# num_segments: 4
+ num_iter: 500
+ lr_base:
+ bg: 0.01
+ point: 1
+ color: 0.01
+ stroke_width: null
+ stroke_color: null
+ coord_init:
+ type: sparse
+ seed: 0
+ loss:
+ use_l1_loss: false
+ use_distance_weighted_loss: true
+ xing_loss_weight: 0.01
+ bis_loss_weight: null
+
+
+# Experiment presets. Every preset below sets only `path_schedule`.
+# Presets with `type: repeat` differ from each other only in `max_path`.
+# NOTE(review): presumably `schedule_each` paths are added per stage until
+# `max_path` is reached -- confirm against the schedule implementation.
+experiment_1x1:
+ path_schedule:
+ type: repeat
+ max_path: 1
+ schedule_each: 1
+
+experiment_4x1:
+ path_schedule:
+ type: repeat
+ max_path: 4
+ schedule_each: 1
+
+experiment_5x1:
+ path_schedule:
+ type: repeat
+ max_path: 5
+ schedule_each: 1
+
+experiment_8x1:
+ path_schedule:
+ type: repeat
+ max_path: 8
+ schedule_each: 1
+
+experiment_16x1:
+ path_schedule:
+ type: repeat
+ max_path: 16
+ schedule_each: 1
+
+experiment_32x1:
+ path_schedule:
+ type: repeat
+ max_path: 32
+ schedule_each: 1
+
+# `type: list` gives the schedule explicitly as a list of numbers.
+experiment_1357:
+ path_schedule:
+ type: list
+ schedule: [1, 3, 5, 7]
+
+
+# `type: exp` presets: parameterized by `base`, `max_path` and
+# `max_path_per_iter`.
+# NOTE(review): presumably the per-stage count grows as powers of `base`,
+# capped by `max_path_per_iter` -- confirm against the schedule implementation.
+experiment_exp2_256:
+ path_schedule:
+ type: exp
+ base: 2
+ max_path: 256
+ max_path_per_iter: 32
+
+
+experiment_exp2_128:
+ path_schedule:
+ type: exp
+ base: 2
+ max_path: 128
+ max_path_per_iter: 32
+
diff --git a/cuda_utils.h b/cuda_utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..1e4609babc129a27397df72879bd6c8f55e71d1a
--- /dev/null
+++ b/cuda_utils.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#ifdef __CUDACC__
+ #include
+ #include
+#endif
+#include
+#include
+#include
+
+#ifdef __CUDACC__
+// checkCuda(x): evaluates the CUDA runtime call `x` exactly once; when the
+// result is not cudaSuccess, prints the error string together with the file
+// and line of the call site and terminates the process with exit code 1.
+// The result is stored in a local first so that a failing call is not
+// executed a second time while the message is being formatted.
+// Only defined when compiling with nvcc (__CUDACC__).
+#define checkCuda(x) do { \
+    cudaError_t checkCuda_err_ = (x); \
+    if (checkCuda_err_ != cudaSuccess) { \
+        printf("CUDA Runtime Error: %s at %s:%d\n", \
+               cudaGetErrorString(checkCuda_err_), __FILE__, __LINE__); \
+        exit(1); \
+    } } while(0)
+#endif
+
+// infinity<T>(): positive infinity for the type T.
+//
+// Host code defers to std::numeric_limits. Device code (__CUDA_ARCH__)
+// reinterprets the IEEE-754 bit pattern of +inf instead. The primary template
+// builds a double-precision infinity on the device and converts it to T;
+// double and float have dedicated specializations.
+template <typename T>
+DEVICE
+inline T infinity() {
+#ifdef __CUDA_ARCH__
+    // 0x7ff0000000000000 is the bit pattern of +inf in IEEE-754 binary64
+    const unsigned long long ieee754inf = 0x7ff0000000000000;
+    return __longlong_as_double(ieee754inf);
+#else
+    return std::numeric_limits<T>::infinity();
+#endif
+}
+
+// Double-precision specialization.
+template <>
+DEVICE
+inline double infinity<double>() {
+#ifdef __CUDA_ARCH__
+    return __longlong_as_double(0x7ff0000000000000ULL);
+#else
+    return std::numeric_limits<double>::infinity();
+#endif
+}
+
+// Single-precision specialization.
+template <>
+DEVICE
+inline float infinity<float>() {
+#ifdef __CUDA_ARCH__
+    // 0x7f800000 is the bit pattern of +inf in IEEE-754 binary32
+    return __int_as_float(0x7f800000);
+#else
+    return std::numeric_limits<float>::infinity();
+#endif
+}
+
+// Waits for the device via cudaDeviceSynchronize() and aborts through
+// checkCuda on failure. Compiles to an empty function when the translation
+// unit is not built with nvcc (__CUDACC__ undefined).
+inline void cuda_synchronize() {
+#if defined(__CUDACC__)
+    checkCuda(cudaDeviceSynchronize());
+#endif
+}
diff --git a/data/demo1.png b/data/demo1.png
new file mode 100644
index 0000000000000000000000000000000000000000..5705c2ff34aa0df1cffe65d5e5be7b41a607224c
Binary files /dev/null and b/data/demo1.png differ
diff --git a/data/demo2.jpg b/data/demo2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7ccdcf7d82f4dae849ceec62f68d10a6acddbcdd
Binary files /dev/null and b/data/demo2.jpg differ
diff --git a/data/demo3.png b/data/demo3.png
new file mode 100644
index 0000000000000000000000000000000000000000..6355c30cb8be9014029029f9b69453bae47c8b80
Binary files /dev/null and b/data/demo3.png differ
diff --git a/diffvg.cpp b/diffvg.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7346d24b758b135bdd402fdb67ea412f48419eb3
--- /dev/null
+++ b/diffvg.cpp
@@ -0,0 +1,1792 @@
+#include "diffvg.h"
+#include "aabb.h"
+#include "shape.h"
+#include "sample_boundary.h"
+#include "atomic.h"
+#include "cdf.h"
+#include "compute_distance.h"
+#include "cuda_utils.h"
+#include "edge_query.h"
+#include "filter.h"
+#include "matrix.h"
+#include "parallel.h"
+#include "pcg.h"
+#include "ptr.h"
+#include "scene.h"
+#include "vector.h"
+#include "winding_number.h"
+#include "within_distance.h"
+#include
+#include
+#include
+#include
+#include
+
+namespace py = pybind11;
+
+// Triple of integer ids naming a shape group, a shape and (for paths) a point.
+// NOTE(review): no use of this struct is visible in this part of the file; the
+// per-field meanings are inferred from the names and should be confirmed.
+struct Command {
+ int shape_group_id;
+ int shape_id;
+ int point_id; // Only used by path
+};
+
+// Decides whether the canvas-space point `pt` lies inside shape group
+// `shape_group_id`, by summing the winding numbers of every shape whose BVH leaf
+// contains the point and applying the group's fill rule (even-odd or non-zero).
+// If `edge_query` is non-null and names a shape of this group, edge_query->hit is
+// set to true when that single shape on its own covers the point.
+DEVICE
+bool is_inside(const SceneData &scene_data,
+               int shape_group_id,
+               const Vector2f &pt,
+               EdgeQuery *edge_query) {
+    const ShapeGroup &group = scene_data.shape_groups[shape_group_id];
+    // The BVH stores boxes in the group's local space, so move the query there.
+    const auto query_pt = xform_pt(group.canvas_to_shape, pt);
+    const auto &nodes = scene_data.shape_groups_bvh_nodes[shape_group_id];
+    // The root is the last node of the 2 * num_shapes - 1 node array.
+    const auto root_id = 2 * group.num_shapes - 2;
+    if (!inside(nodes[root_id].box, query_pt)) {
+        return false;
+    }
+
+    // Iterative depth-first traversal with an explicit fixed-size stack.
+    constexpr auto stack_capacity = 64;
+    int pending[stack_capacity];
+    auto top = 0;
+    pending[top++] = root_id;
+    auto total_winding = 0;
+    while (top > 0) {
+        const BVHNode &current = nodes[pending[--top]];
+        if (!(current.child1 < 0)) {
+            // Interior node: descend into each child whose box contains the point.
+            assert(current.child0 >= 0 && current.child1 >= 0);
+            if (inside(nodes[current.child0].box, query_pt)) {
+                pending[top++] = current.child0;
+            }
+            if (inside(nodes[current.child1].box, query_pt)) {
+                pending[top++] = current.child1;
+            }
+            assert(top <= stack_capacity);
+            continue;
+        }
+        // Leaf node: child0 holds the shape id.
+        const auto shape_id = current.child0;
+        const auto w = compute_winding_number(scene_data.shapes[shape_id],
+                                              scene_data.path_bvhs[shape_id],
+                                              query_pt);
+        total_winding += w;
+        if (edge_query == nullptr) {
+            continue;
+        }
+        const bool is_queried_shape = edge_query->shape_group_id == shape_group_id &&
+                                      edge_query->shape_id == shape_id;
+        if (!is_queried_shape) {
+            continue;
+        }
+        const bool shape_covers_pt = group.use_even_odd_rule ? (abs(w) % 2 == 1)
+                                                             : (w != 0);
+        if (shape_covers_pt) {
+            edge_query->hit = true;
+        }
+    }
+    return group.use_even_odd_rule ? (abs(total_winding) % 2 == 1)
+                                   : (total_winding != 0);
+}
+
+// Accumulates (atomically) the gradient contribution of one boundary sample into
+// the shape's gradient storage `d_shape` and into `d_shape_to_canvas`.
+//   shape             - the shape the boundary sample lies on (read only).
+//   contrib           - scalar weight of this sample; must be finite.
+//   t                 - boundary parameter; used here only by the Ellipse case
+//                       (the Path cases read boundary_data.path.t instead).
+//   normal            - boundary normal at the sample; must be finite.
+//   boundary_data     - is_stroke flag and, for paths, base_point_id / point_id / t.
+//   d_shape           - gradient accumulator; d_shape.ptr is cast to the concrete
+//                       shape type selected by shape.type.
+//   shape_to_canvas   - transform applied to local_boundary_pt.
+//   local_boundary_pt - the boundary point before shape_to_canvas is applied.
+//   d_shape_to_canvas - gradient accumulator for shape_to_canvas.
+DEVICE void accumulate_boundary_gradient(const Shape &shape,
+ float contrib,
+ float t,
+ const Vector2f &normal,
+ const BoundaryData &boundary_data,
+ Shape &d_shape,
+ const Matrix3x3f &shape_to_canvas,
+ const Vector2f &local_boundary_pt,
+ Matrix3x3f &d_shape_to_canvas) {
+ assert(isfinite(contrib));
+ assert(isfinite(normal));
+ // According to the Reynolds transport theorem,
+ // the Jacobian of the boundary integral is dot(velocity, normal),
+ // where the velocity depends on the variable being differentiated with.
+ if (boundary_data.is_stroke) {
+ // A path may carry per-point thickness; otherwise the shape has one stroke_width.
+ auto has_path_thickness = false;
+ if (shape.type == ShapeType::Path) {
+ const Path &path = *(const Path *)shape.ptr;
+ has_path_thickness = path.thickness != nullptr;
+ }
+ // differentiate stroke width: velocity is the same as normal
+ if (has_path_thickness) {
+ Path *d_p = (Path*)d_shape.ptr;
+ auto base_point_id = boundary_data.path.base_point_id;
+ auto point_id = boundary_data.path.point_id;
+ // Shadows the function parameter t with the path-segment parameter.
+ auto t = boundary_data.path.t;
+ const Path &path = *(const Path *)shape.ptr;
+ // num_control_points selects the segment type: 0 line, 1 quadratic, 2 cubic.
+ // The last index of each segment wraps around modulo num_points.
+ if (path.num_control_points[base_point_id] == 0) {
+ // Straight line
+ auto i0 = point_id;
+ auto i1 = (point_id + 1) % path.num_points;
+ // r = r0 + t * (r1 - r0)
+ atomic_add(&d_p->thickness[i0], (1 - t) * contrib);
+ atomic_add(&d_p->thickness[i1], ( t) * contrib);
+ } else if (path.num_control_points[base_point_id] == 1) {
+ // Quadratic Bezier curve
+ auto i0 = point_id;
+ auto i1 = point_id + 1;
+ auto i2 = (point_id + 2) % path.num_points;
+ // r = (1-t)^2r0 + 2(1-t)t r1 + t^2 r2
+ atomic_add(&d_p->thickness[i0], square(1 - t) * contrib);
+ atomic_add(&d_p->thickness[i1], (2*(1-t)*t) * contrib);
+ atomic_add(&d_p->thickness[i2], (t*t) * contrib);
+ } else if (path.num_control_points[base_point_id] == 2) {
+ // Cubic Bezier curve
+ auto i0 = point_id;
+ auto i1 = point_id + 1;
+ auto i2 = point_id + 2;
+ auto i3 = (point_id + 3) % path.num_points;
+ // r = (1-t)^3r0 + 3*(1-t)^2tr1 + 3*(1-t)t^2r2 + t^3r3
+ atomic_add(&d_p->thickness[i0], cubic(1 - t) * contrib);
+ atomic_add(&d_p->thickness[i1], 3 * square(1 - t) * t * contrib);
+ atomic_add(&d_p->thickness[i2], 3 * (1 - t) * t * t * contrib);
+ atomic_add(&d_p->thickness[i3], t * t * t * contrib);
+ } else {
+ assert(false);
+ }
+ } else {
+ atomic_add(&d_shape.stroke_width, contrib);
+ }
+ }
+ // Geometry gradient; this runs for stroke and non-stroke samples alike.
+ switch (shape.type) {
+ case ShapeType::Circle: {
+ Circle *d_p = (Circle*)d_shape.ptr;
+ // velocity for the center is (1, 0) for x and (0, 1) for y
+ atomic_add(&d_p->center[0], normal * contrib);
+ // velocity for the radius is the same as the normal
+ atomic_add(&d_p->radius, contrib);
+ break;
+ } case ShapeType::Ellipse: {
+ Ellipse *d_p = (Ellipse*)d_shape.ptr;
+ // velocity for the center is (1, 0) for x and (0, 1) for y
+ atomic_add(&d_p->center[0], normal * contrib);
+ // velocity for the radius:
+ // x = center.x + r.x * cos(2pi * t)
+ // y = center.y + r.y * sin(2pi * t)
+ // for r.x: (cos(2pi * t), 0)
+ // for r.y: (0, sin(2pi * t))
+ atomic_add(&d_p->radius.x, cos(2 * float(M_PI) * t) * normal.x * contrib);
+ atomic_add(&d_p->radius.y, sin(2 * float(M_PI) * t) * normal.y * contrib);
+ break;
+ } case ShapeType::Path: {
+ Path *d_p = (Path*)d_shape.ptr;
+ auto base_point_id = boundary_data.path.base_point_id;
+ auto point_id = boundary_data.path.point_id;
+ // Shadows the function parameter t with the path-segment parameter.
+ auto t = boundary_data.path.t;
+ const Path &path = *(const Path *)shape.ptr;
+ // points is indexed as interleaved x/y pairs: [2 * i + 0] = x, [2 * i + 1] = y.
+ if (path.num_control_points[base_point_id] == 0) {
+ // Straight line
+ auto i0 = point_id;
+ auto i1 = (point_id + 1) % path.num_points;
+ // pt = p0 + t * (p1 - p0)
+ // velocity for p0.x: (1 - t, 0)
+ // p0.y: ( 0, 1 - t)
+ // p1.x: ( t, 0)
+ // p1.y: ( 0, t)
+ atomic_add(&d_p->points[2 * i0 + 0], (1 - t) * normal.x * contrib);
+ atomic_add(&d_p->points[2 * i0 + 1], (1 - t) * normal.y * contrib);
+ atomic_add(&d_p->points[2 * i1 + 0], ( t) * normal.x * contrib);
+ atomic_add(&d_p->points[2 * i1 + 1], ( t) * normal.y * contrib);
+ } else if (path.num_control_points[base_point_id] == 1) {
+ // Quadratic Bezier curve
+ auto i0 = point_id;
+ auto i1 = point_id + 1;
+ auto i2 = (point_id + 2) % path.num_points;
+ // pt = (1-t)^2p0 + 2(1-t)t p1 + t^2 p2
+ // velocity for p0.x: ((1-t)^2, 0)
+ // p0.y: ( 0, (1-t)^2)
+ // p1.x: (2(1-t)t, 0)
+ // p1.y: ( 0, 2(1-t)t)
+ // p2.x: ( t^2, 0)
+ // p2.y: ( 0, t^2)
+ atomic_add(&d_p->points[2 * i0 + 0], square(1 - t) * normal.x * contrib);
+ atomic_add(&d_p->points[2 * i0 + 1], square(1 - t) * normal.y * contrib);
+ atomic_add(&d_p->points[2 * i1 + 0], (2*(1-t)*t) * normal.x * contrib);
+ atomic_add(&d_p->points[2 * i1 + 1], (2*(1-t)*t) * normal.y * contrib);
+ atomic_add(&d_p->points[2 * i2 + 0], (t*t) * normal.x * contrib);
+ atomic_add(&d_p->points[2 * i2 + 1], (t*t) * normal.y * contrib);
+ } else if (path.num_control_points[base_point_id] == 2) {
+ // Cubic Bezier curve
+ auto i0 = point_id;
+ auto i1 = point_id + 1;
+ auto i2 = point_id + 2;
+ auto i3 = (point_id + 3) % path.num_points;
+ // pt = (1-t)^3p0 + 3*(1-t)^2tp1 + 3*(1-t)t^2p2 + t^3p3
+ // velocity for p0.x: ( (1-t)^3, 0)
+ // p0.y: ( 0, (1-t)^3)
+ // p1.x: (3*(1-t)^2t, 0)
+ // p1.y: ( 0, 3*(1-t)^2t)
+ // p2.x: (3*(1-t)t^2, 0)
+ // p2.y: ( 0, 3*(1-t)t^2)
+ // p3.x: ( t^3, 0)
+ // p3.y: ( 0, t^3)
+ atomic_add(&d_p->points[2 * i0 + 0], cubic(1 - t) * normal.x * contrib);
+ atomic_add(&d_p->points[2 * i0 + 1], cubic(1 - t) * normal.y * contrib);
+ atomic_add(&d_p->points[2 * i1 + 0], 3 * square(1 - t) * t * normal.x * contrib);
+ atomic_add(&d_p->points[2 * i1 + 1], 3 * square(1 - t) * t * normal.y * contrib);
+ atomic_add(&d_p->points[2 * i2 + 0], 3 * (1 - t) * t * t * normal.x * contrib);
+ atomic_add(&d_p->points[2 * i2 + 1], 3 * (1 - t) * t * t * normal.y * contrib);
+ atomic_add(&d_p->points[2 * i3 + 0], t * t * t * normal.x * contrib);
+ atomic_add(&d_p->points[2 * i3 + 1], t * t * t * normal.y * contrib);
+ } else {
+ assert(false);
+ }
+ break;
+ } case ShapeType::Rect: {
+ Rect *d_p = (Rect*)d_shape.ptr;
+ // The velocity depends on the position of the boundary
+ // The edge is identified by exact comparison of the normal with the four
+ // axis-aligned unit vectors; any other normal trips the assert below.
+ if (normal == Vector2f{-1, 0}) {
+ // left
+ // velocity for p_min is (1, 0) for x and (0, 0) for y
+ atomic_add(&d_p->p_min.x, -contrib);
+ } else if (normal == Vector2f{1, 0}) {
+ // right
+ // velocity for p_max is (1, 0) for x and (0, 0) for y
+ atomic_add(&d_p->p_max.x, contrib);
+ } else if (normal == Vector2f{0, -1}) {
+ // top
+ // velocity for p_min is (0, 0) for x and (0, 1) for y
+ atomic_add(&d_p->p_min.y, -contrib);
+ } else if (normal == Vector2f{0, 1}) {
+ // bottom
+ // velocity for p_max is (0, 0) for x and (0, 1) for y
+ atomic_add(&d_p->p_max.y, contrib);
+ } else {
+ // incorrect normal assignment?
+ assert(false);
+ }
+ break;
+ } default: {
+ assert(false);
+ break;
+ }
+ }
+ // for shape_to_canvas we have the following relationship:
+ // boundary_pt = xform_pt(shape_to_canvas, local_pt)
+ // the velocity is the derivative of boundary_pt with respect to shape_to_canvas
+ // we can use reverse-mode AD to compute the dot product of the velocity and the Jacobian
+ // by passing the normal in d_xform_pt
+ auto d_shape_to_canvas_ = Matrix3x3f();
+ // Required by d_xform_pt's signature; the value written here is not used afterwards.
+ auto d_local_boundary_pt = Vector2f{0, 0};
+ d_xform_pt(shape_to_canvas,
+ local_boundary_pt,
+ normal * contrib,
+ d_shape_to_canvas_,
+ d_local_boundary_pt);
+ atomic_add(&d_shape_to_canvas(0, 0), d_shape_to_canvas_);
+}
+
+// Evaluates a paint at the point `pt` and returns its RGBA value.
+//   color_type - selects how `color` is interpreted (Constant, LinearGradient
+//                or RadialGradient).
+//   color      - pointer to the paint struct matching color_type.
+//   pt         - position at which to evaluate the paint.
+// For gradients, a parameter t is computed (projection onto the begin->end axis
+// for linear, length of the radius-normalized offset from the center for radial).
+// Below the first stop offset the first stop color is returned, beyond the last
+// the last stop color is returned, and in between the two neighbouring stop
+// colors are blended linearly. stop_colors is read as 4 consecutive values per stop.
+// An unknown color_type asserts and yields a default-constructed Vector4f.
+DEVICE
+Vector4f sample_color(const ColorType &color_type,
+                      void *color,
+                      const Vector2f &pt) {
+    if (color_type == ColorType::Constant) {
+        const auto *constant = (const Constant*)color;
+        assert(isfinite(constant->color));
+        return constant->color;
+    }
+    if (color_type == ColorType::LinearGradient) {
+        const auto *grad = (const LinearGradient*)color;
+        // Project pt onto the segment (begin, end); the squared length is clamped
+        // from below so a degenerate gradient does not divide by zero.
+        const auto origin = grad->begin;
+        const auto target = grad->end;
+        const auto t = dot(pt - origin, target - origin) /
+                       max(dot(target - origin, target - origin), 1e-3f);
+        if (t < grad->stop_offsets[0]) {
+            return Vector4f{grad->stop_colors[0],
+                            grad->stop_colors[1],
+                            grad->stop_colors[2],
+                            grad->stop_colors[3]};
+        }
+        // Find the pair of stops that brackets t.
+        for (int seg = 0; seg < grad->num_stops - 1; seg++) {
+            const auto lo = grad->stop_offsets[seg];
+            const auto hi = grad->stop_offsets[seg + 1];
+            assert(hi > lo);
+            if (!(t >= lo && t < hi)) {
+                continue;
+            }
+            const auto lo_base = 4 * seg;
+            const auto hi_base = 4 * (seg + 1);
+            const auto lo_color = Vector4f{grad->stop_colors[lo_base + 0],
+                                           grad->stop_colors[lo_base + 1],
+                                           grad->stop_colors[lo_base + 2],
+                                           grad->stop_colors[lo_base + 3]};
+            const auto hi_color = Vector4f{grad->stop_colors[hi_base + 0],
+                                           grad->stop_colors[hi_base + 1],
+                                           grad->stop_colors[hi_base + 2],
+                                           grad->stop_colors[hi_base + 3]};
+            const auto blend = (t - lo) / (hi - lo);
+            assert(isfinite(blend));
+            assert(isfinite(lo_color));
+            assert(isfinite(hi_color));
+            return lo_color * (1 - blend) + hi_color * blend;
+        }
+        const auto last_base = 4 * (grad->num_stops - 1);
+        return Vector4f{grad->stop_colors[last_base + 0],
+                        grad->stop_colors[last_base + 1],
+                        grad->stop_colors[last_base + 2],
+                        grad->stop_colors[last_base + 3]};
+    }
+    if (color_type == ColorType::RadialGradient) {
+        const auto *grad = (const RadialGradient*)color;
+        // t is the distance from the center measured in units of the radius.
+        const auto from_center = pt - grad->center;
+        const auto scaled = from_center / grad->radius;
+        const auto t = length(scaled);
+        if (t < grad->stop_offsets[0]) {
+            return Vector4f{grad->stop_colors[0],
+                            grad->stop_colors[1],
+                            grad->stop_colors[2],
+                            grad->stop_colors[3]};
+        }
+        // Find the pair of stops that brackets t.
+        for (int seg = 0; seg < grad->num_stops - 1; seg++) {
+            const auto lo = grad->stop_offsets[seg];
+            const auto hi = grad->stop_offsets[seg + 1];
+            assert(hi > lo);
+            if (!(t >= lo && t < hi)) {
+                continue;
+            }
+            const auto lo_base = 4 * seg;
+            const auto hi_base = 4 * (seg + 1);
+            const auto lo_color = Vector4f{grad->stop_colors[lo_base + 0],
+                                           grad->stop_colors[lo_base + 1],
+                                           grad->stop_colors[lo_base + 2],
+                                           grad->stop_colors[lo_base + 3]};
+            const auto hi_color = Vector4f{grad->stop_colors[hi_base + 0],
+                                           grad->stop_colors[hi_base + 1],
+                                           grad->stop_colors[hi_base + 2],
+                                           grad->stop_colors[hi_base + 3]};
+            const auto blend = (t - lo) / (hi - lo);
+            assert(isfinite(blend));
+            assert(isfinite(lo_color));
+            assert(isfinite(hi_color));
+            return lo_color * (1 - blend) + hi_color * blend;
+        }
+        const auto last_base = 4 * (grad->num_stops - 1);
+        return Vector4f{grad->stop_colors[last_base + 0],
+                        grad->stop_colors[last_base + 1],
+                        grad->stop_colors[last_base + 2],
+                        grad->stop_colors[last_base + 3]};
+    }
+    // Unknown paint type.
+    assert(false);
+    return Vector4f{};
+}
+
+// Reverse-mode counterpart of sample_color(color_type, color, pt).
+// Given d_color, the gradient with respect to the sampled RGBA value, atomically
+// accumulates the gradient into the paint parameters stored behind d_color_ptr.
+//   color_type    - selects how color_ptr / d_color_ptr are interpreted.
+//   color_ptr     - paint parameters used by the forward evaluation.
+//   pt            - position at which the paint was evaluated.
+//   d_color       - incoming gradient of the sampled color.
+//   d_color_ptr   - gradient accumulator with the same struct type as color_ptr.
+//   d_translation - optional (may be nullptr); for gradients that hit a stop
+//                   interval it receives d_begin + d_end (linear) or d_center
+//                   (radial).
+// Each branch mirrors the forward pass: exactly one of "before the first stop",
+// "inside stop interval i" or "after the last stop" receives the gradient.
+DEVICE
+void d_sample_color(const ColorType &color_type,
+                    void *color_ptr,
+                    const Vector2f &pt,
+                    const Vector4f &d_color,
+                    void *d_color_ptr,
+                    float *d_translation) {
+    switch (color_type) {
+        case ColorType::Constant: {
+            auto d_c = (Constant*)d_color_ptr;
+            atomic_add(&d_c->color[0], d_color);
+            return;
+        } case ColorType::LinearGradient: {
+            auto c = (const LinearGradient*)color_ptr;
+            auto d_c = (LinearGradient*)d_color_ptr;
+            // Project pt to (c->begin, c->end)
+            auto beg = c->begin;
+            auto end = c->end;
+            auto t = dot(pt - beg, end - beg) / max(dot(end - beg, end - beg), 1e-3f);
+            // Find the corresponding stop:
+            if (t < c->stop_offsets[0]) {
+                atomic_add(&d_c->stop_colors[0], d_color);
+                return;
+            }
+            for (int i = 0; i < c->num_stops - 1; i++) {
+                auto offset_curr = c->stop_offsets[i];
+                auto offset_next = c->stop_offsets[i + 1];
+                assert(offset_next > offset_curr);
+                if (t >= offset_curr && t < offset_next) {
+                    auto color_curr = Vector4f{
+                        c->stop_colors[4 * i + 0],
+                        c->stop_colors[4 * i + 1],
+                        c->stop_colors[4 * i + 2],
+                        c->stop_colors[4 * i + 3]};
+                    auto color_next = Vector4f{
+                        c->stop_colors[4 * (i + 1) + 0],
+                        c->stop_colors[4 * (i + 1) + 1],
+                        c->stop_colors[4 * (i + 1) + 2],
+                        c->stop_colors[4 * (i + 1) + 3]};
+                    auto tt = (t - offset_curr) / (offset_next - offset_curr);
+                    // forward: color_curr * (1 - tt) + color_next * tt
+                    auto d_color_curr = d_color * (1 - tt);
+                    auto d_color_next = d_color * tt;
+                    auto d_tt = sum(d_color * (color_next - color_curr));
+                    // tt = (t - offset_curr) / (offset_next - offset_curr)
+                    auto d_offset_next = -d_tt * tt / (offset_next - offset_curr);
+                    auto d_offset_curr = d_tt * ((tt - 1.f) / (offset_next - offset_curr));
+                    auto d_t = d_tt / (offset_next - offset_curr);
+                    assert(isfinite(d_tt));
+                    atomic_add(&d_c->stop_colors[4 * i], d_color_curr);
+                    atomic_add(&d_c->stop_colors[4 * (i + 1)], d_color_next);
+                    atomic_add(&d_c->stop_offsets[i], d_offset_curr);
+                    atomic_add(&d_c->stop_offsets[i + 1], d_offset_next);
+                    // l = max(dot(end - beg, end - beg), 1e-3f)
+                    // t = dot(pt - beg, end - beg) / l;
+                    auto l = max(dot(end - beg, end - beg), 1e-3f);
+                    auto d_beg = d_t * (-(pt - beg)-(end - beg)) / l;
+                    auto d_end = d_t * (pt - beg) / l;
+                    auto d_l = -d_t * t / l;
+                    // l only depends on beg/end when the clamp is inactive.
+                    if (dot(end - beg, end - beg) > 1e-3f) {
+                        d_beg += 2 * d_l * (beg - end);
+                        d_end += 2 * d_l * (end - beg);
+                    }
+                    atomic_add(&d_c->begin[0], d_beg);
+                    atomic_add(&d_c->end[0], d_end);
+                    if (d_translation != nullptr) {
+                        atomic_add(d_translation, (d_beg + d_end));
+                    }
+                    return;
+                }
+            }
+            // t lies at or beyond the last stop.
+            atomic_add(&d_c->stop_colors[4 * (c->num_stops - 1)], d_color);
+            return;
+        } case ColorType::RadialGradient: {
+            auto c = (const RadialGradient*)color_ptr;
+            auto d_c = (RadialGradient*)d_color_ptr;
+            // Distance from pt to center
+            auto offset = pt - c->center;
+            auto normalized_offset = offset / c->radius;
+            auto t = length(normalized_offset);
+            // Find the corresponding stop:
+            if (t < c->stop_offsets[0]) {
+                atomic_add(&d_c->stop_colors[0], d_color);
+                return;
+            }
+            for (int i = 0; i < c->num_stops - 1; i++) {
+                auto offset_curr = c->stop_offsets[i];
+                auto offset_next = c->stop_offsets[i + 1];
+                assert(offset_next > offset_curr);
+                if (t >= offset_curr && t < offset_next) {
+                    auto color_curr = Vector4f{
+                        c->stop_colors[4 * i + 0],
+                        c->stop_colors[4 * i + 1],
+                        c->stop_colors[4 * i + 2],
+                        c->stop_colors[4 * i + 3]};
+                    auto color_next = Vector4f{
+                        c->stop_colors[4 * (i + 1) + 0],
+                        c->stop_colors[4 * (i + 1) + 1],
+                        c->stop_colors[4 * (i + 1) + 2],
+                        c->stop_colors[4 * (i + 1) + 3]};
+                    auto tt = (t - offset_curr) / (offset_next - offset_curr);
+                    assert(isfinite(tt));
+                    // forward: color_curr * (1 - tt) + color_next * tt
+                    auto d_color_curr = d_color * (1 - tt);
+                    auto d_color_next = d_color * tt;
+                    auto d_tt = sum(d_color * (color_next - color_curr));
+                    // tt = (t - offset_curr) / (offset_next - offset_curr)
+                    auto d_offset_next = -d_tt * tt / (offset_next - offset_curr);
+                    auto d_offset_curr = d_tt * ((tt - 1.f) / (offset_next - offset_curr));
+                    auto d_t = d_tt / (offset_next - offset_curr);
+                    assert(isfinite(d_t));
+                    atomic_add(&d_c->stop_colors[4 * i], d_color_curr);
+                    atomic_add(&d_c->stop_colors[4 * (i + 1)], d_color_next);
+                    atomic_add(&d_c->stop_offsets[i], d_offset_curr);
+                    atomic_add(&d_c->stop_offsets[i + 1], d_offset_next);
+                    // offset = pt - c->center
+                    // normalized_offset = offset / c->radius
+                    // t = length(normalized_offset)
+                    auto d_normalized_offset = d_length(normalized_offset, d_t);
+                    auto d_offset = d_normalized_offset / c->radius;
+                    auto d_radius = -d_normalized_offset * offset / (c->radius * c->radius);
+                    auto d_center = -d_offset;
+                    atomic_add(&d_c->center[0], d_center);
+                    atomic_add(&d_c->radius[0], d_radius);
+                    if (d_translation != nullptr) {
+                        atomic_add(d_translation, d_center);
+                    }
+                    // The forward pass returns from this interval; stop here so the
+                    // gradient is not additionally credited to the last stop color.
+                    return;
+                }
+            }
+            // t lies at or beyond the last stop.
+            atomic_add(&d_c->stop_colors[4 * (c->num_stops - 1)], d_color);
+            return;
+        } default: {
+            assert(false);
+        }
+    }
+}
+
+// One paint layer hit by a sample; collected, sorted by group_id and blended
+// in sample_color.
+struct Fragment {
+ Vector3f color; // first three components of the sampled paint
+ float alpha; // fourth component of the sampled paint
+ int group_id; // shape group that produced the fragment; used as sort key
+ bool is_stroke; // true for the group's stroke paint, false for its fill paint
+};
+
+// Fragment record used by sample_color_prefiltered: the Fragment fields plus the
+// closest-point query result that produced the coverage weight.
+struct PrefilterFragment {
+ Vector3f color; // first three components of the sampled paint
+ float alpha; // fourth component of the sampled paint, scaled by the coverage weight
+ int group_id; // shape group that produced the fragment; used as sort key
+ bool is_stroke; // true for the group's stroke paint, false for its fill paint
+ int shape_id; // shape id reported by compute_distance
+ float distance; // distance reported by compute_distance (sign-adjusted for fills)
+ Vector2f closest_pt; // closest point reported by compute_distance
+ ClosestPointPathInfo path_info; // path info reported by compute_distance
+ bool within_distance; // whether compute_distance reported a hit (always true for strokes)
+};
+
+// Evaluates the composited RGBA color of the scene at one sample and, optionally,
+// back-propagates a color gradient to the paints that contributed to it.
+//   scene              - scene data (BVH, shape groups, gradient buffers).
+//   background_color   - optional background; nullptr means transparent black.
+//   screen_pt          - sample position in screen space ([0, 1), [0, 1)).
+//   d_color            - optional; when non-null the backward pass is run with
+//                        this gradient of the returned color.
+//   edge_query         - optional; its hit flag is reset on entry and updated by
+//                        the coverage tests and by the blending loop below.
+//   d_background_color - optional output for the background gradient.
+//   d_translation      - optional; forwarded to d_sample_color.
+// Returns the blended color with RGB divided by alpha when alpha > 1e-6.
+DEVICE
+Vector4f sample_color(const SceneData &scene,
+ const Vector4f *background_color,
+ const Vector2f &screen_pt,
+ const Vector4f *d_color = nullptr,
+ EdgeQuery *edge_query = nullptr,
+ Vector4f *d_background_color = nullptr,
+ float *d_translation = nullptr) {
+ if (edge_query != nullptr) {
+ edge_query->hit = false;
+ }
+
+ // screen_pt is in screen space ([0, 1), [0, 1)),
+ // need to transform to canvas space
+ auto pt = screen_pt;
+ pt.x *= scene.canvas_width;
+ pt.y *= scene.canvas_height;
+ // Fixed capacities for the fragment list and the traversal stack; both are
+ // only guarded by asserts.
+ constexpr auto max_hit_shapes = 256;
+ constexpr auto max_bvh_stack_size = 64;
+ Fragment fragments[max_hit_shapes];
+ int bvh_stack[max_bvh_stack_size];
+ auto stack_size = 0;
+ auto num_fragments = 0;
+ // Start the traversal at node 2 * num_shape_groups - 2.
+ bvh_stack[stack_size++] = 2 * scene.num_shape_groups - 2;
+ while (stack_size > 0) {
+ const BVHNode &node = scene.bvh_nodes[bvh_stack[--stack_size]];
+ if (node.child1 < 0) {
+ // leaf
+ // child0 of a leaf holds the shape group id.
+ auto group_id = node.child0;
+ const ShapeGroup &shape_group = scene.shape_groups[group_id];
+ // A group can contribute up to two fragments: one for its stroke paint
+ // and one for its fill paint.
+ if (shape_group.stroke_color != nullptr) {
+ if (within_distance(scene, group_id, pt, edge_query)) {
+ auto color_alpha = sample_color(shape_group.stroke_color_type,
+ shape_group.stroke_color,
+ pt);
+ Fragment f;
+ f.color = Vector3f{color_alpha[0], color_alpha[1], color_alpha[2]};
+ f.alpha = color_alpha[3];
+ f.group_id = group_id;
+ f.is_stroke = true;
+ assert(num_fragments < max_hit_shapes);
+ fragments[num_fragments++] = f;
+ }
+ }
+ if (shape_group.fill_color != nullptr) {
+ if (is_inside(scene, group_id, pt, edge_query)) {
+ auto color_alpha = sample_color(shape_group.fill_color_type,
+ shape_group.fill_color,
+ pt);
+ Fragment f;
+ f.color = Vector3f{color_alpha[0], color_alpha[1], color_alpha[2]};
+ f.alpha = color_alpha[3];
+ f.group_id = group_id;
+ f.is_stroke = false;
+ assert(num_fragments < max_hit_shapes);
+ fragments[num_fragments++] = f;
+ }
+ }
+ } else {
+ // Interior node: visit each child whose box, grown by that child's
+ // max_radius, contains the point.
+ assert(node.child0 >= 0 && node.child1 >= 0);
+ const AABB &b0 = scene.bvh_nodes[node.child0].box;
+ if (inside(b0, pt, scene.bvh_nodes[node.child0].max_radius)) {
+ bvh_stack[stack_size++] = node.child0;
+ }
+ const AABB &b1 = scene.bvh_nodes[node.child1].box;
+ if (inside(b1, pt, scene.bvh_nodes[node.child1].max_radius)) {
+ bvh_stack[stack_size++] = node.child1;
+ }
+ assert(stack_size <= max_bvh_stack_size);
+ }
+ }
+ // Nothing was hit: the result is the background (or transparent black).
+ if (num_fragments <= 0) {
+ if (background_color != nullptr) {
+ if (d_background_color != nullptr) {
+ // NOTE(review): dereferences d_color without a null check, and assigns
+ // rather than accumulates (the hit path below uses +=) - confirm callers.
+ *d_background_color = *d_color;
+ }
+ return *background_color;
+ }
+ return Vector4f{0, 0, 0, 0};
+ }
+ // Sort the fragments from back to front (i.e. increasing order of group id)
+ // https://github.com/frigaut/yorick-imutil/blob/master/insort.c#L37
+ // Insertion sort; fragments with equal group_id keep their insertion order.
+ for (int i = 1; i < num_fragments; i++) {
+ auto j = i;
+ auto temp = fragments[j];
+ while (j > 0 && fragments[j - 1].group_id > temp.group_id) {
+ fragments[j] = fragments[j - 1];
+ j--;
+ }
+ fragments[j] = temp;
+ }
+ // Blend the color
+ // accum_color[i] / accum_alpha[i] hold the running result after fragment i;
+ // they are kept because the backward pass needs every intermediate value.
+ Vector3f accum_color[max_hit_shapes];
+ float accum_alpha[max_hit_shapes];
+ // auto hit_opaque = false;
+ // The blend starts from the background, or from transparent black without one.
+ auto first_alpha = 0.f;
+ auto first_color = Vector3f{0, 0, 0};
+ if (background_color != nullptr) {
+ first_alpha = background_color->w;
+ first_color = Vector3f{background_color->x,
+ background_color->y,
+ background_color->z};
+ }
+ for (int i = 0; i < num_fragments; i++) {
+ const Fragment &fragment = fragments[i];
+ auto new_color = fragment.color;
+ auto new_alpha = fragment.alpha;
+ auto prev_alpha = i > 0 ? accum_alpha[i - 1] : first_alpha;
+ auto prev_color = i > 0 ? accum_color[i - 1] : first_color;
+ if (edge_query != nullptr) {
+ // Do we hit the target shape?
+ if (new_alpha >= 1.f && edge_query->hit) {
+ // A fully opaque shape in front of the target occludes it
+ edge_query->hit = false;
+ }
+ if (edge_query->shape_group_id == fragment.group_id) {
+ edge_query->hit = true;
+ }
+ }
+ // prev_color is alpha premultiplied, don't need to multiply with
+ // prev_alpha
+ accum_color[i] = prev_color * (1 - new_alpha) + new_alpha * new_color;
+ accum_alpha[i] = prev_alpha * (1 - new_alpha) + new_alpha;
+ }
+ auto final_color = accum_color[num_fragments - 1];
+ auto final_alpha = accum_alpha[num_fragments - 1];
+ // Undo the alpha premultiplication unless the result is (almost) transparent.
+ if (final_alpha > 1e-6f) {
+ final_color /= final_alpha;
+ }
+ assert(isfinite(final_color));
+ assert(isfinite(final_alpha));
+ if (d_color != nullptr) {
+ // Backward pass
+ auto d_final_color = Vector3f{(*d_color)[0], (*d_color)[1], (*d_color)[2]};
+ auto d_final_alpha = (*d_color)[3];
+ auto d_curr_color = d_final_color;
+ auto d_curr_alpha = d_final_alpha;
+ if (final_alpha > 1e-6f) {
+ // final_color = curr_color / final_alpha
+ d_curr_color = d_final_color / final_alpha;
+ d_curr_alpha -= sum(d_final_color * final_color) / final_alpha;
+ }
+ assert(isfinite(*d_color));
+ assert(isfinite(d_curr_color));
+ assert(isfinite(d_curr_alpha));
+ // Walk the fragments front to back, peeling one blend step per iteration.
+ for (int i = num_fragments - 1; i >= 0; i--) {
+ // color[n] = prev_color * (1 - new_alpha) + new_alpha * new_color;
+ // alpha[n] = prev_alpha * (1 - new_alpha) + new_alpha;
+ auto prev_alpha = i > 0 ? accum_alpha[i - 1] : first_alpha;
+ auto prev_color = i > 0 ? accum_color[i - 1] : first_color;
+ auto d_prev_alpha = d_curr_alpha * (1.f - fragments[i].alpha);
+ auto d_alpha_i = d_curr_alpha * (1.f - prev_alpha);
+ d_alpha_i += sum(d_curr_color * (fragments[i].color - prev_color));
+ auto d_prev_color = d_curr_color * (1 - fragments[i].alpha);
+ auto d_color_i = d_curr_color * fragments[i].alpha;
+ auto group_id = fragments[i].group_id;
+ // Push the fragment's gradient into the paint it was sampled from.
+ if (fragments[i].is_stroke) {
+ d_sample_color(scene.shape_groups[group_id].stroke_color_type,
+ scene.shape_groups[group_id].stroke_color,
+ pt,
+ Vector4f{d_color_i[0], d_color_i[1], d_color_i[2], d_alpha_i},
+ scene.d_shape_groups[group_id].stroke_color,
+ d_translation);
+ } else {
+ d_sample_color(scene.shape_groups[group_id].fill_color_type,
+ scene.shape_groups[group_id].fill_color,
+ pt,
+ Vector4f{d_color_i[0], d_color_i[1], d_color_i[2], d_alpha_i},
+ scene.d_shape_groups[group_id].fill_color,
+ d_translation);
+ }
+ d_curr_color = d_prev_color;
+ d_curr_alpha = d_prev_alpha;
+ }
+ // Whatever gradient remains after the last step belongs to the background.
+ if (d_background_color != nullptr) {
+ d_background_color->x += d_curr_color.x;
+ d_background_color->y += d_curr_color.y;
+ d_background_color->z += d_curr_color.z;
+ d_background_color->w += d_curr_alpha;
+ }
+ }
+ return Vector4f{final_color[0], final_color[1], final_color[2], final_alpha};
+}
+
+// Evaluates a weighted, signed distance from a sample to the closest shape.
+//   scene         - scene data (shape groups, shapes, gradient buffers).
+//   screen_pt     - sample position in screen space ([0, 1), [0, 1)).
+//   weight        - factor applied to the distance before the sign is chosen.
+//   d_dist        - optional; when non-null, d_compute_distance is called to
+//                   back-propagate this gradient to the closest shape and its
+//                   group's shape_to_canvas transform.
+//   d_translation - optional; forwarded to d_compute_distance.
+// Every shape group is queried with compute_distance and the smallest reported
+// distance wins. The result is negated when the winning group has a fill paint
+// and the point is inside it. Returns 0 when no group reports a distance.
+DEVICE
+float sample_distance(const SceneData &scene,
+                      const Vector2f &screen_pt,
+                      float weight,
+                      const float *d_dist = nullptr,
+                      float *d_translation = nullptr) {
+    // screen_pt is in screen space ([0, 1), [0, 1)),
+    // need to transform to canvas space
+    auto pt = screen_pt;
+    pt.x *= scene.canvas_width;
+    pt.y *= scene.canvas_height;
+    // for each shape
+    auto min_group_id = -1;
+    auto min_distance = 0.f;
+    auto min_shape_id = -1;
+    auto closest_pt = Vector2f{0, 0};
+    auto min_path_info = ClosestPointPathInfo{-1, -1, 0};
+    for (int group_id = scene.num_shape_groups - 1; group_id >= 0; group_id--) {
+        auto s = -1;
+        auto p = Vector2f{0, 0};
+        ClosestPointPathInfo local_path_info;
+        // The template argument is required: T only appears in the return type
+        // of infinity() and therefore cannot be deduced.
+        auto d = infinity<float>();
+        if (compute_distance(scene, group_id, pt, infinity<float>(), &s, &p, &local_path_info, &d)) {
+            if (min_group_id == -1 || d < min_distance) {
+                min_distance = d;
+                min_group_id = group_id;
+                min_shape_id = s;
+                closest_pt = p;
+                min_path_info = local_path_info;
+            }
+        }
+    }
+    if (min_group_id == -1) {
+        return min_distance;
+    }
+    min_distance *= weight;
+    auto inside = false;
+    const ShapeGroup &shape_group = scene.shape_groups[min_group_id];
+    if (shape_group.fill_color != nullptr) {
+        inside = is_inside(scene,
+                           min_group_id,
+                           pt,
+                           nullptr);
+        if (inside) {
+            min_distance = -min_distance;
+        }
+    }
+    assert((min_group_id >= 0 && min_shape_id >= 0) || scene.num_shape_groups == 0);
+    if (d_dist != nullptr) {
+        // The sign flip applied in the forward pass is mirrored on the gradient.
+        auto d_abs_dist = inside ? -(*d_dist) : (*d_dist);
+        const Shape &shape = scene.shapes[min_shape_id];
+        ShapeGroup &d_shape_group = scene.d_shape_groups[min_group_id];
+        Shape &d_shape = scene.d_shapes[min_shape_id];
+        d_compute_distance(shape_group.canvas_to_shape,
+                           shape_group.shape_to_canvas,
+                           shape,
+                           pt,
+                           closest_pt,
+                           min_path_info,
+                           d_abs_dist,
+                           d_shape_group.shape_to_canvas,
+                           d_shape,
+                           d_translation);
+    }
+    return min_distance;
+}
+
+// Collects the color gradient for a sample at `pt` from the pixels covered by the
+// reconstruction filter around it.
+//   filter        - reconstruction filter; filter.radius must be positive.
+//   d_color_image - per-pixel gradient, read as 4 consecutive floats per pixel.
+//   weight_image  - per-pixel sum of filter weights, one float per pixel.
+//   width, height - image dimensions used for bounds checks and row indexing.
+//   pt            - sample position; its integer part selects the centre pixel.
+// Each in-bounds pixel with a positive weight sum contributes its gradient scaled
+// by filter_weight / weight_sum, since pixel = sum(weight * color) / sum(weight).
+DEVICE
+Vector4f gather_d_color(const Filter &filter,
+                        const float *d_color_image,
+                        const float *weight_image,
+                        int width,
+                        int height,
+                        const Vector2f &pt) {
+    const auto center_x = int(pt.x);
+    const auto center_y = int(pt.y);
+    assert(filter.radius > 0);
+    const auto extent = (int)ceil(filter.radius);
+    auto gathered = Vector4f{0, 0, 0, 0};
+    for (int py = center_y - extent; py <= center_y + extent; py++) {
+        for (int px = center_x - extent; px <= center_x + extent; px++) {
+            const bool in_image = px >= 0 && px < width && py >= 0 && py < height;
+            if (!in_image) {
+                continue;
+            }
+            // The filter is evaluated at the offset from the sample to the pixel centre.
+            const auto pixel_center_x = px + 0.5f;
+            const auto pixel_center_y = py + 0.5f;
+            const auto filter_weight = compute_filter_weight(
+                filter, pixel_center_x - pt.x, pixel_center_y - pt.y);
+            const auto pixel = py * width + px;
+            const auto weight_sum = weight_image[pixel];
+            if (weight_sum > 0) {
+                gathered += (filter_weight / weight_sum) * Vector4f{
+                    d_color_image[4 * pixel + 0],
+                    d_color_image[4 * pixel + 1],
+                    d_color_image[4 * pixel + 2],
+                    d_color_image[4 * pixel + 3],
+                };
+            }
+        }
+    }
+    return gathered;
+}
+
+// Cubic smoothstep over d in [-1, 1]: d is mapped to t = (d + 1) / 2, clamped to
+// [0, 1], and 3t^2 - 2t^3 is returned (0 for d <= -1, 1 for d >= 1).
+DEVICE
+float smoothstep(float d) {
+    const auto t = clamp((d + 1.f) / 2.f, 0.f, 1.f);
+    const auto t_squared = t * t;
+    return t_squared * (3 - 2 * t);
+}
+
+// Derivative of smoothstep(d) with respect to d, scaled by the incoming
+// gradient d_ret. Outside [-1, 1] the derivative is 0.
+DEVICE
+float d_smoothstep(float d, float d_ret) {
+    const bool outside_ramp = (d < -1.f) || (d > 1.f);
+    if (outside_ramp) {
+        return 0.f;
+    }
+    const auto t = (d + 1.f) / 2.f;
+    // ret = 3 t^2 - 2 t^3  =>  d ret / d t = 6 t - 6 t^2
+    const auto d_t = d_ret * (6 * t - 6 * t * t);
+    // t = (d + 1) / 2  =>  d t / d d = 1 / 2
+    return d_t / 2.f;
+}
+
+DEVICE
+Vector4f sample_color_prefiltered(const SceneData &scene,
+ const Vector4f *background_color,
+ const Vector2f &screen_pt,
+ const Vector4f *d_color = nullptr,
+ Vector4f *d_background_color = nullptr,
+ float *d_translation = nullptr) {
+ // screen_pt is in screen space ([0, 1), [0, 1)),
+ // need to transform to canvas space
+ auto pt = screen_pt;
+ pt.x *= scene.canvas_width;
+ pt.y *= scene.canvas_height;
+ constexpr auto max_hit_shapes = 64;
+ constexpr auto max_bvh_stack_size = 64;
+ PrefilterFragment fragments[max_hit_shapes];
+ int bvh_stack[max_bvh_stack_size];
+ auto stack_size = 0;
+ auto num_fragments = 0;
+ bvh_stack[stack_size++] = 2 * scene.num_shape_groups - 2;
+ while (stack_size > 0) {
+ const BVHNode &node = scene.bvh_nodes[bvh_stack[--stack_size]];
+ if (node.child1 < 0) {
+ // leaf
+ auto group_id = node.child0;
+ const ShapeGroup &shape_group = scene.shape_groups[group_id];
+ if (shape_group.stroke_color != nullptr) {
+ auto min_shape_id = -1;
+ auto closest_pt = Vector2f{0, 0};
+ auto local_path_info = ClosestPointPathInfo{-1, -1, 0};
+ auto d = infinity();
+ compute_distance(scene, group_id, pt, infinity(),
+ &min_shape_id, &closest_pt, &local_path_info, &d);
+ assert(min_shape_id != -1);
+ const auto &shape = scene.shapes[min_shape_id];
+ auto w = smoothstep(fabs(d) + shape.stroke_width) -
+ smoothstep(fabs(d) - shape.stroke_width);
+ if (w > 0) {
+ auto color_alpha = sample_color(shape_group.stroke_color_type,
+ shape_group.stroke_color,
+ pt);
+ color_alpha[3] *= w;
+
+ PrefilterFragment f;
+ f.color = Vector3f{color_alpha[0], color_alpha[1], color_alpha[2]};
+ f.alpha = color_alpha[3];
+ f.group_id = group_id;
+ f.shape_id = min_shape_id;
+ f.distance = d;
+ f.closest_pt = closest_pt;
+ f.is_stroke = true;
+ f.path_info = local_path_info;
+ f.within_distance = true;
+ assert(num_fragments < max_hit_shapes);
+ fragments[num_fragments++] = f;
+ }
+ }
+ if (shape_group.fill_color != nullptr) {
+ auto min_shape_id = -1;
+ auto closest_pt = Vector2f{0, 0};
+ auto local_path_info = ClosestPointPathInfo{-1, -1, 0};
+ auto d = infinity();
+ auto found = compute_distance(scene,
+ group_id,
+ pt,
+ 1.f,
+ &min_shape_id,
+ &closest_pt,
+ &local_path_info,
+ &d);
+ auto inside = is_inside(scene, group_id, pt, nullptr);
+ if (found || inside) {
+ if (!inside) {
+ d = -d;
+ }
+ auto w = smoothstep(d);
+ if (w > 0) {
+ auto color_alpha = sample_color(shape_group.fill_color_type,
+ shape_group.fill_color,
+ pt);
+ color_alpha[3] *= w;
+
+ PrefilterFragment f;
+ f.color = Vector3f{color_alpha[0], color_alpha[1], color_alpha[2]};
+ f.alpha = color_alpha[3];
+ f.group_id = group_id;
+ f.shape_id = min_shape_id;
+ f.distance = d;
+ f.closest_pt = closest_pt;
+ f.is_stroke = false;
+ f.path_info = local_path_info;
+ f.within_distance = found;
+ assert(num_fragments < max_hit_shapes);
+ fragments[num_fragments++] = f;
+ }
+ }
+ }
+ } else {
+ assert(node.child0 >= 0 && node.child1 >= 0);
+ const AABB &b0 = scene.bvh_nodes[node.child0].box;
+ if (inside(b0, pt, scene.bvh_nodes[node.child0].max_radius)) {
+ bvh_stack[stack_size++] = node.child0;
+ }
+ const AABB &b1 = scene.bvh_nodes[node.child1].box;
+ if (inside(b1, pt, scene.bvh_nodes[node.child1].max_radius)) {
+ bvh_stack[stack_size++] = node.child1;
+ }
+ assert(stack_size <= max_bvh_stack_size);
+ }
+ }
+ if (num_fragments <= 0) {
+ if (background_color != nullptr) {
+ if (d_background_color != nullptr) {
+ *d_background_color = *d_color;
+ }
+ return *background_color;
+ }
+ return Vector4f{0, 0, 0, 0};
+ }
+ // Sort the fragments from back to front (i.e. increasing order of group id)
+ // https://github.com/frigaut/yorick-imutil/blob/master/insort.c#L37
+ for (int i = 1; i < num_fragments; i++) {
+ auto j = i;
+ auto temp = fragments[j];
+ while (j > 0 && fragments[j - 1].group_id > temp.group_id) {
+ fragments[j] = fragments[j - 1];
+ j--;
+ }
+ fragments[j] = temp;
+ }
+ // Blend the color
+ Vector3f accum_color[max_hit_shapes];
+ float accum_alpha[max_hit_shapes];
+ auto first_alpha = 0.f;
+ auto first_color = Vector3f{0, 0, 0};
+ if (background_color != nullptr) {
+ first_alpha = background_color->w;
+ first_color = Vector3f{background_color->x,
+ background_color->y,
+ background_color->z};
+ }
+ for (int i = 0; i < num_fragments; i++) {
+ const PrefilterFragment &fragment = fragments[i];
+ auto new_color = fragment.color;
+ auto new_alpha = fragment.alpha;
+ auto prev_alpha = i > 0 ? accum_alpha[i - 1] : first_alpha;
+ auto prev_color = i > 0 ? accum_color[i - 1] : first_color;
+ // prev_color is alpha premultiplied, don't need to multiply with
+ // prev_alpha
+ accum_color[i] = prev_color * (1 - new_alpha) + new_alpha * new_color;
+ accum_alpha[i] = prev_alpha * (1 - new_alpha) + new_alpha;
+ }
+ auto final_color = accum_color[num_fragments - 1];
+ auto final_alpha = accum_alpha[num_fragments - 1];
+ if (final_alpha > 1e-6f) {
+ final_color /= final_alpha;
+ }
+ assert(isfinite(final_color));
+ assert(isfinite(final_alpha));
+ if (d_color != nullptr) {
+ // Backward pass
+ auto d_final_color = Vector3f{(*d_color)[0], (*d_color)[1], (*d_color)[2]};
+ auto d_final_alpha = (*d_color)[3];
+ auto d_curr_color = d_final_color;
+ auto d_curr_alpha = d_final_alpha;
+ if (final_alpha > 1e-6f) {
+ // final_color = curr_color / final_alpha
+ d_curr_color = d_final_color / final_alpha;
+ d_curr_alpha -= sum(d_final_color * final_color) / final_alpha;
+ }
+ assert(isfinite(*d_color));
+ assert(isfinite(d_curr_color));
+ assert(isfinite(d_curr_alpha));
+ for (int i = num_fragments - 1; i >= 0; i--) {
+ // color[n] = prev_color * (1 - new_alpha) + new_alpha * new_color;
+ // alpha[n] = prev_alpha * (1 - new_alpha) + new_alpha;
+ auto prev_alpha = i > 0 ? accum_alpha[i - 1] : first_alpha;
+ auto prev_color = i > 0 ? accum_color[i - 1] : first_color;
+ auto d_prev_alpha = d_curr_alpha * (1.f - fragments[i].alpha);
+ auto d_alpha_i = d_curr_alpha * (1.f - prev_alpha);
+ d_alpha_i += sum(d_curr_color * (fragments[i].color - prev_color));
+ auto d_prev_color = d_curr_color * (1 - fragments[i].alpha);
+ auto d_color_i = d_curr_color * fragments[i].alpha;
+ auto group_id = fragments[i].group_id;
+ if (fragments[i].is_stroke) {
+ const auto &shape = scene.shapes[fragments[i].shape_id];
+ auto d = fragments[i].distance;
+ auto abs_d_plus_width = fabs(d) + shape.stroke_width;
+ auto abs_d_minus_width = fabs(d) - shape.stroke_width;
+ auto w = smoothstep(abs_d_plus_width) -
+ smoothstep(abs_d_minus_width);
+ if (w != 0) {
+ auto d_w = w > 0 ? (fragments[i].alpha / w) * d_alpha_i : 0.f;
+ d_alpha_i *= w;
+
+ // Backprop to color
+ d_sample_color(scene.shape_groups[group_id].stroke_color_type,
+ scene.shape_groups[group_id].stroke_color,
+ pt,
+ Vector4f{d_color_i[0], d_color_i[1], d_color_i[2], d_alpha_i},
+ scene.d_shape_groups[group_id].stroke_color,
+ d_translation);
+
+ auto d_abs_d_plus_width = d_smoothstep(abs_d_plus_width, d_w);
+ auto d_abs_d_minus_width = -d_smoothstep(abs_d_minus_width, d_w);
+
+ auto d_d = d_abs_d_plus_width + d_abs_d_minus_width;
+ if (d < 0) {
+ d_d = -d_d;
+ }
+ auto d_stroke_width = d_abs_d_plus_width - d_abs_d_minus_width;
+
+ const auto &shape_group = scene.shape_groups[group_id];
+ ShapeGroup &d_shape_group = scene.d_shape_groups[group_id];
+ Shape &d_shape = scene.d_shapes[fragments[i].shape_id];
+ if (fabs(d_d) > 1e-10f) {
+ d_compute_distance(shape_group.canvas_to_shape,
+ shape_group.shape_to_canvas,
+ shape,
+ pt,
+ fragments[i].closest_pt,
+ fragments[i].path_info,
+ d_d,
+ d_shape_group.shape_to_canvas,
+ d_shape,
+ d_translation);
+ }
+ atomic_add(&d_shape.stroke_width, d_stroke_width);
+ }
+ } else {
+ const auto &shape = scene.shapes[fragments[i].shape_id];
+ auto d = fragments[i].distance;
+ auto w = smoothstep(d);
+ if (w != 0) {
+ // color_alpha[3] = color_alpha[3] * w;
+ auto d_w = w > 0 ? (fragments[i].alpha / w) * d_alpha_i : 0.f;
+ d_alpha_i *= w;
+
+ d_sample_color(scene.shape_groups[group_id].fill_color_type,
+ scene.shape_groups[group_id].fill_color,
+ pt,
+ Vector4f{d_color_i[0], d_color_i[1], d_color_i[2], d_alpha_i},
+ scene.d_shape_groups[group_id].fill_color,
+ d_translation);
+
+ // w = smoothstep(d)
+ auto d_d = d_smoothstep(d, d_w);
+ if (d < 0) {
+ d_d = -d_d;
+ }
+
+ const auto &shape_group = scene.shape_groups[group_id];
+ ShapeGroup &d_shape_group = scene.d_shape_groups[group_id];
+ Shape &d_shape = scene.d_shapes[fragments[i].shape_id];
+ if (fabs(d_d) > 1e-10f && fragments[i].within_distance) {
+ d_compute_distance(shape_group.canvas_to_shape,
+ shape_group.shape_to_canvas,
+ shape,
+ pt,
+ fragments[i].closest_pt,
+ fragments[i].path_info,
+ d_d,
+ d_shape_group.shape_to_canvas,
+ d_shape,
+ d_translation);
+ }
+ }
+ }
+ d_curr_color = d_prev_color;
+ d_curr_alpha = d_prev_alpha;
+ }
+ if (d_background_color != nullptr) {
+ d_background_color->x += d_curr_color.x;
+ d_background_color->y += d_curr_color.y;
+ d_background_color->z += d_curr_color.z;
+ d_background_color->w += d_curr_alpha;
+ }
+ }
+ return Vector4f{final_color[0], final_color[1], final_color[2], final_alpha};
+}
+
+struct weight_kernel {
+    // Per-sample functor: adds this sample's filter weight to every pixel within reach.
+    DEVICE void operator()(int idx) {
+        // idx is laid out as height * width * num_samples_y * num_samples_x
+        const auto spp = num_samples_x * num_samples_y;
+        const auto sub_x = idx % num_samples_x;
+        const auto sub_y = (idx / num_samples_x) % num_samples_y;
+        const auto x = (idx / spp) % width;
+        const auto y = idx / (spp * width);
+        assert(y < height);
+        auto rng = init_pcg32(idx, seed);
+        auto jx = next_pcg32_float(&rng);
+        auto jy = next_pcg32_float(&rng);
+        if (use_prefiltering) {
+            jx = jy = 0.5f; // fixed sub-cell centre instead of random jitter
+        }
+        const auto pt = Vector2f{x + ((float)sub_x + jx) / num_samples_x,
+                                 y + ((float)sub_y + jy) / num_samples_y};
+        const auto radius = scene.filter->radius;
+        assert(radius >= 0);
+        const auto reach = (int)ceil(radius);
+        for (int yy = y - reach; yy <= y + reach; yy++) {
+            if (yy < 0 || yy >= height) continue; // row outside the image
+            for (int xx = x - reach; xx <= x + reach; xx++) {
+                if (xx < 0 || xx >= width) continue; // column outside the image
+                // The filter is evaluated at (pixel centre - sample position).
+                const auto w = compute_filter_weight(*scene.filter,
+                                                     (xx + 0.5f) - pt.x,
+                                                     (yy + 0.5f) - pt.y);
+                atomic_add(weight_image[yy * width + xx], w);
+            }
+        }
+    }
+
+    // Field order matters wherever this struct is brace-initialised.
+    SceneData scene;
+    float *weight_image; // indexed as yy * width + xx
+    int width;
+    int height;
+    int num_samples_x;
+    int num_samples_y;
+    uint64_t seed;
+    bool use_prefiltering;
+};
+
+// "Mega kernel": one invocation handles one sample end to end (color and/or SDF, forward and/or backward).
+struct render_kernel {
+ DEVICE void operator()(int idx) {
+ // height * width * num_samples_y * num_samples_x
+ auto pt = Vector2f{0, 0};
+ auto x = 0;
+ auto y = 0;
+ if (eval_positions == nullptr) { // regular sample grid: decode pixel and sub-sample from idx
+ auto rng_state = init_pcg32(idx, seed);
+ auto sx = idx % num_samples_x;
+ auto sy = (idx / num_samples_x) % num_samples_y;
+ x = (idx / (num_samples_x * num_samples_y)) % width;
+ y = (idx / (num_samples_x * num_samples_y * width));
+ assert(x < width && y < height);
+ auto rx = next_pcg32_float(&rng_state);
+ auto ry = next_pcg32_float(&rng_state);
+ if (use_prefiltering) {
+ rx = ry = 0.5f; // fixed sub-cell centre instead of random jitter
+ }
+ pt = Vector2f{x + ((float)sx + rx) / num_samples_x,
+ y + ((float)sy + ry) / num_samples_y};
+ } else { // explicit positions: two floats (x, y) per idx
+ pt = Vector2f{eval_positions[2 * idx],
+ eval_positions[2 * idx + 1]};
+ x = int(pt.x); // NOTE(review): no bounds check; x, y index per-pixel buffers below - confirm callers keep positions inside the image
+ y = int(pt.y);
+ }
+
+ // normalize pt to [0, 1]
+ auto npt = pt;
+ npt.x /= width;
+ npt.y /= height;
+ auto num_samples = num_samples_x * num_samples_y;
+ if (render_image != nullptr || d_render_image != nullptr) { // color pass requested (forward and/or backward)
+ Vector4f d_color = Vector4f{0, 0, 0, 0};
+ if (d_render_image != nullptr) {
+ // Gather d_color from d_render_image inside the filter kernel
+ // normalize using weight_image
+ d_color = gather_d_color(*scene.filter,
+ d_render_image,
+ weight_image,
+ width,
+ height,
+ pt);
+ }
+ auto color = Vector4f{0, 0, 0, 0};
+ if (use_prefiltering) { // each optional pointer below is forwarded as nullptr when its buffer is absent
+ color = sample_color_prefiltered(scene,
+ background_image != nullptr ? (const Vector4f*)&background_image[4 * ((y * width) + x)] : nullptr,
+ npt,
+ d_render_image != nullptr ? &d_color : nullptr,
+ d_background_image != nullptr ? (Vector4f*)&d_background_image[4 * ((y * width) + x)] : nullptr,
+ d_translation != nullptr ? &d_translation[2 * (y * width + x)] : nullptr);
+ } else {
+ color = sample_color(scene,
+ background_image != nullptr ? (const Vector4f*)&background_image[4 * ((y * width) + x)] : nullptr,
+ npt,
+ d_render_image != nullptr ? &d_color : nullptr,
+ nullptr,
+ d_background_image != nullptr ? (Vector4f*)&d_background_image[4 * ((y * width) + x)] : nullptr,
+ d_translation != nullptr ? &d_translation[2 * (y * width + x)] : nullptr);
+ }
+ assert(isfinite(color));
+ // Splat color onto render_image
+ auto radius = scene.filter->radius;
+ assert(radius >= 0);
+ auto ri = (int)ceil(radius);
+ for (int dy = -ri; dy <= ri; dy++) {
+ for (int dx = -ri; dx <= ri; dx++) {
+ auto xx = x + dx;
+ auto yy = y + dy;
+ if (xx >= 0 && xx < width && yy >= 0 && yy < height &&
+ weight_image[yy * width + xx] > 0) { // NOTE(review): render() leaves weight_image null when eval_positions is set - confirm this branch is not reached then
+ auto weight_sum = weight_image[yy * width + xx];
+ auto xc = xx + 0.5f; // pixel centre
+ auto yc = yy + 0.5f;
+ auto filter_weight = compute_filter_weight(*scene.filter,
+ xc - pt.x,
+ yc - pt.y);
+ auto weighted_color = filter_weight * color / weight_sum;
+ if (render_image != nullptr) { // forward: accumulate the normalized contribution, one atomic per channel
+ atomic_add(render_image[4 * (yy * width + xx) + 0],
+ weighted_color[0]);
+ atomic_add(render_image[4 * (yy * width + xx) + 1],
+ weighted_color[1]);
+ atomic_add(render_image[4 * (yy * width + xx) + 2],
+ weighted_color[2]);
+ atomic_add(render_image[4 * (yy * width + xx) + 3],
+ weighted_color[3]);
+ }
+ if (d_render_image != nullptr) {
+ // Backprop to filter_weight
+ // pixel = \sum weight * color / \sum weight
+ auto d_pixel = Vector4f{
+ d_render_image[4 * (yy * width + xx) + 0],
+ d_render_image[4 * (yy * width + xx) + 1],
+ d_render_image[4 * (yy * width + xx) + 2],
+ d_render_image[4 * (yy * width + xx) + 3],
+ };
+ auto d_weight =
+ (dot(d_pixel, color) * weight_sum -
+ filter_weight * dot(d_pixel, color) * (weight_sum - filter_weight)) /
+ square(weight_sum);
+ d_compute_filter_weight(*scene.filter,
+ xc - pt.x,
+ yc - pt.y,
+ d_weight,
+ scene.d_filter);
+ }
+ }
+ }
+ }
+ }
+ if (sdf_image != nullptr || d_sdf_image != nullptr) { // distance pass requested (forward and/or backward)
+ float d_dist = 0.f;
+ if (d_sdf_image != nullptr) {
+ if (eval_positions == nullptr) {
+ d_dist = d_sdf_image[y * width + x]; // one gradient value per pixel
+ } else {
+ d_dist = d_sdf_image[idx]; // one gradient value per evaluation position
+ }
+ }
+ auto weight = eval_positions == nullptr ? 1.f / num_samples : 1.f; // 1 / samples-per-pixel on the grid, 1 for explicit positions
+ auto dist = sample_distance(scene, npt, weight,
+ d_sdf_image != nullptr ? &d_dist : nullptr,
+ d_translation != nullptr ? &d_translation[2 * (y * width + x)] : nullptr);
+ if (sdf_image != nullptr) {
+ if (eval_positions == nullptr) {
+ atomic_add(sdf_image[y * width + x], dist);
+ } else {
+ atomic_add(sdf_image[idx], dist);
+ }
+ }
+ }
+ }
+
+ SceneData scene;
+ float *background_image; // optional, 4 floats per pixel
+ float *render_image; // optional, 4 floats per pixel
+ float *weight_image; // one float per pixel, read as the normalizer while splatting
+ float *sdf_image; // optional; indexed per pixel, or per idx when eval_positions is set
+ float *d_background_image; // optional, 4 floats per pixel
+ float *d_render_image; // optional, 4 floats per pixel
+ float *d_sdf_image; // optional; indexed like sdf_image
+ float *d_translation; // optional, 2 floats per pixel
+ int width;
+ int height;
+ int num_samples_x;
+ int num_samples_y;
+ uint64_t seed;
+ bool use_prefiltering;
+ float *eval_positions; // optional, 2 floats (x, y) per idx
+};
+
+struct BoundarySample { // One sampled boundary point: filled in by sample_boundary_kernel, consumed by render_edge_kernel.
+ Vector2f pt; // boundary point in canvas space divided by (canvas_width, canvas_height)
+ Vector2f local_pt; // the same point in the shape's local space
+ Vector2f normal; // normal after xform_normal with the group's canvas_to_shape
+ int shape_group_id;
+ int shape_id; // -1 marks a sample that was rejected
+ float t; // random number handed to sample_boundary
+ BoundaryData data;
+ float pdf; // shape_pmf * boundary_pdf
+};
+
+struct sample_boundary_kernel { // Draws one boundary sample per idx and computes its Morton code for later sorting.
+ DEVICE void operator()(int idx) {
+ boundary_samples[idx].pt = Vector2f{0, 0}; // defaults describe a rejected sample (shape_id == -1)
+ boundary_samples[idx].shape_id = -1;
+ boundary_ids[idx] = idx;
+ morton_codes[idx] = 0;
+
+ auto rng_state = init_pcg32(idx, seed);
+ auto u = next_pcg32_float(&rng_state);
+ // Sample a shape
+ auto sample_id = sample(scene.sample_shapes_cdf,
+ scene.num_total_shapes,
+ u);
+ assert(sample_id >= 0 && sample_id < scene.num_total_shapes);
+ auto shape_id = scene.sample_shape_id[sample_id];
+ assert(shape_id >= 0 && shape_id < scene.num_shapes);
+ auto shape_group_id = scene.sample_group_id[sample_id];
+ assert(shape_group_id >= 0 && shape_group_id < scene.num_shape_groups);
+ auto shape_pmf = scene.sample_shapes_pmf[shape_id]; // NOTE(review): pmf is indexed by shape_id while the cdf above is indexed by sample_id - confirm this is intended
+ if (shape_pmf <= 0) {
+ return; // leaves the rejected-sample defaults in place
+ }
+ // Sample a point on the boundary of the shape
+ auto boundary_pdf = 0.f;
+ auto normal = Vector2f{0, 0};
+ auto t = next_pcg32_float(&rng_state);
+ BoundaryData boundary_data;
+ const ShapeGroup &shape_group = scene.shape_groups[shape_group_id];
+ auto local_boundary_pt = sample_boundary(
+ scene, shape_group_id, shape_id,
+ t, normal, boundary_pdf, boundary_data);
+ if (boundary_pdf <= 0) {
+ return; // leaves the rejected-sample defaults in place
+ }
+
+ // local_boundary_pt & normal are in shape's local space,
+ // transform them to canvas space
+ auto boundary_pt = xform_pt(shape_group.shape_to_canvas, local_boundary_pt);
+ normal = xform_normal(shape_group.canvas_to_shape, normal);
+ // Normalize boundary_pt to [0, 1)
+ boundary_pt.x /= scene.canvas_width;
+ boundary_pt.y /= scene.canvas_height;
+
+ boundary_samples[idx].pt = boundary_pt;
+ boundary_samples[idx].local_pt = local_boundary_pt;
+ boundary_samples[idx].normal = normal;
+ boundary_samples[idx].shape_group_id = shape_group_id;
+ boundary_samples[idx].shape_id = shape_id;
+ boundary_samples[idx].t = t;
+ boundary_samples[idx].data = boundary_data;
+ boundary_samples[idx].pdf = shape_pmf * boundary_pdf;
+ TVector2 p_i{boundary_pt.x * 1023, boundary_pt.y * 1023}; // scale the normalized point by 1023 before interleaving
+ morton_codes[idx] = (expand_bits(p_i.x) << 1u) |
+ (expand_bits(p_i.y) << 0u);
+ }
+
+ SceneData scene;
+ uint64_t seed;
+ BoundarySample *boundary_samples; // output, one entry per idx
+ int *boundary_ids; // output, initialised to idx
+ uint32_t *morton_codes; // output, 0 for rejected samples
+};
+
+struct render_edge_kernel { // Turns one boundary sample into gradient contributions for the shape, its transform and d_translation.
+ DEVICE void operator()(int idx) {
+ auto bid = boundary_ids[idx]; // indirection through boundary_ids (render() may have sorted it)
+ if (boundary_samples[bid].shape_id == -1) {
+ return; // sample was rejected by sample_boundary_kernel
+ }
+ auto boundary_pt = boundary_samples[bid].pt;
+ auto local_boundary_pt = boundary_samples[bid].local_pt;
+ auto normal = boundary_samples[bid].normal;
+ auto shape_group_id = boundary_samples[bid].shape_group_id;
+ auto shape_id = boundary_samples[bid].shape_id;
+ auto t = boundary_samples[bid].t;
+ auto boundary_data = boundary_samples[bid].data;
+ auto pdf = boundary_samples[bid].pdf;
+
+ const ShapeGroup &shape_group = scene.shape_groups[shape_group_id];
+
+ auto bx = int(boundary_pt.x * width); // pixel containing the boundary point
+ auto by = int(boundary_pt.y * height);
+ if (bx < 0 || bx >= width || by < 0 || by >= height) {
+ return; // boundary point falls outside the image
+ }
+
+ // Sample the two sides of the boundary
+ auto inside_query = EdgeQuery{shape_group_id, shape_id, false};
+ auto outside_query = EdgeQuery{shape_group_id, shape_id, false};
+ auto color_inside = sample_color(scene,
+ background_image != nullptr ? (const Vector4f *)&background_image[4 * ((by * width) + bx)] : nullptr,
+ boundary_pt - 1e-4f * normal,
+ nullptr, &inside_query);
+ auto color_outside = sample_color(scene,
+ background_image != nullptr ? (const Vector4f *)&background_image[4 * ((by * width) + bx)] : nullptr,
+ boundary_pt + 1e-4f * normal,
+ nullptr, &outside_query);
+ if (!inside_query.hit && !outside_query.hit) {
+ // occluded
+ return;
+ }
+ if (!inside_query.hit) { // only the +normal side hit: flip so "inside" is the side that hit
+ normal = -normal;
+ swap_(inside_query, outside_query);
+ swap_(color_inside, color_outside);
+ }
+ // Boundary point in screen space
+ auto sboundary_pt = boundary_pt;
+ sboundary_pt.x *= width;
+ sboundary_pt.y *= height;
+ auto d_color = gather_d_color(*scene.filter,
+ d_render_image,
+ weight_image,
+ width,
+ height,
+ sboundary_pt);
+ // Normalization factor
+ d_color /= float(scene.canvas_width * scene.canvas_height);
+
+ assert(isfinite(d_color));
+ assert(isfinite(pdf) && pdf > 0);
+ auto contrib = dot(color_inside - color_outside, d_color) / pdf; // color jump across the edge dotted with the incoming gradient, divided by the sampling pdf
+ ShapeGroup &d_shape_group = scene.d_shape_groups[shape_group_id];
+ accumulate_boundary_gradient(scene.shapes[shape_id],
+ contrib, t, normal, boundary_data, scene.d_shapes[shape_id],
+ shape_group.shape_to_canvas, local_boundary_pt, d_shape_group.shape_to_canvas);
+ // Don't need to backprop to filter weights:
+ // \int f'(x) g(x) dx doesn't contain discontinuities
+ // if f is continuous, even if g is discontinuous
+ if (d_translation != nullptr) {
+ // According to Reynold transport theorem,
+ // the Jacobian of the boundary integral is dot(velocity, normal)
+ // The velocity of the object translating x is (1, 0)
+ // The velocity of the object translating y is (0, 1)
+ atomic_add(&d_translation[2 * (by * width + bx) + 0], normal.x * contrib);
+ atomic_add(&d_translation[2 * (by * width + bx) + 1], normal.y * contrib);
+ }
+ }
+
+ SceneData scene;
+ const float *background_image; // optional, 4 floats per pixel
+ const BoundarySample *boundary_samples;
+ const int *boundary_ids;
+ float *weight_image;
+ float *d_render_image;
+ float *d_translation; // optional, 2 floats per pixel
+ int width;
+ int height;
+ int num_samples_x; // not read by operator()
+ int num_samples_y; // not read by operator()
+};
+
+void render(std::shared_ptr scene, // scene to render; its use_gpu / gpu_index select where the kernels run
+            ptr background_image, // optional, 4 floats per pixel
+            ptr render_image, // optional color output, 4 floats per pixel, accumulated by splatting
+            ptr render_sdf, // optional distance output
+            int width,
+            int height,
+            int num_samples_x,
+            int num_samples_y,
+            uint64_t seed,
+            ptr d_background_image, // optional gradient buffer for the background
+            ptr d_render_image, // optional incoming gradient of render_image; enables the backward pass
+            ptr d_render_sdf, // optional incoming gradient of render_sdf
+            ptr d_translation, // optional gradient buffer, 2 floats per pixel
+            bool use_prefiltering,
+            ptr eval_positions, // optional explicit sample positions, 2 floats each
+            int num_eval_positions) {
+#ifdef __NVCC__
+    int old_device_id = -1; // restored at the end of the function
+    if (scene->use_gpu) {
+        checkCuda(cudaGetDevice(&old_device_id));
+        if (scene->gpu_index != -1) {
+            checkCuda(cudaSetDevice(scene->gpu_index));
+        }
+    }
+#endif
+    parallel_init();
+
+    float *weight_image = nullptr; // stays null when explicit eval_positions are used
+    // Allocate and zero the weight image
+    if (scene->use_gpu) {
+#ifdef __CUDACC__
+        if (eval_positions.get() == nullptr) {
+            checkCuda(cudaMallocManaged(&weight_image, width * height * sizeof(float)));
+            checkCuda(cudaMemset(weight_image, 0, width * height * sizeof(float)));
+        }
+#else
+        assert(false);
+#endif
+    } else {
+        if (eval_positions.get() == nullptr) {
+            weight_image = (float*)malloc(width * height * sizeof(float));
+            memset(weight_image, 0, width * height * sizeof(float));
+        }
+    }
+
+    if (render_image.get() != nullptr || d_render_image.get() != nullptr ||
+            render_sdf.get() != nullptr || d_render_sdf.get() != nullptr) {
+        if (weight_image != nullptr) { // pass 1: per-pixel sum of filter weights
+            parallel_for(weight_kernel{
+                get_scene_data(*scene.get()),
+                weight_image,
+                width,
+                height,
+                num_samples_x, num_samples_y,
+                seed,
+                use_prefiltering // both kernels must place samples identically, or the weight sums do not normalize the splats
+            }, width * height * num_samples_x * num_samples_y, scene->use_gpu);
+        }
+
+        auto num_samples = eval_positions.get() == nullptr ?
+            width * height * num_samples_x * num_samples_y : num_eval_positions;
+        parallel_for(render_kernel{ // pass 2: shade every sample (and backpropagate if gradients were given)
+            get_scene_data(*scene.get()),
+            background_image.get(),
+            render_image.get(),
+            weight_image,
+            render_sdf.get(),
+            d_background_image.get(),
+            d_render_image.get(),
+            d_render_sdf.get(),
+            d_translation.get(),
+            width,
+            height,
+            num_samples_x,
+            num_samples_y,
+            seed,
+            use_prefiltering,
+            eval_positions.get()
+        }, num_samples, scene->use_gpu);
+    }
+
+    // Boundary sampling
+    if (!use_prefiltering && d_render_image.get() != nullptr) { // NOTE(review): weight_image is null here when eval_positions is set - confirm callers never combine the two
+        auto num_samples = width * height * num_samples_x * num_samples_y;
+        BoundarySample *boundary_samples = nullptr;
+        int *boundary_ids = nullptr; // for sorting
+        uint32_t *morton_codes = nullptr; // for sorting
+        // Allocate boundary samples
+        if (scene->use_gpu) {
+#ifdef __CUDACC__
+            checkCuda(cudaMallocManaged(&boundary_samples,
+                num_samples * sizeof(BoundarySample)));
+            checkCuda(cudaMallocManaged(&boundary_ids,
+                num_samples * sizeof(int)));
+            checkCuda(cudaMallocManaged(&morton_codes,
+                num_samples * sizeof(uint32_t)));
+#else
+            assert(false);
+#endif
+        } else {
+            boundary_samples = (BoundarySample*)malloc(
+                num_samples * sizeof(BoundarySample));
+            boundary_ids = (int*)malloc(
+                num_samples * sizeof(int));
+            morton_codes = (uint32_t*)malloc(
+                num_samples * sizeof(uint32_t));
+        }
+
+        // Edge sampling
+        // We sort the boundary samples for better thread coherency
+        parallel_for(sample_boundary_kernel{
+            get_scene_data(*scene.get()),
+            seed,
+            boundary_samples,
+            boundary_ids,
+            morton_codes
+        }, num_samples, scene->use_gpu);
+        if (scene->use_gpu) {
+            thrust::sort_by_key(thrust::device, morton_codes, morton_codes + num_samples, boundary_ids);
+        } else {
+            // Don't need to sort for CPU, we are not using SIMD hardware anyway.
+            // thrust::sort_by_key(thrust::host, morton_codes, morton_codes + num_samples, boundary_ids);
+        }
+        parallel_for(render_edge_kernel{
+            get_scene_data(*scene.get()),
+            background_image.get(),
+            boundary_samples,
+            boundary_ids,
+            weight_image,
+            d_render_image.get(),
+            d_translation.get(),
+            width,
+            height,
+            num_samples_x,
+            num_samples_y
+        }, num_samples, scene->use_gpu);
+        if (scene->use_gpu) {
+#ifdef __CUDACC__
+            checkCuda(cudaFree(boundary_samples));
+            checkCuda(cudaFree(boundary_ids));
+            checkCuda(cudaFree(morton_codes));
+#else
+            assert(false);
+#endif
+        } else {
+            free(boundary_samples);
+            free(boundary_ids);
+            free(morton_codes);
+        }
+    }
+
+    // Clean up weight image
+    if (scene->use_gpu) {
+#ifdef __CUDACC__
+        checkCuda(cudaFree(weight_image)); // weight_image may still be nullptr here
+#else
+        assert(false);
+#endif
+    } else {
+        free(weight_image); // weight_image may still be nullptr here
+    }
+
+    if (scene->use_gpu) {
+        cuda_synchronize();
+    }
+
+    parallel_cleanup();
+#ifdef __NVCC__
+    if (old_device_id != -1) {
+        checkCuda(cudaSetDevice(old_device_id));
+    }
+#endif
+}
+
+PYBIND11_MODULE(diffvg, m) { // Python bindings: pointer wrappers, vectors, shapes, colors, filter, scene and render()
+ m.doc() = "Differential Vector Graphics";
+
+ py::class_>(m, "void_ptr")
+ .def(py::init())
+ .def("as_size_t", &ptr::as_size_t);
+ py::class_>(m, "float_ptr")
+ .def(py::init());
+ py::class_>(m, "int_ptr")
+ .def(py::init());
+
+ py::class_(m, "Vector2f")
+ .def(py::init())
+ .def_readwrite("x", &Vector2f::x)
+ .def_readwrite("y", &Vector2f::y);
+
+ py::class_(m, "Vector3f")
+ .def(py::init())
+ .def_readwrite("x", &Vector3f::x)
+ .def_readwrite("y", &Vector3f::y)
+ .def_readwrite("z", &Vector3f::z);
+
+ py::class_(m, "Vector4f")
+ .def(py::init())
+ .def_readwrite("x", &Vector4f::x)
+ .def_readwrite("y", &Vector4f::y)
+ .def_readwrite("z", &Vector4f::z)
+ .def_readwrite("w", &Vector4f::w);
+
+ py::enum_(m, "ShapeType")
+ .value("circle", ShapeType::Circle)
+ .value("ellipse", ShapeType::Ellipse)
+ .value("path", ShapeType::Path)
+ .value("rect", ShapeType::Rect);
+
+ py::class_(m, "Circle")
+ .def(py::init())
+ .def("get_ptr", &Circle::get_ptr)
+ .def_readonly("radius", &Circle::radius)
+ .def_readonly("center", &Circle::center);
+
+ py::class_(m, "Ellipse")
+ .def(py::init())
+ .def("get_ptr", &Ellipse::get_ptr)
+ .def_readonly("radius", &Ellipse::radius)
+ .def_readonly("center", &Ellipse::center);
+
+ py::class_(m, "Path")
+ .def(py::init, ptr, ptr, int, int, bool, bool>())
+ .def("get_ptr", &Path::get_ptr)
+ .def("has_thickness", &Path::has_thickness)
+ .def("copy_to", &Path::copy_to)
+ .def_readonly("num_points", &Path::num_points);
+
+ py::class_(m, "Rect")
+ .def(py::init())
+ .def("get_ptr", &Rect::get_ptr)
+ .def_readonly("p_min", &Rect::p_min)
+ .def_readonly("p_max", &Rect::p_max);
+
+ py::enum_(m, "ColorType")
+ .value("constant", ColorType::Constant)
+ .value("linear_gradient", ColorType::LinearGradient)
+ .value("radial_gradient", ColorType::RadialGradient);
+
+ py::class_(m, "Constant")
+ .def(py::init())
+ .def("get_ptr", &Constant::get_ptr)
+ .def_readonly("color", &Constant::color);
+
+ py::class_(m, "LinearGradient")
+ .def(py::init, ptr>())
+ .def("get_ptr", &LinearGradient::get_ptr)
+ .def("copy_to", &LinearGradient::copy_to)
+ .def_readonly("begin", &LinearGradient::begin)
+ .def_readonly("end", &LinearGradient::end)
+ .def_readonly("num_stops", &LinearGradient::num_stops);
+
+ py::class_(m, "RadialGradient")
+ .def(py::init, ptr>())
+ .def("get_ptr", &RadialGradient::get_ptr)
+ .def("copy_to", &RadialGradient::copy_to)
+ .def_readonly("center", &RadialGradient::center)
+ .def_readonly("radius", &RadialGradient::radius)
+ .def_readonly("num_stops", &RadialGradient::num_stops);
+
+ py::class_(m, "Shape")
+ .def(py::init, float>())
+ .def("as_circle", &Shape::as_circle)
+ .def("as_ellipse", &Shape::as_ellipse)
+ .def("as_path", &Shape::as_path)
+ .def("as_rect", &Shape::as_rect)
+ .def_readonly("type", &Shape::type)
+ .def_readonly("stroke_width", &Shape::stroke_width);
+
+ py::class_(m, "ShapeGroup")
+ .def(py::init,
+ int,
+ ColorType,
+ ptr,
+ ColorType,
+ ptr,
+ bool,
+ ptr>())
+ .def("fill_color_as_constant", &ShapeGroup::fill_color_as_constant)
+ .def("fill_color_as_linear_gradient", &ShapeGroup::fill_color_as_linear_gradient)
+ .def("fill_color_as_radial_gradient", &ShapeGroup::fill_color_as_radial_gradient)
+ .def("stroke_color_as_constant", &ShapeGroup::stroke_color_as_constant)
+ .def("stroke_color_as_linear_gradient", &ShapeGroup::stroke_color_as_linear_gradient)
+ .def("stroke_color_as_radial_gradient", &ShapeGroup::stroke_color_as_radial_gradient) // must read the stroke color, like the two stroke accessors above
+ .def("has_fill_color", &ShapeGroup::has_fill_color)
+ .def("has_stroke_color", &ShapeGroup::has_stroke_color)
+ .def("copy_to", &ShapeGroup::copy_to)
+ .def_readonly("fill_color_type", &ShapeGroup::fill_color_type)
+ .def_readonly("stroke_color_type", &ShapeGroup::stroke_color_type);
+
+ py::enum_(m, "FilterType")
+ .value("box", FilterType::Box)
+ .value("tent", FilterType::Tent)
+ .value("parabolic", FilterType::RadialParabolic)
+ .value("hann", FilterType::Hann);
+
+ py::class_(m, "Filter")
+ .def(py::init());
+
+ py::class_>(m, "Scene")
+ .def(py::init &,
+ const std::vector &,
+ const Filter &,
+ bool,
+ int>())
+ .def("get_d_shape", &Scene::get_d_shape)
+ .def("get_d_shape_group", &Scene::get_d_shape_group)
+ .def("get_d_filter_radius", &Scene::get_d_filter_radius)
+ .def_readonly("num_shapes", &Scene::num_shapes)
+ .def_readonly("num_shape_groups", &Scene::num_shape_groups);
+
+ m.def("render", &render, ""); // module-level entry point for rendering
+}
diff --git a/diffvg.h b/diffvg.h
new file mode 100644
index 0000000000000000000000000000000000000000..400e4dc3f60d89061fe3842e09688f130d49c557
--- /dev/null
+++ b/diffvg.h
@@ -0,0 +1,156 @@
+#pragma once
+
+#ifdef __NVCC__
+ #define DEVICE __device__ __host__
+#else
+ #define DEVICE
+#endif
+
+#ifndef __NVCC__ // host-only build: supply the math names used further down
+    #include <cmath>
+    namespace {
+        inline float fmodf(float a, float b) { // float remainder via std::fmod
+            return std::fmod(a, b);
+        }
+        inline double fmod(double a, double b) { // double remainder via std::fmod
+            return std::fmod(a, b);
+        }
+    }
+    using std::isfinite;
+#endif
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+#include
+#include
+
+// We use Real for most of the internal computation.
+// However, for PyTorch interfaces, Optix Prime and Embree queries
+// we use float
+using Real = float;
+
+template <typename T> // T: any type with operator*
+DEVICE
+inline T square(const T &x) { // x to the second power
+    return x * x;
+}
+
+template <typename T> // T: any type with operator*
+DEVICE
+inline T cubic(const T &x) { // x to the third power
+    return x * x * x;
+}
+
+template <typename T> // T: any type ordered by operator< and operator>
+DEVICE
+inline T clamp(const T &v, const T &lo, const T &hi) {
+    if (v < lo) return lo; // below the range
+    else if (v > hi) return hi; // above the range
+    else return v; // already inside [lo, hi]
+}
+
+DEVICE // a % b, shifted by b when the remainder is negative (lands in [0, b) for b > 0)
+inline int modulo(int a, int b) {
+    const int rem = a % b;
+    if (rem < 0) { return rem + b; }
+    return rem;
+}
+
+DEVICE // ::fmodf remainder, shifted by b when it is negative
+inline float modulo(float a, float b) {
+    const float rem = ::fmodf(a, b);
+    if (rem < 0.0f) { return rem + b; }
+    return rem;
+}
+
+DEVICE // ::fmod remainder, shifted by b when it is negative
+inline double modulo(double a, double b) {
+    const double rem = ::fmod(a, b);
+    if (rem < 0.0) { return rem + b; }
+    return rem;
+}
+
+template <typename T> // T: any type comparable with operator>
+DEVICE
+inline T max(const T &a, const T &b) {
+    return a > b ? a : b; // b is returned when the two compare equal
+}
+
+template <typename T> // T: any type comparable with operator<
+DEVICE
+inline T min(const T &a, const T &b) {
+    return a < b ? a : b; // b is returned when the two compare equal
+}
+
+/// Ceiling of x / y, computed as (x + y - 1) / y (rounds up for x >= 0, y > 0).
+inline int idiv_ceil(int x, int y) {
+    return (x + y - 1) / y;
+}
+
+template <typename T> // T: copy-constructible and copy-assignable
+DEVICE
+inline void swap_(T &a, T &b) {
+    T tmp = a; // keep a's old value while a is overwritten
+    a = b;
+    b = tmp;
+}
+
+inline double log2(double x) { // base-2 logarithm by change of base
+    return log(x) / log(2.0); // divisor kept in double so the quotient has full double precision
+}
+
+template <typename T> // T: a type accepted by acos and constructible from M_PI
+DEVICE
+inline T safe_acos(const T &x) {
+    if (x >= 1) return T(0); // acos(1) = 0; also catches inputs above 1
+    else if(x <= -1) return T(M_PI); // acos(-1) = pi; also catches inputs below -1
+    return acos(x);
+}
+
+// Morton-code helper: spreads the low 10 bits of x apart so that a zero
+// bit follows each of them (input bit i ends up at output bit 2 * i).
+// Example: 0b011 -> 0b0101.
+DEVICE
+inline uint32_t expand_bits(uint32_t x) {
+    // Bits 10 and above of x are ignored.
+    constexpr uint32_t num_bits = 10u;
+    uint32_t spread = 0u;
+    // Walk from the least significant bit (bit 0) upwards.
+    for (uint32_t bit = 0u; bit < num_bits; ++bit) {
+        // Isolate input bit `bit`, then move it up by `bit` more
+        // positions so that it lands on position 2 * bit.
+        const uint32_t isolated = x & (uint32_t(1) << bit);
+        spread |= isolated << bit;
+    }
+    // The highest bit that can be set is bit 18.
+    return spread;
+}
+
+// DEVICE
+// inline int clz(uint64_t x) {
+// #ifdef __CUDA_ARCH__
+// return __clzll(x);
+// #else
+// // TODO: use _BitScanReverse in windows
+// return x == 0 ? 64 : __builtin_clzll(x);
+// #endif
+// }
+
+// DEVICE
+// inline int ffs(uint8_t x) {
+// #ifdef __CUDA_ARCH__
+// return __ffs(x);
+// #else
+// // TODO: use _BitScanReverse in windows
+// return __builtin_ffs(x);
+// #endif
+// }
+
+// DEVICE
+// inline int popc(uint8_t x) {
+// #ifdef __CUDA_ARCH__
+// return __popc(x);
+// #else
+// // TODO: use _popcnt in windows
+// return __builtin_popcount(x);
+// #endif
+// }
diff --git a/edge_query.h b/edge_query.h
new file mode 100644
index 0000000000000000000000000000000000000000..57f233a3203c1ea8d6b73f6624036578483442bb
--- /dev/null
+++ b/edge_query.h
@@ -0,0 +1,7 @@
+#pragma once
+
+struct EdgeQuery { // Asks a color query whether it hit one particular shape.
+ int shape_group_id; // group of the shape being asked about
+ int shape_id; // shape being asked about
+ bool hit; // Do we hit the specified shape_group_id & shape_id?
+};
diff --git a/examples/1.png b/examples/1.png
new file mode 100644
index 0000000000000000000000000000000000000000..6a5b458aae3c26c7614c63cee104dc6614b3a5a2
Binary files /dev/null and b/examples/1.png differ
diff --git a/examples/2.png b/examples/2.png
new file mode 100644
index 0000000000000000000000000000000000000000..e3e1050726124b2c197dd05e1ffa44342e2acb36
Binary files /dev/null and b/examples/2.png differ
diff --git a/examples/3.jpg b/examples/3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7ccdcf7d82f4dae849ceec62f68d10a6acddbcdd
Binary files /dev/null and b/examples/3.jpg differ
diff --git a/examples/4.png b/examples/4.png
new file mode 100644
index 0000000000000000000000000000000000000000..6355c30cb8be9014029029f9b69453bae47c8b80
Binary files /dev/null and b/examples/4.png differ
diff --git a/examples/5.png b/examples/5.png
new file mode 100644
index 0000000000000000000000000000000000000000..5705c2ff34aa0df1cffe65d5e5be7b41a607224c
Binary files /dev/null and b/examples/5.png differ
diff --git a/figures/smile.png b/figures/smile.png
new file mode 100644
index 0000000000000000000000000000000000000000..5705c2ff34aa0df1cffe65d5e5be7b41a607224c
Binary files /dev/null and b/figures/smile.png differ
diff --git a/filter.h b/filter.h
new file mode 100644
index 0000000000000000000000000000000000000000..2dd0b62acb83e94da89696e9a8024c4b919f6749
--- /dev/null
+++ b/filter.h
@@ -0,0 +1,106 @@
+#pragma once
+
+#include "diffvg.h"
+#include "atomic.h"
+
+enum class FilterType { // Filter shapes understood by compute_filter_weight
+ Box,
+ Tent,
+ RadialParabolic, // per axis: 4/3 (1 - (d/r)^2)
+ Hann // https://en.wikipedia.org/wiki/Window_function#Hann_and_Hamming_windows
+};
+
+struct Filter { // Filter description read by compute_filter_weight
+ FilterType type;
+ float radius; // weights are 0 where |dx| or |dy| exceeds this
+};
+
+struct DFilter { // Gradient accumulator for Filter
+ float radius; // gradient w.r.t. Filter::radius, added to by d_compute_filter_weight
+};
+
+DEVICE // Weight of a sample at offset (dx, dy) from a pixel centre; 0 where |dx| or |dy| exceeds the radius.
+inline
+float compute_filter_weight(const Filter &filter,
+                            float dx,
+                            float dy) {
+    const auto r = filter.radius;
+    if (fabs(dx) > r || fabs(dy) > r) {
+        return 0;
+    }
+    if (filter.type == FilterType::Box) {
+        return 1.f / square(2 * r);
+    }
+    if (filter.type == FilterType::Tent) {
+        return (r - fabs(dx)) * (r - fabs(dy)) / square(square(r));
+    }
+    if (filter.type == FilterType::RadialParabolic) {
+        return (4.f / 3.f) * (1 - square(dx / r)) *
+               (4.f / 3.f) * (1 - square(dy / r));
+    }
+    // Only the Hann window is left.
+    assert(filter.type == FilterType::Hann);
+    // Map dx, dy from [-r, r] to [0, 1]; the product is divided by r^2.
+    auto ndx = (dx / (2*r)) + 0.5f;
+    auto ndy = (dy / (2*r)) + 0.5f;
+    return 0.5f * (1.f - cos(float(2 * M_PI) * ndx)) *
+           0.5f * (1.f - cos(float(2 * M_PI) * ndy)) / square(r);
+}
+
+DEVICE // Backward of compute_filter_weight: atomically adds d_return * d(weight)/d(radius) to d_filter->radius.
+inline
+void d_compute_filter_weight(const Filter &filter,
+                             float dx,
+                             float dy,
+                             float d_return,
+                             DFilter *d_filter) {
+    const auto r = filter.radius;
+    if (fabs(dx) > r || fabs(dy) > r) {
+        // compute_filter_weight returns a constant 0 here, so the gradient is 0 too.
+        return;
+    }
+    if (filter.type == FilterType::Box) {
+        // w = 1 / (2r)^2  =>  dw/dr = -2 * 2 / (2r)^3
+        atomic_add(d_filter->radius, d_return * (-2) * 2 / cubic(2 * r));
+    } else if (filter.type == FilterType::Tent) {
+        // w = fx * fy * norm, fx = r - |dx|, fy = r - |dy|, norm = 1 / r^4
+        auto fx = r - fabs(dx);
+        auto fy = r - fabs(dy);
+        auto norm = 1 / square(square(r));
+        auto d_fx = d_return * fy * norm; // dfx/dr = 1
+        auto d_fy = d_return * fx * norm; // dfy/dr = 1
+        auto d_norm = d_return * fx * fy; // dnorm/dr = -4 / r^5
+        atomic_add(d_filter->radius,
+                   d_fx + d_fy + (-4) * d_norm / (square(square(r)) * r));
+    } else if (filter.type == FilterType::RadialParabolic) {
+        // w = (4/3) gx * (4/3) gy, gx = 1 - (dx/r)^2, gy = 1 - (dy/r)^2
+        auto gx = 1 - square(dx / r);
+        auto gy = 1 - square(dy / r);
+        auto scale = (4.f / 3.f) * (4.f / 3.f);
+        auto d_gx = d_return * scale * gy;
+        auto d_gy = d_return * scale * gx;
+        // dgx/dr = 2 dx^2 / r^3, dgy/dr = 2 dy^2 / r^3
+        auto r3 = cubic(r);
+        atomic_add(d_filter->radius,
+                   d_gx * 2 * square(dx) / r3 + d_gy * 2 * square(dy) / r3);
+    } else {
+        assert(filter.type == FilterType::Hann);
+        // w = fx * fy * norm, fx = 0.5 (1 - cos(2 pi ndx)), fy likewise,
+        // ndx = dx / (2r) + 0.5, ndy = dy / (2r) + 0.5, norm = 1 / r^2
+        auto ndx = (dx / (2*r)) + 0.5f;
+        auto ndy = (dy / (2*r)) + 0.5f;
+        auto fx = 0.5f * (1.f - cos(float(2*M_PI) * ndx));
+        auto fy = 0.5f * (1.f - cos(float(2*M_PI) * ndy));
+        auto norm = 1 / square(r);
+        auto d_fx = d_return * fy * norm;
+        auto d_fy = d_return * fx * norm;
+        auto d_norm = d_return * fx * fy;
+        auto d_ndx = d_fx * 0.5f * sin(float(2*M_PI) * ndx) * float(2*M_PI);
+        auto d_ndy = d_fy * 0.5f * sin(float(2*M_PI) * ndy) * float(2*M_PI);
+        // dndx/dr = -dx / (2 r^2), dndy/dr = -dy / (2 r^2), dnorm/dr = -2 / r^3
+        atomic_add(d_filter->radius,
+                   d_ndx * (-2*dx / square(2*r)) +
+                   d_ndy * (-2*dy / square(2*r)) +
+                   (-2) * d_norm / cubic(r));
+    }
+}
diff --git a/icon/logo.ico b/icon/logo.ico
new file mode 100644
index 0000000000000000000000000000000000000000..11baf9d82d4cc010b86460dd965167e64f5a88a3
Binary files /dev/null and b/icon/logo.ico differ
diff --git a/img_example/Millenial-at-work.jpg b/img_example/Millenial-at-work.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..78cb50b60443c03873bd9ee35c8cd4541387fa34
Binary files /dev/null and b/img_example/Millenial-at-work.jpg differ
diff --git a/img_example/bus.jpg b/img_example/bus.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b43e311165c785f000eb7493ff8fb662d06a3f83
Binary files /dev/null and b/img_example/bus.jpg differ
diff --git a/img_example/zidane.jpg b/img_example/zidane.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..92d72ea124760ce5dbf9425e3aa8f371e7481328
Binary files /dev/null and b/img_example/zidane.jpg differ
diff --git a/main.py b/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..00ed8601b4b1d85741ab8d5c75adbbf425942d2b
--- /dev/null
+++ b/main.py
@@ -0,0 +1,1040 @@
+"""
+Here are some use cases:
+python main.py --config config/all.yaml --experiment experiment_8x1 --signature demo1 --target data/demo1.png
+"""
+import pydiffvg
+import torch
+import cv2
+import matplotlib.pyplot as plt
+import random
+import argparse
+import math
+import errno
+from tqdm import tqdm
+from torch.optim.lr_scheduler import CosineAnnealingLR, LambdaLR
+from torch.nn.functional import adaptive_avg_pool2d
+import warnings
+warnings.filterwarnings("ignore")
+
+import PIL
+import PIL.Image
+import os
+import os.path as osp
+import numpy as np
+import numpy.random as npr
+import shutil
+import copy
+# import skfmm
+from xing_loss import xing_loss
+
+import yaml
+from easydict import EasyDict as edict
+
+
+pydiffvg.set_print_timing(False)
+gamma = 1.0
+
+##########
+# helper #
+##########
+
+from utils import \
+ get_experiment_id, \
+ get_path_schedule, \
+ edict_2_dict, \
+ check_and_create_dir
+
+def get_bezier_circle(radius=1, segments=4, bias=None):
+ points = []
+ if bias is None:
+ bias = (random.random(), random.random())
+ avg_degree = 360 / (segments*3)
+ for i in range(0, segments*3):
+ point = (np.cos(np.deg2rad(i * avg_degree)),
+ np.sin(np.deg2rad(i * avg_degree)))
+ points.append(point)
+ points = torch.tensor(points)
+ points = (points)*radius + torch.tensor(bias).unsqueeze(dim=0)
+ points = points.type(torch.FloatTensor)
+ return points
+
+def get_sdf(phi, method='skfmm', **kwargs):
+ if method == 'skfmm':
+ import skfmm
+ phi = (phi-0.5)*2
+ if (phi.max() <= 0) or (phi.min() >= 0):
+ return np.zeros(phi.shape).astype(np.float32)
+ sd = skfmm.distance(phi, dx=1)
+
+ flip_negative = kwargs.get('flip_negative', True)
+ if flip_negative:
+ sd = np.abs(sd)
+
+ truncate = kwargs.get('truncate', 10)
+ sd = np.clip(sd, -truncate, truncate)
+ # print(f"max sd value is: {sd.max()}")
+
+ zero2max = kwargs.get('zero2max', True)
+ if zero2max and flip_negative:
+ sd = sd.max() - sd
+ elif zero2max:
+ raise ValueError
+
+ normalize = kwargs.get('normalize', 'sum')
+ if normalize == 'sum':
+ sd /= sd.sum()
+ elif normalize == 'to1':
+ sd /= sd.max()
+ return sd
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--debug', action='store_true', default=False)
+ parser.add_argument("--config", type=str)
+ parser.add_argument("--experiment", type=str)
+ parser.add_argument("--seed", type=int)
+ parser.add_argument("--target", type=str, help="target image path")
+ parser.add_argument('--log_dir', metavar='DIR', default="log/debug")
+ parser.add_argument('--initial', type=str, default="random", choices=['random', 'circle'])
+ parser.add_argument('--signature', nargs='+', type=str)
+ parser.add_argument('--seginit', nargs='+', type=str)
+ parser.add_argument("--num_segments", type=int, default=4)
+ # parser.add_argument("--num_paths", type=str, default="1,1,1")
+ # parser.add_argument("--num_iter", type=int, default=500)
+ # parser.add_argument('--free', action='store_true')
+ # Please ensure that image resolution is divisible by pool_size; otherwise the performance would drop a lot.
+ # parser.add_argument('--pool_size', type=int, default=40, help="the pooled image size for next path initialization")
+ # parser.add_argument('--save_loss', action='store_true')
+ # parser.add_argument('--save_init', action='store_true')
+ # parser.add_argument('--save_image', action='store_true')
+ # parser.add_argument('--save_video', action='store_true')
+ # parser.add_argument('--print_weight', action='store_true')
+ # parser.add_argument('--circle_init_radius', type=float)
+ cfg = edict()
+ args = parser.parse_args()
+ cfg.debug = args.debug
+ cfg.config = args.config
+ cfg.experiment = args.experiment
+ cfg.seed = args.seed
+ cfg.target = args.target
+ cfg.log_dir = args.log_dir
+ cfg.initial = args.initial
+ cfg.signature = args.signature
+ # set cfg num_segments in command
+ cfg.num_segments = args.num_segments
+ if args.seginit is not None:
+ cfg.seginit = edict()
+ cfg.seginit.type = args.seginit[0]
+ if cfg.seginit.type == 'circle':
+ cfg.seginit.radius = float(args.seginit[1])
+ return cfg
+
+def ycrcb_conversion(im, format='[bs x 3 x 2D]', reverse=False):
+ mat = torch.FloatTensor([
+ [ 65.481/255, 128.553/255, 24.966/255], # ranged_from [0, 219/255]
+ [-37.797/255, -74.203/255, 112.000/255], # ranged_from [-112/255, 112/255]
+ [112.000/255, -93.786/255, -18.214/255], # ranged_from [-112/255, 112/255]
+ ]).to(im.device)
+
+ if reverse:
+ mat = mat.inverse()
+
+ if format == '[bs x 3 x 2D]':
+ im = im.permute(0, 2, 3, 1)
+ im = torch.matmul(im, mat.T)
+ im = im.permute(0, 3, 1, 2).contiguous()
+ return im
+ elif format == '[2D x 3]':
+ im = torch.matmul(im, mat.T)
+ return im
+ else:
+ raise ValueError
+
+class random_coord_init():
+ def __init__(self, canvas_size):
+ self.canvas_size = canvas_size
+ def __call__(self):
+ h, w = self.canvas_size
+ return [npr.uniform(0, 1)*w, npr.uniform(0, 1)*h]
+
+class naive_coord_init():
+ def __init__(self, pred, gt, format='[bs x c x 2D]', replace_sampling=True):
+ if isinstance(pred, torch.Tensor):
+ pred = pred.detach().cpu().numpy()
+ if isinstance(gt, torch.Tensor):
+ gt = gt.detach().cpu().numpy()
+
+ if format == '[bs x c x 2D]':
+ self.map = ((pred[0] - gt[0])**2).sum(0)
+ elif format == ['[2D x c]']:
+ self.map = ((pred - gt)**2).sum(-1)
+ else:
+ raise ValueError
+ self.replace_sampling = replace_sampling
+
+ def __call__(self):
+ coord = np.where(self.map == self.map.max())
+ coord_h, coord_w = coord[0][0], coord[1][0]
+ if self.replace_sampling:
+ self.map[coord_h, coord_w] = -1
+ return [coord_w, coord_h]
+
+
+class sparse_coord_init():
+ def __init__(self, pred, gt, format='[bs x c x 2D]', quantile_interval=200, nodiff_thres=0.1):
+ if isinstance(pred, torch.Tensor):
+ pred = pred.detach().cpu().numpy()
+ if isinstance(gt, torch.Tensor):
+ gt = gt.detach().cpu().numpy()
+ if format == '[bs x c x 2D]':
+ self.map = ((pred[0] - gt[0])**2).sum(0)
+ self.reference_gt = copy.deepcopy(
+ np.transpose(gt[0], (1, 2, 0)))
+ elif format == ['[2D x c]']:
+ self.map = (np.abs(pred - gt)).sum(-1)
+ self.reference_gt = copy.deepcopy(gt[0])
+ else:
+ raise ValueError
+ # OptionA: Zero too small errors to avoid the error too small deadloop
+ self.map[self.map < nodiff_thres] = 0
+ quantile_interval = np.linspace(0., 1., quantile_interval)
+ quantized_interval = np.quantile(self.map, quantile_interval)
+ # remove redundant
+ quantized_interval = np.unique(quantized_interval)
+ quantized_interval = sorted(quantized_interval[1:-1])
+ self.map = np.digitize(self.map, quantized_interval, right=False)
+ self.map = np.clip(self.map, 0, 255).astype(np.uint8)
+ self.idcnt = {}
+ for idi in sorted(np.unique(self.map)):
+ self.idcnt[idi] = (self.map==idi).sum()
+ self.idcnt.pop(min(self.idcnt.keys()))
+ # remove smallest one to remove the correct region
+ def __call__(self):
+ if len(self.idcnt) == 0:
+ h, w = self.map.shape
+ return [npr.uniform(0, 1)*w, npr.uniform(0, 1)*h]
+ target_id = max(self.idcnt, key=self.idcnt.get)
+ _, component, cstats, ccenter = cv2.connectedComponentsWithStats(
+ (self.map==target_id).astype(np.uint8), connectivity=4)
+ # remove cid = 0, it is the invalid area
+ csize = [ci[-1] for ci in cstats[1:]]
+ target_cid = csize.index(max(csize))+1
+ center = ccenter[target_cid][::-1]
+ coord = np.stack(np.where(component == target_cid)).T
+ dist = np.linalg.norm(coord-center, axis=1)
+ target_coord_id = np.argmin(dist)
+ coord_h, coord_w = coord[target_coord_id]
+ # replace_sampling
+ self.idcnt[target_id] -= max(csize)
+ if self.idcnt[target_id] == 0:
+ self.idcnt.pop(target_id)
+ self.map[component == target_cid] = 0
+ return [coord_w, coord_h]
+
+
+def init_shapes(num_paths,
+ num_segments,
+ canvas_size,
+ seginit_cfg,
+ shape_cnt,
+ pos_init_method=None,
+ trainable_stroke=False,
+ gt=None,
+ **kwargs):
+ shapes = []
+ shape_groups = []
+ h, w = canvas_size
+
+ # change path init location
+ if pos_init_method is None:
+ pos_init_method = random_coord_init(canvas_size=canvas_size)
+
+ for i in range(num_paths):
+ num_control_points = [2] * num_segments
+
+ if seginit_cfg.type=="random":
+ points = []
+ p0 = pos_init_method()
+ color_ref = copy.deepcopy(p0)
+ points.append(p0)
+ for j in range(num_segments):
+ radius = seginit_cfg.radius
+ p1 = (p0[0] + radius * npr.uniform(-0.5, 0.5),
+ p0[1] + radius * npr.uniform(-0.5, 0.5))
+ p2 = (p1[0] + radius * npr.uniform(-0.5, 0.5),
+ p1[1] + radius * npr.uniform(-0.5, 0.5))
+ p3 = (p2[0] + radius * npr.uniform(-0.5, 0.5),
+ p2[1] + radius * npr.uniform(-0.5, 0.5))
+ points.append(p1)
+ points.append(p2)
+ if j < num_segments - 1:
+ points.append(p3)
+ p0 = p3
+ points = torch.FloatTensor(points)
+
+ # circle points initialization
+ elif seginit_cfg.type=="circle":
+ radius = seginit_cfg.radius
+ if radius is None:
+ radius = npr.uniform(0.5, 1)
+ center = pos_init_method()
+ color_ref = copy.deepcopy(center)
+ points = get_bezier_circle(
+ radius=radius, segments=num_segments,
+ bias=center)
+
+ path = pydiffvg.Path(num_control_points = torch.LongTensor(num_control_points),
+ points = points,
+ stroke_width = torch.tensor(0.0),
+ is_closed = True)
+ shapes.append(path)
+ # !!!!!!problem is here. the shape group shape_ids is wrong
+
+ if gt is not None:
+ wref, href = color_ref
+ wref = max(0, min(int(wref), w-1))
+ href = max(0, min(int(href), h-1))
+ fill_color_init = list(gt[0, :, href, wref]) + [1.]
+ fill_color_init = torch.FloatTensor(fill_color_init)
+ stroke_color_init = torch.FloatTensor(npr.uniform(size=[4]))
+ else:
+ fill_color_init = torch.FloatTensor(npr.uniform(size=[4]))
+ stroke_color_init = torch.FloatTensor(npr.uniform(size=[4]))
+
+ path_group = pydiffvg.ShapeGroup(
+ shape_ids = torch.LongTensor([shape_cnt+i]),
+ fill_color = fill_color_init,
+ stroke_color = stroke_color_init,
+ )
+ shape_groups.append(path_group)
+
+ point_var = []
+ color_var = []
+
+ for path in shapes:
+ path.points.requires_grad = True
+ point_var.append(path.points)
+ for group in shape_groups:
+ group.fill_color.requires_grad = True
+ color_var.append(group.fill_color)
+
+ if trainable_stroke:
+ stroke_width_var = []
+ stroke_color_var = []
+ for path in shapes:
+ path.stroke_width.requires_grad = True
+ stroke_width_var.append(path.stroke_width)
+ for group in shape_groups:
+ group.stroke_color.requires_grad = True
+ stroke_color_var.append(group.stroke_color)
+ return shapes, shape_groups, point_var, color_var, stroke_width_var, stroke_color_var
+ else:
+ return shapes, shape_groups, point_var, color_var
+
+class linear_decay_lrlambda_f(object):
+ def __init__(self, decay_every, decay_ratio):
+ self.decay_every = decay_every
+ self.decay_ratio = decay_ratio
+
+ def __call__(self, n):
+ decay_time = n//self.decay_every
+ decay_step = n %self.decay_every
+ lr_s = self.decay_ratio**decay_time
+ lr_e = self.decay_ratio**(decay_time+1)
+ r = decay_step/self.decay_every
+ lr = lr_s * (1-r) + lr_e * r
+ return lr
+
+def main_func(target, experiment, num_iter, cfg_arg):
+ """Optimise vector paths against ``target`` and return the last render.
+
+ The YAML file at ``cfg_arg.config`` is loaded; the section named by
+ ``cfg_arg.experiment`` is updated with the ``default`` section and then
+ with ``cfg_arg``. ``target``, ``experiment`` and ``num_iter`` passed here
+ then replace the merged values. Paths are added according to
+ ``get_path_schedule`` and optimised with Adam for ``cfg.num_iter`` steps.
+
+ Args:
+ target: image data; converted with ``np.array`` and divided by 255.
+ experiment: stored as ``cfg.experiment``.
+ num_iter: stored as ``cfg.num_iter``.
+ cfg_arg: configuration object as produced by ``parse_args``.
+
+ Returns:
+ ``(img, svg_app_file_name)``: the last composited image converted to a
+ NumPy array, and the SVG path written when ``cfg.save.output`` is set
+ ("" otherwise).
+ """
+ with open(cfg_arg.config, 'r') as f:
+ # NOTE(review): FullLoader can build more than plain data; if the config
+ # is plain YAML, yaml.safe_load would be the safer choice -- confirm.
+ cfg = yaml.load(f, Loader=yaml.FullLoader)
+ # Later updates win: the 'default' section overrides the experiment
+ # section, and the command-line values override both.
+ cfg_default = edict(cfg['default'])
+ cfg = edict(cfg[cfg_arg.experiment])
+ cfg.update(cfg_default)
+ cfg.update(cfg_arg)
+ cfg.exid = get_experiment_id(cfg.debug)
+
+ cfg.experiment_dir = \
+ osp.join(cfg.log_dir, '{}_{}'.format(cfg.exid, '_'.join(cfg.signature)))
+ # The explicit arguments replace whatever the merge above produced.
+ cfg.target = target
+ cfg.experiment = experiment
+ cfg.num_iter = num_iter
+
+ # Save the merged configuration inside the experiment directory.
+ configfile = osp.join(cfg.experiment_dir, 'config.yaml')
+ check_and_create_dir(configfile)
+ with open(osp.join(configfile), 'w') as f:
+ yaml.dump(edict_2_dict(cfg), f)
+
+ # Use GPU if available
+ pydiffvg.set_use_gpu(torch.cuda.is_available())
+ device = pydiffvg.get_device()
+
+ # gt = np.array(PIL.Image.open(cfg.target))
+ gt = np.array(cfg.target)
+ print(f"Input image shape is: {gt.shape}")
+ if len(gt.shape) == 2:
+ print("Converting the gray-scale image to RGB.")
+ # NOTE(review): gt is a NumPy array at this point (np.array above) and
+ # ndarray has no unsqueeze(); this branch looks like it would raise
+ # AttributeError for grayscale input -- confirm.
+ gt = gt.unsqueeze(dim=-1).repeat(1,1,3)
+ if gt.shape[2] == 4:
+ print("Input image includes alpha channel, simply dropout alpha channel.")
+ gt = gt[:, :, :3]
+ gt = (gt/255).astype(np.float32)
+ # Channel-last image -> channel-first tensor with a leading batch axis.
+ gt = torch.FloatTensor(gt).permute(2, 0, 1)[None].to(device)
+ if cfg.use_ycrcb:
+ gt = ycrcb_conversion(gt)
+ h, w = gt.shape[2:]
+
+ path_schedule = get_path_schedule(**cfg.path_schedule)
+
+ # Seed every random source used below when a seed is configured.
+ if cfg.seed is not None:
+ random.seed(cfg.seed)
+ npr.seed(cfg.seed)
+ torch.manual_seed(cfg.seed)
+ render = pydiffvg.RenderFunction.apply
+
+ # All shapes and groups created so far, across stages.
+ shapes_record, shape_groups_record = [], []
+
+ region_loss = None
+ loss_matrix = []
+
+ para_point, para_color = {}, {}
+ if cfg.trainable.stroke:
+ para_stroke_width, para_stroke_color = {}, {}
+
+ pathn_record = []
+ # Background
+ if cfg.trainable.bg:
+ # meancolor = gt.mean([2, 3])[0]
+ para_bg = torch.tensor([1., 1., 1.], requires_grad=True, device=device)
+ else:
+ if cfg.use_ycrcb:
+ para_bg = torch.tensor([219/255, 0, 0], requires_grad=False, device=device)
+ else:
+ para_bg = torch.tensor([1., 1., 1.], requires_grad=False, device=device)
+
+ ##################
+ # start_training #
+ ##################
+
+ loss_weight = None
+ loss_weight_keep = 0
+ # Before any path exists, the error map is measured against a canvas
+ # filled with the background colour.
+ if cfg.coord_init.type == 'naive':
+ pos_init_method = naive_coord_init(
+ para_bg.view(1, -1, 1, 1).repeat(1, 1, h, w), gt)
+ elif cfg.coord_init.type == 'sparse':
+ pos_init_method = sparse_coord_init(
+ para_bg.view(1, -1, 1, 1).repeat(1, 1, h, w), gt)
+ elif cfg.coord_init.type == 'random':
+ pos_init_method = random_coord_init([h, w])
+ else:
+ raise ValueError
+
+ lrlambda_f = linear_decay_lrlambda_f(cfg.num_iter, 0.4)
+ optim_schedular_dict = {}
+
+ # One pass per entry of the path schedule; each pass adds `pathn` paths.
+ for path_idx, pathn in enumerate(path_schedule):
+ loss_list = []
+ print("=> Adding [{}] paths, [{}] ...".format(pathn, cfg.seginit.type))
+ pathn_record.append(pathn)
+ pathn_record_str = '-'.join([str(i) for i in pathn_record])
+
+ # initialize new shapes related stuffs.
+ if cfg.trainable.stroke:
+ shapes, shape_groups, point_var, color_var, stroke_width_var, stroke_color_var = init_shapes(
+ pathn, cfg.num_segments, (h, w),
+ cfg.seginit, len(shapes_record),
+ pos_init_method,
+ trainable_stroke=True,
+ gt=gt, )
+ para_stroke_width[path_idx] = stroke_width_var
+ para_stroke_color[path_idx] = stroke_color_var
+ else:
+ shapes, shape_groups, point_var, color_var = init_shapes(
+ pathn, cfg.num_segments, (h, w),
+ cfg.seginit, len(shapes_record),
+ pos_init_method,
+ trainable_stroke=False,
+ gt=gt, )
+
+ shapes_record += shapes
+ shape_groups_record += shape_groups
+
+ if cfg.save.init:
+ filename = os.path.join(
+ cfg.experiment_dir, "svg-init",
+ "{}-init.svg".format(pathn_record_str))
+ check_and_create_dir(filename)
+ pydiffvg.save_svg(
+ filename, w, h,
+ shapes_record, shape_groups_record)
+
+ # Parameters introduced by this pass, grouped by kind.
+ para = {}
+ if (cfg.trainable.bg) and (path_idx == 0):
+ para['bg'] = [para_bg]
+ para['point'] = point_var
+ para['color'] = color_var
+ if cfg.trainable.stroke:
+ para['stroke_width'] = stroke_width_var
+ para['stroke_color'] = stroke_color_var
+
+ # One Adam parameter group per kind, each with its own lr from cfg.lr_base.
+ pg = [{'params' : para[ki], 'lr' : cfg.lr_base[ki]} for ki in sorted(para.keys())]
+ optim = torch.optim.Adam(pg)
+
+ if cfg.trainable.record:
+ scheduler = LambdaLR(
+ optim, lr_lambda=lrlambda_f, last_epoch=-1)
+ else:
+ # NOTE(review): PyTorch schedulers are documented to expect an
+ # 'initial_lr' entry in each param group when last_epoch != -1 --
+ # confirm this branch works with a freshly built optimizer.
+ scheduler = LambdaLR(
+ optim, lr_lambda=lrlambda_f, last_epoch=cfg.num_iter)
+ optim_schedular_dict[path_idx] = (optim, scheduler)
+
+ # Inner loop training
+ t_range = tqdm(range(cfg.num_iter))
+ for t in t_range:
+
+ # Clear gradients on every optimizer still held in the dict.
+ for _, (optim, _) in optim_schedular_dict.items():
+ optim.zero_grad()
+
+ # Forward pass: render the image.
+ scene_args = pydiffvg.RenderFunction.serialize_scene(
+ w, h, shapes_record, shape_groups_record)
+ img = render(w, h, 2, 2, t, None, *scene_args)
+
+ # Compose img with white background
+ # (the colour actually used is para_bg, weighted by 1 - alpha)
+ img = img[:, :, 3:4] * img[:, :, :3] + \
+ para_bg * (1 - img[:, :, 3:4])
+
+
+
+
+
+ if cfg.save.video:
+ filename = os.path.join(
+ cfg.experiment_dir, "video-png",
+ "{}-iter{}.png".format(pathn_record_str, t))
+ check_and_create_dir(filename)
+ if cfg.use_ycrcb:
+ imshow = ycrcb_conversion(
+ img, format='[2D x 3]', reverse=True).detach().cpu()
+ else:
+ imshow = img.detach().cpu()
+ pydiffvg.imwrite(imshow, filename, gamma=gamma)
+
+ # ### added for app
+ # if t%30==0 and t !=0 :
+ # # print(f"debug: {t}, {filename} {img.size()}")
+ # return img.detach().cpu().numpy(), t
+
+ x = img.unsqueeze(0).permute(0, 3, 1, 2) # HWC -> NCHW
+
+ if cfg.use_ycrcb:
+ color_reweight = torch.FloatTensor([255/219, 255/224, 255/255]).to(device)
+ loss = ((x-gt)*(color_reweight.view(1, -1, 1, 1)))**2
+ else:
+ loss = ((x-gt)**2)
+
+ # L1 replaces whichever squared loss was computed above; the YCrCb
+ # reweighting is not applied to it.
+ if cfg.loss.use_l1_loss:
+ loss = abs(x-gt)
+
+ # Distance weighting: render only this pass's shapes in opaque white
+ # with zero stroke width, turn the alpha channel into a weight map
+ # with get_sdf, add the weight kept from earlier passes and clip
+ # the sum to [0, 1].
+ if cfg.loss.use_distance_weighted_loss:
+ if cfg.use_ycrcb:
+ raise ValueError
+ shapes_forsdf = copy.deepcopy(shapes)
+ shape_groups_forsdf = copy.deepcopy(shape_groups)
+ for si in shapes_forsdf:
+ si.stroke_width = torch.FloatTensor([0]).to(device)
+ for sg_idx, sgi in enumerate(shape_groups_forsdf):
+ sgi.fill_color = torch.FloatTensor([1, 1, 1, 1]).to(device)
+ # Re-index from 0: the copied list holds only this pass's shapes.
+ sgi.shape_ids = torch.LongTensor([sg_idx]).to(device)
+
+ sargs_forsdf = pydiffvg.RenderFunction.serialize_scene(
+ w, h, shapes_forsdf, shape_groups_forsdf)
+ with torch.no_grad():
+ im_forsdf = render(w, h, 2, 2, 0, None, *sargs_forsdf)
+ # use alpha channel is a trick to get 0-1 image
+ im_forsdf = (im_forsdf[:, :, 3]).detach().cpu().numpy()
+ loss_weight = get_sdf(im_forsdf, normalize='to1')
+ loss_weight += loss_weight_keep
+ loss_weight = np.clip(loss_weight, 0, 1)
+ loss_weight = torch.FloatTensor(loss_weight).to(device)
+
+ # Diagnostic plots of the per-pixel loss, the weight map and their
+ # product.
+ # NOTE(review): this reads loss_weight, which stays None unless the
+ # distance-weighted branch has run -- confirm both options are always
+ # enabled together.
+ if cfg.save.loss:
+ save_loss = loss.squeeze(dim=0).mean(dim=0,keepdim=False).cpu().detach().numpy()
+ save_weight = loss_weight.cpu().detach().numpy()
+ save_weighted_loss = save_loss*save_weight
+ # normalize to [0,1]
+ save_loss = (save_loss - np.min(save_loss))/np.ptp(save_loss)
+ save_weight = (save_weight - np.min(save_weight))/np.ptp(save_weight)
+ save_weighted_loss = (save_weighted_loss - np.min(save_weighted_loss))/np.ptp(save_weighted_loss)
+
+ # save
+ plt.imshow(save_loss, cmap='Reds')
+ plt.axis('off')
+ # plt.colorbar()
+ filename = os.path.join(cfg.experiment_dir, "loss", "{}-iter{}-mseloss.png".format(pathn_record_str, t))
+ check_and_create_dir(filename)
+ plt.savefig(filename, dpi=800)
+ plt.close()
+
+ plt.imshow(save_weight, cmap='Greys')
+ plt.axis('off')
+ # plt.colorbar()
+ filename = os.path.join(cfg.experiment_dir, "loss", "{}-iter{}-sdfweight.png".format(pathn_record_str, t))
+ plt.savefig(filename, dpi=800)
+ plt.close()
+
+ plt.imshow(save_weighted_loss, cmap='Reds')
+ plt.axis('off')
+ # plt.colorbar()
+ filename = os.path.join(cfg.experiment_dir, "loss", "{}-iter{}-weightedloss.png".format(pathn_record_str, t))
+ plt.savefig(filename, dpi=800)
+ plt.close()
+
+
+
+
+
+ # Sum over channels, apply the weight map if there is one, then average.
+ if loss_weight is None:
+ loss = loss.sum(1).mean()
+ else:
+ loss = (loss.sum(1)*loss_weight).mean()
+
+ # if (cfg.loss.bis_loss_weight is not None) and (cfg.loss.bis_loss_weight > 0):
+ # loss_bis = bezier_intersection_loss(point_var[0]) * cfg.loss.bis_loss_weight
+ # loss = loss + loss_bis
+ if (cfg.loss.xing_loss_weight is not None) \
+ and (cfg.loss.xing_loss_weight > 0):
+ loss_xing = xing_loss(point_var) * cfg.loss.xing_loss_weight
+ loss = loss + loss_xing
+
+
+ loss_list.append(loss.item())
+ t_range.set_postfix({'loss': loss.item()})
+ loss.backward()
+
+ # step
+ for _, (optim, scheduler) in optim_schedular_dict.items():
+ optim.step()
+ scheduler.step()
+
+ # Keep fill colours inside [0, 1] after the update.
+ for group in shape_groups_record:
+ group.fill_color.data.clamp_(0.0, 1.0)
+
+ # Carry the current weight map over so later weights accumulate on it.
+ if cfg.loss.use_distance_weighted_loss:
+ loss_weight_keep = loss_weight.detach().cpu().numpy() * 1
+
+ # NOTE(review): pg is built above as a list of dicts, so pg.items() looks
+ # like it would raise AttributeError. The body also sets `require_grad`
+ # (not `requires_grad`) on `pi` rather than on `ppi` -- confirm intent.
+ if not cfg.trainable.record:
+ for _, pi in pg.items():
+ for ppi in pi:
+ pi.require_grad = False
+ optim_schedular_dict = {}
+
+ if cfg.save.image:
+ filename = os.path.join(
+ cfg.experiment_dir, "demo-png", "{}.png".format(pathn_record_str))
+ check_and_create_dir(filename)
+ if cfg.use_ycrcb:
+ imshow = ycrcb_conversion(
+ img, format='[2D x 3]', reverse=True).detach().cpu()
+ else:
+ imshow = img.detach().cpu()
+ pydiffvg.imwrite(imshow, filename, gamma=gamma)
+
+ # Path handed back to the caller; stays "" unless an SVG is written below.
+ svg_app_file_name = ""
+ if cfg.save.output:
+ filename = os.path.join(
+ cfg.experiment_dir, "output-svg", "{}.svg".format(pathn_record_str))
+ check_and_create_dir(filename)
+ pydiffvg.save_svg(filename, w, h, shapes_record, shape_groups_record)
+ svg_app_file_name = filename
+
+ loss_matrix.append(loss_list)
+
+ # calculate the pixel loss
+ # pixel_loss = ((x-gt)**2).sum(dim=1, keepdim=True).sqrt_() # [N,1,H, W]
+ # region_loss = adaptive_avg_pool2d(pixel_loss, cfg.region_loss_pool_size)
+ # loss_weight = torch.softmax(region_loss.reshape(1, 1, -1), dim=-1)\
+ # .reshape_as(region_loss)
+
+ # NOTE(review): this assignment is replaced by the chain right below,
+ # whose every branch either assigns pos_init_method or raises.
+ pos_init_method = naive_coord_init(x, gt)
+
+ # Rebuild the position sampler from the error of the current render.
+ if cfg.coord_init.type == 'naive':
+ pos_init_method = naive_coord_init(x, gt)
+ elif cfg.coord_init.type == 'sparse':
+ pos_init_method = sparse_coord_init(x, gt)
+ elif cfg.coord_init.type == 'random':
+ pos_init_method = random_coord_init([h, w])
+ else:
+ raise ValueError
+
+ # Assemble the per-iteration PNGs written above into one video file.
+ if cfg.save.video:
+ print("saving iteration video...")
+ img_array = []
+ for ii in range(0, cfg.num_iter):
+ filename = os.path.join(
+ cfg.experiment_dir, "video-png",
+ "{}-iter{}.png".format(pathn_record_str, ii))
+ # NOTE(review): this rebinds `img`, which the return statement at the
+ # end of the function still uses.
+ img = cv2.imread(filename)
+ # cv2.putText(
+ # img, "Path:{} \nIteration:{}".format(pathn_record_str, ii),
+ # (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
+ img_array.append(img)
+
+ videoname = os.path.join(
+ cfg.experiment_dir, "video-avi",
+ "{}.avi".format(pathn_record_str))
+ check_and_create_dir(videoname)
+ out = cv2.VideoWriter(
+ videoname,
+ # cv2.VideoWriter_fourcc(*'mp4v'),
+ cv2.VideoWriter_fourcc(*'FFV1'),
+ 20.0, (w, h))
+ for iii in range(len(img_array)):
+ out.write(img_array[iii])
+ out.release()
+ # shutil.rmtree(os.path.join(cfg.experiment_dir, "video-png"))
+
+ print("The last loss is: {}".format(loss.item()))
+ # NOTE(review): when cfg.save.video is set, `img` was last assigned from
+ # cv2.imread above, which does not return a torch tensor, so .detach()
+ # here looks like it would fail -- confirm.
+ return img.detach().cpu().numpy(), svg_app_file_name
+
+
+if __name__ == "__main__":
+ # Command-line entry point. The steps below repeat those of main_func,
+ # except that the target image is opened from the path in cfg.target and
+ # nothing is returned.
+
+ ###############
+ # make config #
+ ###############
+
+ cfg_arg = parse_args()
+ with open(cfg_arg.config, 'r') as f:
+ # NOTE(review): FullLoader can build more than plain data; if the config
+ # is plain YAML, yaml.safe_load would be the safer choice -- confirm.
+ cfg = yaml.load(f, Loader=yaml.FullLoader)
+ # Later updates win: the 'default' section overrides the experiment
+ # section, and the command-line values override both.
+ cfg_default = edict(cfg['default'])
+ cfg = edict(cfg[cfg_arg.experiment])
+ cfg.update(cfg_default)
+ cfg.update(cfg_arg)
+ cfg.exid = get_experiment_id(cfg.debug)
+
+ cfg.experiment_dir = \
+ osp.join(cfg.log_dir, '{}_{}'.format(cfg.exid, '_'.join(cfg.signature)))
+ # Save the merged configuration inside the experiment directory.
+ configfile = osp.join(cfg.experiment_dir, 'config.yaml')
+ check_and_create_dir(configfile)
+ with open(osp.join(configfile), 'w') as f:
+ yaml.dump(edict_2_dict(cfg), f)
+
+ # Use GPU if available
+ pydiffvg.set_use_gpu(torch.cuda.is_available())
+ device = pydiffvg.get_device()
+
+ gt = np.array(PIL.Image.open(cfg.target))
+ print(f"Input image shape is: {gt.shape}")
+ if len(gt.shape) == 2:
+ print("Converting the gray-scale image to RGB.")
+ # NOTE(review): gt is a NumPy array at this point (np.array above) and
+ # ndarray has no unsqueeze(); this branch looks like it would raise
+ # AttributeError for grayscale input -- confirm.
+ gt = gt.unsqueeze(dim=-1).repeat(1,1,3)
+ if gt.shape[2] == 4:
+ print("Input image includes alpha channel, simply dropout alpha channel.")
+ gt = gt[:, :, :3]
+ gt = (gt/255).astype(np.float32)
+ # Channel-last image -> channel-first tensor with a leading batch axis.
+ gt = torch.FloatTensor(gt).permute(2, 0, 1)[None].to(device)
+ if cfg.use_ycrcb:
+ gt = ycrcb_conversion(gt)
+ h, w = gt.shape[2:]
+
+ path_schedule = get_path_schedule(**cfg.path_schedule)
+
+ # Seed every random source used below when a seed is configured.
+ if cfg.seed is not None:
+ random.seed(cfg.seed)
+ npr.seed(cfg.seed)
+ torch.manual_seed(cfg.seed)
+ render = pydiffvg.RenderFunction.apply
+
+ # All shapes and groups created so far, across stages.
+ shapes_record, shape_groups_record = [], []
+
+ region_loss = None
+ loss_matrix = []
+
+ para_point, para_color = {}, {}
+ if cfg.trainable.stroke:
+ para_stroke_width, para_stroke_color = {}, {}
+
+ pathn_record = []
+ # Background
+ if cfg.trainable.bg:
+ # meancolor = gt.mean([2, 3])[0]
+ para_bg = torch.tensor([1., 1., 1.], requires_grad=True, device=device)
+ else:
+ if cfg.use_ycrcb:
+ para_bg = torch.tensor([219/255, 0, 0], requires_grad=False, device=device)
+ else:
+ para_bg = torch.tensor([1., 1., 1.], requires_grad=False, device=device)
+
+ ##################
+ # start_training #
+ ##################
+
+ loss_weight = None
+ loss_weight_keep = 0
+ # Before any path exists, the error map is measured against a canvas
+ # filled with the background colour.
+ if cfg.coord_init.type == 'naive':
+ pos_init_method = naive_coord_init(
+ para_bg.view(1, -1, 1, 1).repeat(1, 1, h, w), gt)
+ elif cfg.coord_init.type == 'sparse':
+ pos_init_method = sparse_coord_init(
+ para_bg.view(1, -1, 1, 1).repeat(1, 1, h, w), gt)
+ elif cfg.coord_init.type == 'random':
+ pos_init_method = random_coord_init([h, w])
+ else:
+ raise ValueError
+
+ lrlambda_f = linear_decay_lrlambda_f(cfg.num_iter, 0.4)
+ optim_schedular_dict = {}
+
+ # One pass per entry of the path schedule; each pass adds `pathn` paths.
+ for path_idx, pathn in enumerate(path_schedule):
+ loss_list = []
+ print("=> Adding [{}] paths, [{}] ...".format(pathn, cfg.seginit.type))
+ pathn_record.append(pathn)
+ pathn_record_str = '-'.join([str(i) for i in pathn_record])
+
+ # initialize new shapes related stuffs.
+ if cfg.trainable.stroke:
+ shapes, shape_groups, point_var, color_var, stroke_width_var, stroke_color_var = init_shapes(
+ pathn, cfg.num_segments, (h, w),
+ cfg.seginit, len(shapes_record),
+ pos_init_method,
+ trainable_stroke=True,
+ gt=gt, )
+ para_stroke_width[path_idx] = stroke_width_var
+ para_stroke_color[path_idx] = stroke_color_var
+ else:
+ shapes, shape_groups, point_var, color_var = init_shapes(
+ pathn, cfg.num_segments, (h, w),
+ cfg.seginit, len(shapes_record),
+ pos_init_method,
+ trainable_stroke=False,
+ gt=gt, )
+
+ shapes_record += shapes
+ shape_groups_record += shape_groups
+
+ if cfg.save.init:
+ filename = os.path.join(
+ cfg.experiment_dir, "svg-init",
+ "{}-init.svg".format(pathn_record_str))
+ check_and_create_dir(filename)
+ pydiffvg.save_svg(
+ filename, w, h,
+ shapes_record, shape_groups_record)
+
+ # Parameters introduced by this pass, grouped by kind.
+ para = {}
+ if (cfg.trainable.bg) and (path_idx == 0):
+ para['bg'] = [para_bg]
+ para['point'] = point_var
+ para['color'] = color_var
+ if cfg.trainable.stroke:
+ para['stroke_width'] = stroke_width_var
+ para['stroke_color'] = stroke_color_var
+
+ # One Adam parameter group per kind, each with its own lr from cfg.lr_base.
+ pg = [{'params' : para[ki], 'lr' : cfg.lr_base[ki]} for ki in sorted(para.keys())]
+ optim = torch.optim.Adam(pg)
+
+ if cfg.trainable.record:
+ scheduler = LambdaLR(
+ optim, lr_lambda=lrlambda_f, last_epoch=-1)
+ else:
+ # NOTE(review): PyTorch schedulers are documented to expect an
+ # 'initial_lr' entry in each param group when last_epoch != -1 --
+ # confirm this branch works with a freshly built optimizer.
+ scheduler = LambdaLR(
+ optim, lr_lambda=lrlambda_f, last_epoch=cfg.num_iter)
+ optim_schedular_dict[path_idx] = (optim, scheduler)
+
+ # Inner loop training
+ t_range = tqdm(range(cfg.num_iter))
+ for t in t_range:
+
+ # Clear gradients on every optimizer still held in the dict.
+ for _, (optim, _) in optim_schedular_dict.items():
+ optim.zero_grad()
+
+ # Forward pass: render the image.
+ scene_args = pydiffvg.RenderFunction.serialize_scene(
+ w, h, shapes_record, shape_groups_record)
+ img = render(w, h, 2, 2, t, None, *scene_args)
+
+ # Compose img with white background
+ # (the colour actually used is para_bg, weighted by 1 - alpha)
+ img = img[:, :, 3:4] * img[:, :, :3] + \
+ para_bg * (1 - img[:, :, 3:4])
+
+ if cfg.save.video:
+ filename = os.path.join(
+ cfg.experiment_dir, "video-png",
+ "{}-iter{}.png".format(pathn_record_str, t))
+ check_and_create_dir(filename)
+ if cfg.use_ycrcb:
+ imshow = ycrcb_conversion(
+ img, format='[2D x 3]', reverse=True).detach().cpu()
+ else:
+ imshow = img.detach().cpu()
+ pydiffvg.imwrite(imshow, filename, gamma=gamma)
+
+ x = img.unsqueeze(0).permute(0, 3, 1, 2) # HWC -> NCHW
+
+ if cfg.use_ycrcb:
+ color_reweight = torch.FloatTensor([255/219, 255/224, 255/255]).to(device)
+ loss = ((x-gt)*(color_reweight.view(1, -1, 1, 1)))**2
+ else:
+ loss = ((x-gt)**2)
+
+ # L1 replaces whichever squared loss was computed above; the YCrCb
+ # reweighting is not applied to it.
+ if cfg.loss.use_l1_loss:
+ loss = abs(x-gt)
+
+ # Distance weighting: render only this pass's shapes in opaque white
+ # with zero stroke width, turn the alpha channel into a weight map
+ # with get_sdf, add the weight kept from earlier passes and clip
+ # the sum to [0, 1].
+ if cfg.loss.use_distance_weighted_loss:
+ if cfg.use_ycrcb:
+ raise ValueError
+ shapes_forsdf = copy.deepcopy(shapes)
+ shape_groups_forsdf = copy.deepcopy(shape_groups)
+ for si in shapes_forsdf:
+ si.stroke_width = torch.FloatTensor([0]).to(device)
+ for sg_idx, sgi in enumerate(shape_groups_forsdf):
+ sgi.fill_color = torch.FloatTensor([1, 1, 1, 1]).to(device)
+ # Re-index from 0: the copied list holds only this pass's shapes.
+ sgi.shape_ids = torch.LongTensor([sg_idx]).to(device)
+
+ sargs_forsdf = pydiffvg.RenderFunction.serialize_scene(
+ w, h, shapes_forsdf, shape_groups_forsdf)
+ with torch.no_grad():
+ im_forsdf = render(w, h, 2, 2, 0, None, *sargs_forsdf)
+ # use alpha channel is a trick to get 0-1 image
+ im_forsdf = (im_forsdf[:, :, 3]).detach().cpu().numpy()
+ loss_weight = get_sdf(im_forsdf, normalize='to1')
+ loss_weight += loss_weight_keep
+ loss_weight = np.clip(loss_weight, 0, 1)
+ loss_weight = torch.FloatTensor(loss_weight).to(device)
+
+ # Diagnostic plots of the per-pixel loss, the weight map and their
+ # product.
+ # NOTE(review): this reads loss_weight, which stays None unless the
+ # distance-weighted branch has run -- confirm both options are always
+ # enabled together.
+ if cfg.save.loss:
+ save_loss = loss.squeeze(dim=0).mean(dim=0,keepdim=False).cpu().detach().numpy()
+ save_weight = loss_weight.cpu().detach().numpy()
+ save_weighted_loss = save_loss*save_weight
+ # normalize to [0,1]
+ save_loss = (save_loss - np.min(save_loss))/np.ptp(save_loss)
+ save_weight = (save_weight - np.min(save_weight))/np.ptp(save_weight)
+ save_weighted_loss = (save_weighted_loss - np.min(save_weighted_loss))/np.ptp(save_weighted_loss)
+
+ # save
+ plt.imshow(save_loss, cmap='Reds')
+ plt.axis('off')
+ # plt.colorbar()
+ filename = os.path.join(cfg.experiment_dir, "loss", "{}-iter{}-mseloss.png".format(pathn_record_str, t))
+ check_and_create_dir(filename)
+ plt.savefig(filename, dpi=800)
+ plt.close()
+
+ plt.imshow(save_weight, cmap='Greys')
+ plt.axis('off')
+ # plt.colorbar()
+ filename = os.path.join(cfg.experiment_dir, "loss", "{}-iter{}-sdfweight.png".format(pathn_record_str, t))
+ plt.savefig(filename, dpi=800)
+ plt.close()
+
+ plt.imshow(save_weighted_loss, cmap='Reds')
+ plt.axis('off')
+ # plt.colorbar()
+ filename = os.path.join(cfg.experiment_dir, "loss", "{}-iter{}-weightedloss.png".format(pathn_record_str, t))
+ plt.savefig(filename, dpi=800)
+ plt.close()
+
+
+
+
+
+ # Sum over channels, apply the weight map if there is one, then average.
+ if loss_weight is None:
+ loss = loss.sum(1).mean()
+ else:
+ loss = (loss.sum(1)*loss_weight).mean()
+
+ # if (cfg.loss.bis_loss_weight is not None) and (cfg.loss.bis_loss_weight > 0):
+ # loss_bis = bezier_intersection_loss(point_var[0]) * cfg.loss.bis_loss_weight
+ # loss = loss + loss_bis
+ if (cfg.loss.xing_loss_weight is not None) \
+ and (cfg.loss.xing_loss_weight > 0):
+ loss_xing = xing_loss(point_var) * cfg.loss.xing_loss_weight
+ loss = loss + loss_xing
+
+
+ loss_list.append(loss.item())
+ t_range.set_postfix({'loss': loss.item()})
+ loss.backward()
+
+ # step
+ for _, (optim, scheduler) in optim_schedular_dict.items():
+ optim.step()
+ scheduler.step()
+
+ # Keep fill colours inside [0, 1] after the update.
+ for group in shape_groups_record:
+ group.fill_color.data.clamp_(0.0, 1.0)
+
+ # Carry the current weight map over so later weights accumulate on it.
+ if cfg.loss.use_distance_weighted_loss:
+ loss_weight_keep = loss_weight.detach().cpu().numpy() * 1
+
+ # NOTE(review): pg is built above as a list of dicts, so pg.items() looks
+ # like it would raise AttributeError. The body also sets `require_grad`
+ # (not `requires_grad`) on `pi` rather than on `ppi` -- confirm intent.
+ if not cfg.trainable.record:
+ for _, pi in pg.items():
+ for ppi in pi:
+ pi.require_grad = False
+ optim_schedular_dict = {}
+
+ if cfg.save.image:
+ filename = os.path.join(
+ cfg.experiment_dir, "demo-png", "{}.png".format(pathn_record_str))
+ check_and_create_dir(filename)
+ if cfg.use_ycrcb:
+ imshow = ycrcb_conversion(
+ img, format='[2D x 3]', reverse=True).detach().cpu()
+ else:
+ imshow = img.detach().cpu()
+ pydiffvg.imwrite(imshow, filename, gamma=gamma)
+
+ if cfg.save.output:
+ filename = os.path.join(
+ cfg.experiment_dir, "output-svg", "{}.svg".format(pathn_record_str))
+ check_and_create_dir(filename)
+ pydiffvg.save_svg(filename, w, h, shapes_record, shape_groups_record)
+
+ loss_matrix.append(loss_list)
+
+ # calculate the pixel loss
+ # pixel_loss = ((x-gt)**2).sum(dim=1, keepdim=True).sqrt_() # [N,1,H, W]
+ # region_loss = adaptive_avg_pool2d(pixel_loss, cfg.region_loss_pool_size)
+ # loss_weight = torch.softmax(region_loss.reshape(1, 1, -1), dim=-1)\
+ # .reshape_as(region_loss)
+
+ # NOTE(review): this assignment is replaced by the chain right below,
+ # whose every branch either assigns pos_init_method or raises.
+ pos_init_method = naive_coord_init(x, gt)
+
+ # Rebuild the position sampler from the error of the current render.
+ if cfg.coord_init.type == 'naive':
+ pos_init_method = naive_coord_init(x, gt)
+ elif cfg.coord_init.type == 'sparse':
+ pos_init_method = sparse_coord_init(x, gt)
+ elif cfg.coord_init.type == 'random':
+ pos_init_method = random_coord_init([h, w])
+ else:
+ raise ValueError
+
+ # Assemble the per-iteration PNGs written above into one video file.
+ if cfg.save.video:
+ print("saving iteration video...")
+ img_array = []
+ for ii in range(0, cfg.num_iter):
+ filename = os.path.join(
+ cfg.experiment_dir, "video-png",
+ "{}-iter{}.png".format(pathn_record_str, ii))
+ img = cv2.imread(filename)
+ # cv2.putText(
+ # img, "Path:{} \nIteration:{}".format(pathn_record_str, ii),
+ # (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
+ img_array.append(img)
+
+ videoname = os.path.join(
+ cfg.experiment_dir, "video-avi",
+ "{}.avi".format(pathn_record_str))
+ check_and_create_dir(videoname)
+ out = cv2.VideoWriter(
+ videoname,
+ # cv2.VideoWriter_fourcc(*'mp4v'),
+ cv2.VideoWriter_fourcc(*'FFV1'),
+ 20.0, (w, h))
+ for iii in range(len(img_array)):
+ out.write(img_array[iii])
+ out.release()
+ # shutil.rmtree(os.path.join(cfg.experiment_dir, "video-png"))
+
+ print("The last loss is: {}".format(loss.item()))
diff --git a/matrix.h b/matrix.h
new file mode 100644
index 0000000000000000000000000000000000000000..b53f484e2abf613c6d0c1b36890a332d778f24b5
--- /dev/null
+++ b/matrix.h
@@ -0,0 +1,544 @@
+#pragma once
+
+#include "diffvg.h"
+#include "vector.h"
+#include
+
+// 3x3 matrix of element type T, stored row-major in data[row][col].
+template
+struct TMatrix3x3 {
+    // Default constructor: every entry becomes T(0).
+    DEVICE
+    TMatrix3x3() {
+        for (int row = 0; row < 3; row++) {
+            for (int col = 0; col < 3; col++) {
+                data[row][col] = T(0);
+            }
+        }
+    }
+
+    // Builds the matrix from a flat array read in row-major order:
+    // arr[3 * row + col] becomes entry (row, col), so arr[0..8] are read.
+    template
+    DEVICE
+    TMatrix3x3(T2 *arr) {
+        for (int row = 0; row < 3; row++) {
+            for (int col = 0; col < 3; col++) {
+                data[row][col] = arr[3 * row + col];
+            }
+        }
+    }
+
+    // Builds the matrix from nine entries listed row by row (vRC = row R, column C).
+    DEVICE
+    TMatrix3x3(T v00, T v01, T v02,
+               T v10, T v11, T v12,
+               T v20, T v21, T v22) {
+        data[0][0] = v00; data[0][1] = v01; data[0][2] = v02;
+        data[1][0] = v10; data[1][1] = v11; data[1][2] = v12;
+        data[2][0] = v20; data[2][1] = v21; data[2][2] = v22;
+    }
+
+    // Read-only access to entry (i, j); indices are not range-checked.
+    DEVICE
+    const T& operator()(int i, int j) const {
+        return data[i][j];
+    }
+
+    // Mutable access to entry (i, j); indices are not range-checked.
+    DEVICE
+    T& operator()(int i, int j) {
+        return data[i][j];
+    }
+
+    // Returns the identity matrix (ones on the diagonal, zeros elsewhere).
+    DEVICE
+    static TMatrix3x3 identity() {
+        return TMatrix3x3(1, 0, 0,
+                          0, 1, 0,
+                          0, 0, 1);
+    }
+
+    T data[3][3];
+};
+
+using Matrix3x3 = TMatrix3x3;  // NOTE(review): the template argument list appears to be missing here — confirm the intended element type
+using Matrix3x3f = TMatrix3x3;  // NOTE(review): presumably the single-precision (float) instantiation — confirm
+
+// 4x4 matrix of element type T, stored row-major in data[row][col].
+// NOTE(review): the bare `template` keywords below have lost their parameter
+// lists (T and T2 are used but never declared in view) — confirm against the original header.
+template
+struct TMatrix4x4 {
+    // Default constructor: every entry becomes T(0).
+    DEVICE TMatrix4x4() {
+        for (int row = 0; row < 4; row++) {
+            for (int col = 0; col < 4; col++) {
+                data[row][col] = T(0);
+            }
+        }
+    }
+
+    // Builds the matrix from a flat array read in row-major order:
+    // arr[4 * row + col] is cast to T and stored at (row, col); arr[0..15] are read.
+    template
+    DEVICE TMatrix4x4(const T2 *arr) {
+        for (int row = 0; row < 4; row++) {
+            for (int col = 0; col < 4; col++) {
+                data[row][col] = (T)arr[4 * row + col];
+            }
+        }
+    }
+
+    // Copies another 4x4 matrix entry by entry, converting each value to T.
+    template
+    DEVICE TMatrix4x4(const TMatrix4x4 &m) {
+        for (int row = 0; row < 4; row++) {
+            for (int col = 0; col < 4; col++) {
+                data[row][col] = T(m.data[row][col]);
+            }
+        }
+    }
+
+    // Builds the matrix from sixteen entries listed row by row
+    // (vRC = row R, column C); every argument is cast to T.
+    template
+    DEVICE TMatrix4x4(T2 v00, T2 v01, T2 v02, T2 v03,
+                      T2 v10, T2 v11, T2 v12, T2 v13,
+                      T2 v20, T2 v21, T2 v22, T2 v23,
+                      T2 v30, T2 v31, T2 v32, T2 v33) {
+        data[0][0] = (T)v00; data[0][1] = (T)v01; data[0][2] = (T)v02; data[0][3] = (T)v03;
+        data[1][0] = (T)v10; data[1][1] = (T)v11; data[1][2] = (T)v12; data[1][3] = (T)v13;
+        data[2][0] = (T)v20; data[2][1] = (T)v21; data[2][2] = (T)v22; data[2][3] = (T)v23;
+        data[3][0] = (T)v30; data[3][1] = (T)v31; data[3][2] = (T)v32; data[3][3] = (T)v33;
+    }
+
+    // Read-only access to entry (i, j); indices are not range-checked.
+    DEVICE
+    const T& operator()(int i, int j) const {
+        return data[i][j];
+    }
+
+    // Mutable access to entry (i, j); indices are not range-checked.
+    DEVICE
+    T& operator()(int i, int j) {
+        return data[i][j];
+    }
+
+    // Returns the identity matrix (ones on the diagonal, zeros elsewhere).
+    DEVICE
+    static TMatrix4x4 identity() {
+        return TMatrix4x4(1, 0, 0, 0,
+                          0, 1, 0, 0,
+                          0, 0, 1, 0,
+                          0, 0, 0, 1);
+    }
+
+    // Matrix entries; data[i][j] is row i, column j.
+    T data[4][4];
+};
+
+using Matrix4x4 = TMatrix4x4;  // NOTE(review): the template argument list appears to be missing here — confirm the intended element type
+using Matrix4x4f = TMatrix4x4;  // NOTE(review): presumably the single-precision (float) instantiation — confirm
+
+// Element-wise sum of two 3x3 matrices: result(i, j) = m0(i, j) + m1(i, j).
+template
+DEVICE
+inline auto operator+(const TMatrix3x3 &m0, const TMatrix3x3 &m1) -> TMatrix3x3 {
+    TMatrix3x3 sum;
+    for (int row = 0; row < 3; ++row) {
+        for (int col = 0; col < 3; ++col)
+            sum(row, col) = m0(row, col) + m1(row, col);
+    }
+    return sum;
+}
+
+// Element-wise difference of two 3x3 matrices: result(i, j) = m0(i, j) - m1(i, j).
+template
+DEVICE
+inline auto operator-(const TMatrix3x3 &m0, const TMatrix3x3 &m1) -> TMatrix3x3 {
+    TMatrix3x3 diff;
+    for (int row = 0; row < 3; ++row) {
+        for (int col = 0; col < 3; ++col)
+            diff(row, col) = m0(row, col) - m1(row, col);
+    }
+    return diff;
+}
+
+// 3x3 matrix product m0 * m1: result(i, j) = sum over k of m0(i, k) * m1(k, j).
+template
+DEVICE
+inline auto operator*(const TMatrix3x3 &m0, const TMatrix3x3 &m1) -> TMatrix3x3 {
+    TMatrix3x3 prod;
+    for (int row = 0; row < 3; ++row) {
+        for (int col = 0; col < 3; ++col) {
+            prod(row, col) = T(0);
+            for (int k = 0; k < 3; ++k)
+                prod(row, col) += m0(row, k) * m1(k, col);
+        }
+    }
+    return prod;
+}
+
+// Row vector times matrix: result[i] = sum over j of v[j] * m(j, i).
+template
+DEVICE
+inline auto operator*(const TVector3 &v, const TMatrix3x3 &m) -> TVector3 {
+    TVector3 out;
+    for (int col = 0; col < 3; ++col) {
+        out[col] = T(0);
+        for (int row = 0; row < 3; ++row)
+            out[col] += v[row] * m(row, col);
+    }
+    return out;
+}
+
+// Matrix times column vector: result[i] = sum over j of m(i, j) * v[j].
+template
+DEVICE
+inline auto operator*(const TMatrix3x3 &m, const TVector3 &v) -> TVector3 {
+    TVector3 out;
+    for (int row = 0; row < 3; ++row) {
+        out[row] = 0.f;
+        for (int col = 0; col < 3; ++col)
+            out[row] += m(row, col) * v[col];
+    }
+    return out;
+}
+
+// Inverse of a 3x3 matrix: adjugate entries scaled by 1 / determinant.
+// No singularity check is made; the reciprocal is taken of the determinant as computed.
+template
+DEVICE
+inline auto inverse(const TMatrix3x3 &m) -> TMatrix3x3 {
+    // Determinant by cofactor expansion along the first row.
+    auto determinant = m(0, 0) * (m(1, 1) * m(2, 2) - m(2, 1) * m(1, 2)) -
+                       m(0, 1) * (m(1, 0) * m(2, 2) - m(1, 2) * m(2, 0)) +
+                       m(0, 2) * (m(1, 0) * m(2, 1) - m(1, 1) * m(2, 0));
+    auto scale = 1 / determinant;
+    auto result = TMatrix3x3{};
+    result(0, 0) = (m(1, 1) * m(2, 2) - m(2, 1) * m(1, 2)) * scale;
+    result(0, 1) = (m(0, 2) * m(2, 1) - m(0, 1) * m(2, 2)) * scale;
+    result(0, 2) = (m(0, 1) * m(1, 2) - m(0, 2) * m(1, 1)) * scale;
+    result(1, 0) = (m(1, 2) * m(2, 0) - m(1, 0) * m(2, 2)) * scale;
+    result(1, 1) = (m(0, 0) * m(2, 2) - m(0, 2) * m(2, 0)) * scale;
+    result(1, 2) = (m(1, 0) * m(0, 2) - m(0, 0) * m(1, 2)) * scale;
+    result(2, 0) = (m(1, 0) * m(2, 1) - m(2, 0) * m(1, 1)) * scale;
+    result(2, 1) = (m(2, 0) * m(0, 1) - m(0, 0) * m(2, 1)) * scale;
+    result(2, 2) = (m(0, 0) * m(1, 1) - m(1, 0) * m(0, 1)) * scale;
+    return result;
+}
+
+// Element-wise sum of two 4x4 matrices: result(i, j) = m0(i, j) + m1(i, j).
+template
+DEVICE
+inline auto operator+(const TMatrix4x4 &m0, const TMatrix4x4 &m1) -> TMatrix4x4 {
+    TMatrix4x4 sum;
+    for (int row = 0; row < 4; ++row) {
+        for (int col = 0; col < 4; ++col)
+            sum(row, col) = m0(row, col) + m1(row, col);
+    }
+    return sum;
+}
+
+template
+DEVICE
+TMatrix3x3 transpose(const TMatrix3x3 &m) {  // transpose: result(i, j) = m(j, i)
+    TMatrix3x3 flipped;
+    for (int i = 0; i < 3; ++i) for (int j = 0; j < 3; ++j) flipped(i, j) = m(j, i);
+    return flipped;
+}
+
+// Transpose of a 4x4 matrix: entry (i, j) of the result is m(j, i).
+template
+DEVICE
+TMatrix4x4 transpose(const TMatrix4x4 &m) {
+    TMatrix4x4 flipped;
+    for (int i = 0; i < 4; ++i) for (int j = 0; j < 4; ++j) flipped(i, j) = m(j, i);
+    return flipped;
+}
+
+// Unary minus for 3x3 matrices: result(i, j) = -m0(i, j).
+template
+DEVICE
+inline TMatrix3x3 operator-(const TMatrix3x3 &m0) {
+    TMatrix3x3 negated;
+    for (int row = 0; row < 3; ++row) {
+        for (int col = 0; col < 3; ++col)
+            negated(row, col) = -m0(row, col);
+    }
+    return negated;
+}
+
+// Unary minus for 4x4 matrices: result(i, j) = -m0(i, j).
+template
+DEVICE
+inline TMatrix4x4 operator-(const TMatrix4x4 &m0) {
+    TMatrix4x4 negated;
+    for (int row = 0; row < 4; ++row) {
+        for (int col = 0; col < 4; ++col)
+            negated(row, col) = -m0(row, col);
+    }
+    return negated;
+}
+
+// Element-wise difference of two 4x4 matrices: result(i, j) = m0(i, j) - m1(i, j).
+template
+DEVICE
+inline TMatrix4x4 operator-(const TMatrix4x4 &m0, const TMatrix4x4 &m1) {
+    TMatrix4x4 diff;
+    for (int row = 0; row < 4; ++row) {
+        for (int col = 0; col < 4; ++col)
+            diff(row, col) = m0(row, col) - m1(row, col);
+    }
+    return diff;
+}
+
+// In-place element-wise addition for 3x3 matrices; returns a reference to m0.
+template
+DEVICE
+inline TMatrix3x3& operator+=(TMatrix3x3 &m0, const TMatrix3x3 &m1) {
+    for (int row = 0; row < 3; ++row) {
+        for (int col = 0; col < 3; ++col)
+            m0(row, col) += m1(row, col);
+    }
+    return m0;
+}
+
+// In-place element-wise addition for 4x4 matrices; returns a reference to m0.
+template
+DEVICE
+inline TMatrix4x4& operator+=(TMatrix4x4 &m0, const TMatrix4x4 &m1) {
+    for (int row = 0; row < 4; ++row) {
+        for (int col = 0; col < 4; ++col)
+            m0(row, col) += m1(row, col);
+    }
+    return m0;
+}
+
+// In-place element-wise subtraction for 4x4 matrices; returns a reference to m0.
+template
+DEVICE
+inline TMatrix4x4& operator-=(TMatrix4x4 &m0, const TMatrix4x4 &m1) {
+    for (int row = 0; row < 4; ++row) {
+        for (int col = 0; col < 4; ++col)
+            m0(row, col) -= m1(row, col);
+    }
+    return m0;
+}
+
+// 4x4 matrix product m0 * m1; accumulates into a default-constructed (all-zero) result.
+template
+DEVICE
+inline TMatrix4x4 operator*(const TMatrix4x4 &m0, const TMatrix4x4 &m1) {
+    TMatrix4x4 prod;
+    for (int row = 0; row < 4; ++row) {
+        for (int col = 0; col < 4; ++col) {
+            for (int k = 0; k < 4; ++k)
+                prod(row, col) += m0(row, k) * m1(k, col);
+        }
+    }
+    return prod;
+}
+
+// Inverse of a 4x4 matrix: the sixteen cofactor expressions are written out
+// explicitly and then scaled by 1 / det. Returns the all-zero matrix when det == 0.
+template
+DEVICE
+TMatrix4x4 inverse(const TMatrix4x4 &m) {
+    // https://stackoverflow.com/questions/1148309/inverting-a-4x4-matrix
+    TMatrix4x4 inv;
+
+    inv(0, 0) = m(1, 1) * m(2, 2) * m(3, 3) -
+                m(1, 1) * m(2, 3) * m(3, 2) -
+                m(2, 1) * m(1, 2) * m(3, 3) +
+                m(2, 1) * m(1, 3) * m(3, 2) +
+                m(3, 1) * m(1, 2) * m(2, 3) -
+                m(3, 1) * m(1, 3) * m(2, 2);
+
+    inv(1, 0) = -m(1, 0) * m(2, 2) * m(3, 3) +
+                m(1, 0) * m(2, 3) * m(3, 2) +
+                m(2, 0) * m(1, 2) * m(3, 3) -
+                m(2, 0) * m(1, 3) * m(3, 2) -
+                m(3, 0) * m(1, 2) * m(2, 3) +
+                m(3, 0) * m(1, 3) * m(2, 2);
+
+    inv(2, 0) = m(1, 0) * m(2, 1) * m(3, 3) -
+                m(1, 0) * m(2, 3) * m(3, 1) -
+                m(2, 0) * m(1, 1) * m(3, 3) +
+                m(2, 0) * m(1, 3) * m(3, 1) +
+                m(3, 0) * m(1, 1) * m(2, 3) -
+                m(3, 0) * m(1, 3) * m(2, 1);
+
+    inv(3, 0) = -m(1, 0) * m(2, 1) * m(3, 2) +
+                m(1, 0) * m(2, 2) * m(3, 1) +
+                m(2, 0) * m(1, 1) * m(3, 2) -
+                m(2, 0) * m(1, 2) * m(3, 1) -
+                m(3, 0) * m(1, 1) * m(2, 2) +
+                m(3, 0) * m(1, 2) * m(2, 1);
+
+    inv(0, 1) = -m(0, 1) * m(2, 2) * m(3, 3) +
+                m(0, 1) * m(2, 3) * m(3, 2) +
+                m(2, 1) * m(0, 2) * m(3, 3) -
+                m(2, 1) * m(0, 3) * m(3, 2) -
+                m(3, 1) * m(0, 2) * m(2, 3) +
+                m(3, 1) * m(0, 3) * m(2, 2);
+
+    inv(1, 1) = m(0, 0) * m(2, 2) * m(3, 3) -
+                m(0, 0) * m(2, 3) * m(3, 2) -
+                m(2, 0) * m(0, 2) * m(3, 3) +
+                m(2, 0) * m(0, 3) * m(3, 2) +
+                m(3, 0) * m(0, 2) * m(2, 3) -
+                m(3, 0) * m(0, 3) * m(2, 2);
+
+    inv(2, 1) = -m(0, 0) * m(2, 1) * m(3, 3) +
+                m(0, 0) * m(2, 3) * m(3, 1) +
+                m(2, 0) * m(0, 1) * m(3, 3) -
+                m(2, 0) * m(0, 3) * m(3, 1) -
+                m(3, 0) * m(0, 1) * m(2, 3) +
+                m(3, 0) * m(0, 3) * m(2, 1);
+
+    inv(3, 1) = m(0, 0) * m(2, 1) * m(3, 2) -
+                m(0, 0) * m(2, 2) * m(3, 1) -
+                m(2, 0) * m(0, 1) * m(3, 2) +
+                m(2, 0) * m(0, 2) * m(3, 1) +
+                m(3, 0) * m(0, 1) * m(2, 2) -
+                m(3, 0) * m(0, 2) * m(2, 1);
+
+    inv(0, 2) = m(0, 1) * m(1, 2) * m(3, 3) -
+                m(0, 1) * m(1, 3) * m(3, 2) -
+                m(1, 1) * m(0, 2) * m(3, 3) +
+                m(1, 1) * m(0, 3) * m(3, 2) +
+                m(3, 1) * m(0, 2) * m(1, 3) -
+                m(3, 1) * m(0, 3) * m(1, 2);
+
+    inv(1, 2) = -m(0, 0) * m(1, 2) * m(3, 3) +
+                m(0, 0) * m(1, 3) * m(3, 2) +
+                m(1, 0) * m(0, 2) * m(3, 3) -
+                m(1, 0) * m(0, 3) * m(3, 2) -
+                m(3, 0) * m(0, 2) * m(1, 3) +
+                m(3, 0) * m(0, 3) * m(1, 2);
+
+    inv(2, 2) = m(0, 0) * m(1, 1) * m(3, 3) -
+                m(0, 0) * m(1, 3) * m(3, 1) -
+                m(1, 0) * m(0, 1) * m(3, 3) +
+                m(1, 0) * m(0, 3) * m(3, 1) +
+                m(3, 0) * m(0, 1) * m(1, 3) -
+                m(3, 0) * m(0, 3) * m(1, 1);
+
+    inv(3, 2) = -m(0, 0) * m(1, 1) * m(3, 2) +
+                m(0, 0) * m(1, 2) * m(3, 1) +
+                m(1, 0) * m(0, 1) * m(3, 2) -
+                m(1, 0) * m(0, 2) * m(3, 1) -
+                m(3, 0) * m(0, 1) * m(1, 2) +
+                m(3, 0) * m(0, 2) * m(1, 1);
+
+    inv(0, 3) = -m(0, 1) * m(1, 2) * m(2, 3) +
+                m(0, 1) * m(1, 3) * m(2, 2) +
+                m(1, 1) * m(0, 2) * m(2, 3) -
+                m(1, 1) * m(0, 3) * m(2, 2) -
+                m(2, 1) * m(0, 2) * m(1, 3) +
+                m(2, 1) * m(0, 3) * m(1, 2);
+
+    inv(1, 3) = m(0, 0) * m(1, 2) * m(2, 3) -
+                m(0, 0) * m(1, 3) * m(2, 2) -
+                m(1, 0) * m(0, 2) * m(2, 3) +
+                m(1, 0) * m(0, 3) * m(2, 2) +
+                m(2, 0) * m(0, 2) * m(1, 3) -
+                m(2, 0) * m(0, 3) * m(1, 2);
+
+    inv(2, 3) = -m(0, 0) * m(1, 1) * m(2, 3) +
+                m(0, 0) * m(1, 3) * m(2, 1) +
+                m(1, 0) * m(0, 1) * m(2, 3) -
+                m(1, 0) * m(0, 3) * m(2, 1) -
+                m(2, 0) * m(0, 1) * m(1, 3) +
+                m(2, 0) * m(0, 3) * m(1, 1);
+
+    inv(3, 3) = m(0, 0) * m(1, 1) * m(2, 2) -
+                m(0, 0) * m(1, 2) * m(2, 1) -
+                m(1, 0) * m(0, 1) * m(2, 2) +
+                m(1, 0) * m(0, 2) * m(2, 1) +
+                m(2, 0) * m(0, 1) * m(1, 2) -
+                m(2, 0) * m(0, 2) * m(1, 1);
+
+    auto det = m(0, 0) * inv(0, 0) +
+               m(0, 1) * inv(1, 0) +
+               m(0, 2) * inv(2, 0) +
+               m(0, 3) * inv(3, 0);
+
+    if (det == 0) {
+        return TMatrix4x4{};
+    }
+    // Integer literal keeps the reciprocal in det's own type (a 1.0 literal would promote float math to double).
+    auto inv_det = 1 / det;
+    for (int i = 0; i < 4; i++) {
+        for (int j = 0; j < 4; j++) {
+            inv(i, j) *= inv_det;
+        }
+    }
+    return inv;
+}
+
+// Streams a 3x3 matrix one row per line; every entry is followed by a single space.
+template
+inline std::ostream& operator<<(std::ostream &os, const TMatrix3x3 &m) {
+    for (int row = 0; row < 3; ++row) {
+        for (int col = 0; col < 3; ++col)
+            os << m(row, col) << " ";
+        os << std::endl;
+    }
+    return os;
+}
+
+// Streams a 4x4 matrix one row per line; every entry is followed by a single space.
+template
+inline std::ostream& operator<<(std::ostream &os, const TMatrix4x4 &m) {
+    for (int row = 0; row < 4; ++row) {
+        for (int col = 0; col < 4; ++col)
+            os << m(row, col) << " ";
+        os << std::endl;
+    }
+    return os;
+}
+
+template
+DEVICE
+TVector2 xform_pt(const TMatrix3x3 &m, const TVector2 &pt) {  // maps (pt, 1) through m, then divides by the third coordinate
+    TVector3 homog{m(0, 0) * pt[0] + m(0, 1) * pt[1] + m(0, 2),
+                   m(1, 0) * pt[0] + m(1, 1) * pt[1] + m(1, 2),
+                   m(2, 0) * pt[0] + m(2, 1) * pt[1] + m(2, 2)};
+    return TVector2{homog[0] / homog[2], homog[1] / homog[2]};
+}
+
+// Reverse-mode derivative of xform_pt: given d_out (the gradient with respect
+// to the transformed point), accumulates (+=) the gradient for m into d_m and
+// the gradient for pt into d_pt. Nothing is zeroed here.
+template
+DEVICE
+void d_xform_pt(const TMatrix3x3 &m, const TVector2 &pt,
+                const TVector2 &d_out,
+                TMatrix3x3 &d_m,
+                TVector2 &d_pt) {
+    // Recompute the forward pass: homogeneous coordinates, then the projected point.
+    TVector3 homog{m(0, 0) * pt[0] + m(0, 1) * pt[1] + m(0, 2),
+                   m(1, 0) * pt[0] + m(1, 1) * pt[1] + m(1, 2),
+                   m(2, 0) * pt[0] + m(2, 1) * pt[1] + m(2, 2)};
+    auto proj = TVector2{homog[0] / homog[2], homog[1] / homog[2]};
+    TVector3 d_homog{d_out[0] / homog[2],
+                     d_out[1] / homog[2],
+                     -(d_out[0] * proj[0] + d_out[1] * proj[1]) / homog[2]};
+    for (int row = 0; row < 3; ++row) {
+        d_m(row, 0) += d_homog[row] * pt[0];
+        d_m(row, 1) += d_homog[row] * pt[1];
+        d_m(row, 2) += d_homog[row];
+    }
+    d_pt[0] += d_homog[0] * m(0, 0) + d_homog[1] * m(1, 0) + d_homog[2] * m(2, 0);
+    d_pt[1] += d_homog[0] * m(0, 1) + d_homog[1] * m(1, 1) + d_homog[2] * m(2, 1);
+}
+
+template
+DEVICE
+TVector2 xform_normal(const TMatrix3x3 &m_inv, const TVector2 &n) {
+    // Multiply n by the transpose of m_inv's upper-left 2x2 block, then normalize the result.
+    return normalize(TVector2{m_inv(0, 0) * n[0] + m_inv(1, 0) * n[1], m_inv(0, 1) * n[0] + m_inv(1, 1) * n[1]});
+}
diff --git a/model_config/model_name_p5_all.csv b/model_config/model_name_p5_all.csv
new file mode 100644
index 0000000000000000000000000000000000000000..aafe25f261dd88008d5cc3b746778b7aa77156c9
--- /dev/null
+++ b/model_config/model_name_p5_all.csv
@@ -0,0 +1,5 @@
+yolov5n
+yolov5s
+yolov5m
+yolov5l
+yolov5x
\ No newline at end of file
diff --git a/model_config/model_name_p5_all.yaml b/model_config/model_name_p5_all.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b178207fb72c5c489d74eec1c45edd1c084b722f
--- /dev/null
+++ b/model_config/model_name_p5_all.yaml
@@ -0,0 +1 @@
+model_names: ["yolov5n", "yolov5s", "yolov5m", "yolov5l", "yolov5x"]
diff --git a/model_config/model_name_p5_n.csv b/model_config/model_name_p5_n.csv
new file mode 100644
index 0000000000000000000000000000000000000000..f13d40609e8f329c832630dd0a81a4fc56a99f8d
--- /dev/null
+++ b/model_config/model_name_p5_n.csv
@@ -0,0 +1 @@
+yolov5n
\ No newline at end of file
diff --git a/model_config/model_name_p5_n.yaml b/model_config/model_name_p5_n.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fa53bf5e8b0c78c9f9d6f797a663a4fb77bd6c5f
--- /dev/null
+++ b/model_config/model_name_p5_n.yaml
@@ -0,0 +1 @@
+model_names: ["yolov5n"]
\ No newline at end of file
diff --git a/model_config/model_name_p6_all.csv b/model_config/model_name_p6_all.csv
new file mode 100644
index 0000000000000000000000000000000000000000..1de274571ba45177344aab6bcfb97d46dda4b836
--- /dev/null
+++ b/model_config/model_name_p6_all.csv
@@ -0,0 +1,5 @@
+yolov5n6
+yolov5s6
+yolov5m6
+yolov5l6
+yolov5x6
\ No newline at end of file
diff --git a/model_config/model_name_p6_all.yaml b/model_config/model_name_p6_all.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f3d4f1764da69d93442e37ee696c873e41a5ebe1
--- /dev/null
+++ b/model_config/model_name_p6_all.yaml
@@ -0,0 +1 @@
+model_names: ["yolov5n6", "yolov5s6", "yolov5m6", "yolov5l6", "yolov5x6"]
\ No newline at end of file
diff --git a/model_download/yolov5_model_p5_all.sh b/model_download/yolov5_model_p5_all.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a8e11f6c73445e2e7855d7b62c2b8ebbb7236e9d
--- /dev/null
+++ b/model_download/yolov5_model_p5_all.sh
@@ -0,0 +1,8 @@
+cd ./yolov5 || exit 1  # stop if the directory is missing so nothing is downloaded elsewhere
+
+# Download the YOLOv5 models
+wget -c -t 0 https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5n.pt
+wget -c -t 0 https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5s.pt
+wget -c -t 0 https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5m.pt
+wget -c -t 0 https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5l.pt
+wget -c -t 0 https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5x.pt
\ No newline at end of file
diff --git a/model_download/yolov5_model_p5_n.sh b/model_download/yolov5_model_p5_n.sh
new file mode 100644
index 0000000000000000000000000000000000000000..2ff8cd2505a95c9f6469c47c3c890681f4df9ebe
--- /dev/null
+++ b/model_download/yolov5_model_p5_n.sh
@@ -0,0 +1,4 @@
+cd ./yolov5 || exit 1  # stop if the directory is missing so nothing is downloaded elsewhere
+
+# Download the YOLOv5 model
+wget -c -t 0 https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5n.pt
\ No newline at end of file
diff --git a/model_download/yolov5_model_p6_all.sh b/model_download/yolov5_model_p6_all.sh
new file mode 100644
index 0000000000000000000000000000000000000000..dfe8d9014e46cf8f7df244095d0115df55e0a209
--- /dev/null
+++ b/model_download/yolov5_model_p6_all.sh
@@ -0,0 +1,8 @@
+cd ./yolov5 || exit 1  # stop if the directory is missing so nothing is downloaded elsewhere
+
+# Download the YOLOv5 models
+wget -c -t 0 https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5n6.pt
+wget -c -t 0 https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5s6.pt
+wget -c -t 0 https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5m6.pt
+wget -c -t 0 https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5l6.pt
+wget -c -t 0 https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5x6.pt
\ No newline at end of file
diff --git a/packages.txt b/packages.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f359f073a1f1a020ae08d923becf10ca1e4afb57
--- /dev/null
+++ b/packages.txt
@@ -0,0 +1 @@
+libgl1-mesa-glx
diff --git a/painterly_rendering.py b/painterly_rendering.py
new file mode 100644
index 0000000000000000000000000000000000000000..f08c9fe32927b05f6a99bf53fa30d3ba584b027d
--- /dev/null
+++ b/painterly_rendering.py
@@ -0,0 +1,223 @@
+"""
+Scream: python painterly_rendering.py imgs/scream.jpg --num_paths 2048 --max_width 4.0
+Fallingwater: python painterly_rendering.py imgs/fallingwater.jpg --num_paths 2048 --max_width 4.0
+Fallingwater: python painterly_rendering.py imgs/fallingwater.jpg --num_paths 2048 --max_width 4.0 --use_lpips_loss
+Baboon: python painterly_rendering.py imgs/baboon.png --num_paths 1024 --max_width 4.0 --num_iter 250
+Baboon Lpips: python painterly_rendering.py imgs/baboon.png --num_paths 1024 --max_width 4.0 --num_iter 500 --use_lpips_loss
+smile: python painterly_rendering.py ../LIVE/figures/smile.png --num_paths 5 --use_blob --num_iter 500
+"""
+import pydiffvg
+import torch
+import skimage
+import skimage.io
+import random
+import ttools.modules
+import argparse
+import math
+
+pydiffvg.set_print_timing(True)
+
+gamma = 1.0
+
+def main(args):
+ # Use GPU if available
+ pydiffvg.set_use_gpu(torch.cuda.is_available())
+
+ perception_loss = ttools.modules.LPIPS().to(pydiffvg.get_device())
+
+ #target = torch.from_numpy(skimage.io.imread('imgs/lena.png')).to(torch.float32) / 255.0
+ target = torch.from_numpy(skimage.io.imread(args.target)).to(torch.float32) / 255.0
+ target = target.pow(gamma)
+ target = target.to(pydiffvg.get_device())
+ target = target.unsqueeze(0)
+ target = target.permute(0, 3, 1, 2) # NHWC -> NCHW
+ #target = torch.nn.functional.interpolate(target, size = [256, 256], mode = 'area')
+ canvas_width, canvas_height = target.shape[3], target.shape[2]
+ num_paths = args.num_paths
+ max_width = args.max_width
+
+ random.seed(1234)
+ torch.manual_seed(1234)
+
+ shapes = []
+ shape_groups = []
+ if args.use_blob:
+ for i in range(num_paths):
+ num_segments = random.randint(3, 5)
+ num_control_points = torch.zeros(num_segments, dtype = torch.int32) + 2
+ points = []
+ p0 = (random.random(), random.random())
+ points.append(p0)
+ for j in range(num_segments):
+ radius = 0.05
+ p1 = (p0[0] + radius * (random.random() - 0.5), p0[1] + radius * (random.random() - 0.5))
+ p2 = (p1[0] + radius * (random.random() - 0.5), p1[1] + radius * (random.random() - 0.5))
+ p3 = (p2[0] + radius * (random.random() - 0.5), p2[1] + radius * (random.random() - 0.5))
+ points.append(p1)
+ points.append(p2)
+ if j < num_segments - 1:
+ points.append(p3)
+ p0 = p3
+ points = torch.tensor(points)
+ points[:, 0] *= canvas_width
+ points[:, 1] *= canvas_height
+ path = pydiffvg.Path(num_control_points = num_control_points,
+ points = points,
+ stroke_width = torch.tensor(1.0),
+ is_closed = True)
+ shapes.append(path)
+ path_group = pydiffvg.ShapeGroup(shape_ids = torch.tensor([len(shapes) - 1]),
+ fill_color = torch.tensor([random.random(),
+ random.random(),
+ random.random(),
+ random.random()]))
+ shape_groups.append(path_group)
+ else:
+ for i in range(num_paths):
+ num_segments = random.randint(1, 3)
+ num_control_points = torch.zeros(num_segments, dtype = torch.int32) + 2
+ points = []
+ p0 = (random.random(), random.random())
+ points.append(p0)
+ for j in range(num_segments):
+ radius = 0.05
+ p1 = (p0[0] + radius * (random.random() - 0.5), p0[1] + radius * (random.random() - 0.5))
+ p2 = (p1[0] + radius * (random.random() - 0.5), p1[1] + radius * (random.random() - 0.5))
+ p3 = (p2[0] + radius * (random.random() - 0.5), p2[1] + radius * (random.random() - 0.5))
+ points.append(p1)
+ points.append(p2)
+ points.append(p3)
+ p0 = p3
+ points = torch.tensor(points)
+ points[:, 0] *= canvas_width
+ points[:, 1] *= canvas_height
+ #points = torch.rand(3 * num_segments + 1, 2) * min(canvas_width, canvas_height)
+ path = pydiffvg.Path(num_control_points = num_control_points,
+ points = points,
+ stroke_width = torch.tensor(1.0),
+ is_closed = False)
+ shapes.append(path)
+ path_group = pydiffvg.ShapeGroup(shape_ids = torch.tensor([len(shapes) - 1]),
+ fill_color = None,
+ stroke_color = torch.tensor([random.random(),
+ random.random(),
+ random.random(),
+ random.random()]))
+ shape_groups.append(path_group)
+
+ scene_args = pydiffvg.RenderFunction.serialize_scene(\
+ canvas_width, canvas_height, shapes, shape_groups)
+
+ render = pydiffvg.RenderFunction.apply
+ img = render(canvas_width, # width
+ canvas_height, # height
+ 2, # num_samples_x
+ 2, # num_samples_y
+ 0, # seed
+ None,
+ *scene_args)
+ pydiffvg.imwrite(img.cpu(), 'results/painterly_rendering/init.png', gamma=gamma)
+
+ points_vars = []
+ stroke_width_vars = []
+ color_vars = []
+ for path in shapes:
+ path.points.requires_grad = True
+ points_vars.append(path.points)
+ if not args.use_blob:
+ for path in shapes:
+ path.stroke_width.requires_grad = True
+ stroke_width_vars.append(path.stroke_width)
+ if args.use_blob:
+ for group in shape_groups:
+ group.fill_color.requires_grad = True
+ color_vars.append(group.fill_color)
+ else:
+ for group in shape_groups:
+ group.stroke_color.requires_grad = True
+ color_vars.append(group.stroke_color)
+
+ # Optimize
+ points_optim = torch.optim.Adam(points_vars, lr=1.0)
+ if len(stroke_width_vars) > 0:
+ width_optim = torch.optim.Adam(stroke_width_vars, lr=0.1)
+ color_optim = torch.optim.Adam(color_vars, lr=0.01)
+ # Adam iterations.
+ for t in range(args.num_iter):
+ print('iteration:', t)
+ points_optim.zero_grad()
+ if len(stroke_width_vars) > 0:
+ width_optim.zero_grad()
+ color_optim.zero_grad()
+ # Forward pass: render the image.
+ scene_args = pydiffvg.RenderFunction.serialize_scene(\
+ canvas_width, canvas_height, shapes, shape_groups)
+ img = render(canvas_width, # width
+ canvas_height, # height
+ 2, # num_samples_x
+ 2, # num_samples_y
+ t, # seed
+ None,
+ *scene_args)
+ # Compose img with white background
+ img = img[:, :, 3:4] * img[:, :, :3] + torch.ones(img.shape[0], img.shape[1], 3, device = pydiffvg.get_device()) * (1 - img[:, :, 3:4])
+ # Save the intermediate render.
+ pydiffvg.imwrite(img.cpu(), 'results/painterly_rendering/iter_{}.png'.format(t), gamma=gamma)
+ img = img[:, :, :3]
+ # Convert img from HWC to NCHW
+ img = img.unsqueeze(0)
+ img = img.permute(0, 3, 1, 2) # NHWC -> NCHW
+ if args.use_lpips_loss:
+ loss = perception_loss(img, target) + (img.mean() - target.mean()).pow(2)
+ else:
+ loss = (img - target).pow(2).mean()
+ print('render loss:', loss.item())
+
+ # Backpropagate the gradients.
+ loss.backward()
+
+ # Take a gradient descent step.
+ points_optim.step()
+ if len(stroke_width_vars) > 0:
+ width_optim.step()
+ color_optim.step()
+ if len(stroke_width_vars) > 0:
+ for path in shapes:
+ path.stroke_width.data.clamp_(1.0, max_width)
+ if args.use_blob:
+ for group in shape_groups:
+ group.fill_color.data.clamp_(0.0, 1.0)
+ else:
+ for group in shape_groups:
+ group.stroke_color.data.clamp_(0.0, 1.0)
+
+ if t % 10 == 0 or t == args.num_iter - 1:
+ pydiffvg.save_svg('results/painterly_rendering/iter_{}.svg'.format(t),
+ canvas_width, canvas_height, shapes, shape_groups)
+
+ # Render the final result.
+ img = render(target.shape[1], # width
+ target.shape[0], # height
+ 2, # num_samples_x
+ 2, # num_samples_y
+ 0, # seed
+ None,
+ *scene_args)
+ # Save the intermediate render.
+ pydiffvg.imwrite(img.cpu(), 'results/painterly_rendering/final.png'.format(t), gamma=gamma)
+ # Convert the intermediate renderings to a video.
+ from subprocess import call
+ call(["ffmpeg", "-framerate", "24", "-i",
+ "results/painterly_rendering/iter_%d.png", "-vb", "20M",
+ "results/painterly_rendering/out.mp4"])
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("target", help="target image path")
+ parser.add_argument("--num_paths", type=int, default=512)
+ parser.add_argument("--max_width", type=float, default=2.0)
+ parser.add_argument("--use_lpips_loss", dest='use_lpips_loss', action='store_true')
+ parser.add_argument("--num_iter", type=int, default=500)
+ parser.add_argument("--use_blob", dest='use_blob', action='store_true')
+ args = parser.parse_args()
+ main(args)
diff --git a/parallel.cpp b/parallel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..365fc5bb305f9cacc780fb5276905e37d3b37e34
--- /dev/null
+++ b/parallel.cpp
@@ -0,0 +1,273 @@
+#include "parallel.h"
+#include
+#include
+#include