Xu Ma committed on
Commit
be11144
1 Parent(s): 6afe7e5
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. DiffVG/CMakeLists.txt +140 -0
  2. DiffVG/aabb.h +67 -0
  3. DiffVG/atomic.cpp +27 -0
  4. DiffVG/atomic.h +139 -0
  5. DiffVG/cdf.h +29 -0
  6. DiffVG/cmake/FindTensorFlow.cmake +34 -0
  7. DiffVG/cmake/FindThrust.cmake +40 -0
  8. DiffVG/color.cpp +25 -0
  9. DiffVG/color.h +63 -0
  10. DiffVG/compute_distance.h +949 -0
  11. DiffVG/cuda_utils.h +53 -0
  12. DiffVG/diffvg.cpp +1792 -0
  13. DiffVG/diffvg.h +156 -0
  14. DiffVG/edge_query.h +7 -0
  15. DiffVG/filter.h +106 -0
  16. DiffVG/matrix.h +544 -0
  17. DiffVG/painterly_rendering.py +223 -0
  18. DiffVG/parallel.cpp +273 -0
  19. DiffVG/parallel.h +91 -0
  20. DiffVG/pcg.h +40 -0
  21. DiffVG/poetry.lock +0 -0
  22. DiffVG/ptr.h +23 -0
  23. DiffVG/pybind11/.appveyor.yml +37 -0
  24. DiffVG/pybind11/.cmake-format.yaml +73 -0
  25. DiffVG/pybind11/.github/CONTRIBUTING.md +171 -0
  26. DiffVG/pybind11/.github/ISSUE_TEMPLATE/bug-report.md +28 -0
  27. DiffVG/pybind11/.github/ISSUE_TEMPLATE/config.yml +5 -0
  28. DiffVG/pybind11/.github/ISSUE_TEMPLATE/feature-request.md +16 -0
  29. DiffVG/pybind11/.github/ISSUE_TEMPLATE/question.md +21 -0
  30. DiffVG/pybind11/.github/workflows/ci.yml +359 -0
  31. DiffVG/pybind11/.github/workflows/configure.yml +78 -0
  32. DiffVG/pybind11/.github/workflows/format.yml +19 -0
  33. DiffVG/pybind11/.gitignore +41 -0
  34. DiffVG/pybind11/.gitmodules +3 -0
  35. DiffVG/pybind11/.pre-commit-config.yaml +44 -0
  36. DiffVG/pybind11/.readthedocs.yml +3 -0
  37. DiffVG/pybind11/CMakeLists.txt +271 -0
  38. DiffVG/pybind11/LICENSE +29 -0
  39. DiffVG/pybind11/MANIFEST.in +2 -0
  40. DiffVG/pybind11/README.md +143 -0
  41. DiffVG/pybind11/docs/Doxyfile +22 -0
  42. DiffVG/pybind11/docs/_static/theme_overrides.css +11 -0
  43. DiffVG/pybind11/docs/advanced/cast/chrono.rst +81 -0
  44. DiffVG/pybind11/docs/advanced/cast/custom.rst +91 -0
  45. DiffVG/pybind11/docs/advanced/cast/eigen.rst +310 -0
  46. DiffVG/pybind11/docs/advanced/cast/functional.rst +109 -0
  47. DiffVG/pybind11/docs/advanced/cast/index.rst +41 -0
  48. DiffVG/pybind11/docs/advanced/cast/overview.rst +165 -0
  49. DiffVG/pybind11/docs/advanced/cast/stl.rst +240 -0
  50. DiffVG/pybind11/docs/advanced/cast/strings.rst +305 -0
DiffVG/CMakeLists.txt ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cmake_minimum_required(VERSION 3.12)
2
+
3
+ project(diffvg VERSION 0.0.1 DESCRIPTION "Differentiable Vector Graphics")
4
+
5
+ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/")
6
+ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
7
+
8
+ if(WIN32)
9
+ find_package(Python 3.6 COMPONENTS Development REQUIRED)
10
+ else()
11
+ find_package(Python 3.7 COMPONENTS Development REQUIRED)
12
+ endif()
13
+ add_subdirectory(pybind11)
14
+
15
+ option(DIFFVG_CUDA "Build diffvg with GPU code path?" ON)
16
+
17
+ if(DIFFVG_CUDA)
18
+ message(STATUS "Build with CUDA support")
19
+ find_package(CUDA 10 REQUIRED)
20
+ set(CMAKE_CUDA_STANDARD 11)
21
+ if(NOT WIN32)
22
+ # Hack: for some reason the line above doesn't work on some Linux systems.
23
+ set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -std=c++11")
24
+ #set(CUDA_NVCC_FLAGS_DEBUG "-g -G")
25
+ endif()
26
+ else()
27
+ message(STATUS "Build without CUDA support")
28
+ find_package(Thrust REQUIRED)
29
+ endif()
30
+
31
+ # include_directories(${CMAKE_SOURCE_DIR}/pybind11/include)
32
+ include_directories(${PYTHON_INCLUDE_PATH})
33
+ find_package(PythonLibs REQUIRED)
34
+ include_directories(${PYTHON_INCLUDE_PATH})
35
+ include_directories(${PYTHON_INCLUDE_DIRS})
36
+ include_directories(pybind11/include)
37
+ if(DIFFVG_CUDA)
38
+ link_directories(${CUDA_LIBRARIES})
39
+ else()
40
+ include_directories(${THRUST_INCLUDE_DIR})
41
+ endif()
42
+
43
+ if(NOT MSVC)
44
+ # These compile definitions are not meaningful for MSVC
45
+ add_compile_options(-Wall -g -O3 -fvisibility=hidden -Wno-unknown-pragmas)
46
+ else()
47
+ add_compile_options(/Wall /Zi)
48
+ add_link_options(/DEBUG)
49
+ endif()
50
+
51
+ if(NOT DIFFVG_CUDA)
52
+ add_compile_options("-DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CPP")
53
+ endif()
54
+
55
+ set(SRCS atomic.h
56
+ color.h
57
+ cdf.h
58
+ cuda_utils.h
59
+ diffvg.h
60
+ edge_query.h
61
+ filter.h
62
+ matrix.h
63
+ parallel.h
64
+ pcg.h
65
+ ptr.h
66
+ sample_boundary.h
67
+ scene.h
68
+ shape.h
69
+ solve.h
70
+ vector.h
71
+ within_distance.h
72
+ winding_number.h
73
+ atomic.cpp
74
+ color.cpp
75
+ diffvg.cpp
76
+ parallel.cpp
77
+ scene.cpp
78
+ shape.cpp)
79
+
80
+ if(DIFFVG_CUDA)
81
+ add_compile_definitions(COMPILE_WITH_CUDA)
82
+ set_source_files_properties(
83
+ diffvg.cpp
84
+ scene.cpp
85
+ PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
86
+
87
+ cuda_add_library(diffvg MODULE ${SRCS})
88
+ else()
89
+ add_library(diffvg MODULE ${SRCS})
90
+ endif()
91
+
92
+ if(APPLE)
93
+ # The "-undefined dynamic_lookup" is a hack for systems with
94
+ # multiple Python versions installed. If we link a particular Python version
95
+ # here and later import the module with a different Python version, the
96
+ # likely result is a segmentation fault.
97
+ # The solution for Linux/Mac OS machines, as mentioned in
98
+ # https://github.com/pybind/pybind11/blob/master/tools/pybind11Tools.cmake
99
+ # is to not link against the Python library at all and resolve the symbols
100
+ # at load time.
101
+ set(DYNAMIC_LOOKUP "-undefined dynamic_lookup")
102
+ endif()
103
+
104
+ target_link_libraries(diffvg ${DYNAMIC_LOOKUP})
105
+
106
+ if(WIN32)
107
+ # See: https://pybind11.readthedocs.io/en/master/compiling.html#advanced-interface-library-target
108
+ target_link_libraries(diffvg pybind11::module)
109
+ set_target_properties(diffvg PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}"
110
+ SUFFIX "${PYTHON_MODULE_EXTENSION}")
111
+ endif()
112
+
113
+ set_target_properties(diffvg PROPERTIES SKIP_BUILD_RPATH FALSE)
114
+ set_target_properties(diffvg PROPERTIES BUILD_WITH_INSTALL_RPATH TRUE)
115
+ if(UNIX AND NOT APPLE)
116
+ set_target_properties(diffvg PROPERTIES INSTALL_RPATH "$ORIGIN")
117
+ elseif(APPLE)
118
+ set_target_properties(diffvg PROPERTIES INSTALL_RPATH "@loader_path")
119
+ endif()
120
+
121
+ set_property(TARGET diffvg PROPERTY CXX_STANDARD 11)
122
+ set_target_properties(diffvg PROPERTIES PREFIX "")
123
+ # Keep assert() enabled in optimized builds: strip NDEBUG (MSVC "/D" and GCC/Clang "-D" spellings) from each language's own Release/RelWithDebInfo flags.
124
+ string( REPLACE "/DNDEBUG" "" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
125
+ string( REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
126
+ string( REPLACE "/DNDEBUG" "" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
127
+ string( REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
128
+ string( REPLACE "/DNDEBUG" "" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
129
+ string( REPLACE "-DNDEBUG" "" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
130
+ string( REPLACE "/DNDEBUG" "" CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO}")
131
+ string( REPLACE "-DNDEBUG" "" CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO}")
132
+
133
+ if(NOT WIN32) # The optional TensorFlow custom ops are only attempted on non-Windows platforms.
134
+ find_package(TensorFlow) # Not REQUIRED: the not-found case is handled below.
135
+ if(TensorFlow_FOUND)
136
+ add_subdirectory(pydiffvg_tensorflow/custom_ops)
137
+ else()
138
+ message(STATUS "Building without TensorFlow support (not found)") # STATUS is a valid message() mode; "INFO" is not and was printed literally.
139
+ endif()
140
+ endif()
DiffVG/aabb.h ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "diffvg.h"
4
+ #include "cuda_utils.h"
5
+ #include "vector.h"
6
+ #include "matrix.h"
7
+
8
+ struct AABB {
9
+ DEVICE
10
+ inline AABB(const Vector2f &p_min = Vector2f{infinity<float>(), infinity<float>()},
11
+ const Vector2f &p_max = Vector2f{-infinity<float>(), -infinity<float>()})
12
+ : p_min(p_min), p_max(p_max) {}
13
+ Vector2f p_min, p_max;
14
+ };
15
+
16
+ DEVICE
17
+ inline
18
+ AABB merge(const AABB &box, const Vector2f &p) {
19
+ return AABB{Vector2f{min(p.x, box.p_min.x), min(p.y, box.p_min.y)},
20
+ Vector2f{max(p.x, box.p_max.x), max(p.y, box.p_max.y)}};
21
+ }
22
+
23
+ DEVICE
24
+ inline
25
+ AABB merge(const AABB &box0, const AABB &box1) {
26
+ return AABB{Vector2f{min(box0.p_min.x, box1.p_min.x), min(box0.p_min.y, box1.p_min.y)},
27
+ Vector2f{max(box0.p_max.x, box1.p_max.x), max(box0.p_max.y, box1.p_max.y)}};
28
+ }
29
+
30
+ DEVICE
31
+ inline
32
+ bool inside(const AABB &box, const Vector2f &p) {
33
+ return p.x >= box.p_min.x && p.x <= box.p_max.x &&
34
+ p.y >= box.p_min.y && p.y <= box.p_max.y;
35
+ }
36
+
37
+ DEVICE
38
+ inline
39
+ bool inside(const AABB &box, const Vector2f &p, float radius) {
40
+ return p.x >= box.p_min.x - radius && p.x <= box.p_max.x + radius &&
41
+ p.y >= box.p_min.y - radius && p.y <= box.p_max.y + radius;
42
+ }
43
+
44
+ DEVICE
45
+ inline
46
+ AABB enlarge(const AABB &box, float width) {
47
+ return AABB{Vector2f{box.p_min.x - width, box.p_min.y - width},
48
+ Vector2f{box.p_max.x + width, box.p_max.y + width}};
49
+ }
50
+
51
+ DEVICE
52
+ inline
53
+ AABB transform(const Matrix3x3f &xform, const AABB &box) {
54
+ auto ret = AABB();
55
+ ret = merge(ret, xform_pt(xform, Vector2f{box.p_min.x, box.p_min.y}));
56
+ ret = merge(ret, xform_pt(xform, Vector2f{box.p_min.x, box.p_max.y}));
57
+ ret = merge(ret, xform_pt(xform, Vector2f{box.p_max.x, box.p_min.y}));
58
+ ret = merge(ret, xform_pt(xform, Vector2f{box.p_max.x, box.p_max.y}));
59
+ return ret;
60
+ }
61
+
62
+ DEVICE
63
+ inline
64
+ bool within_distance(const AABB &box, const Vector2f &pt, float r) {
65
+ return pt.x >= box.p_min.x - r && pt.x <= box.p_max.x + r &&
66
+ pt.y >= box.p_min.y - r && pt.y <= box.p_max.y + r;
67
+ }
DiffVG/atomic.cpp ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //A hacky solution to get around the Ellipse include
2
+
3
+ #ifdef WIN32
4
+ #include <windows.h>
5
+ #include <cstdint>
6
+
7
+ float win_atomic_add(float &target, float source) {
8
+ union { int i; float f; } old_val;
9
+ union { int i; float f; } new_val;
10
+ do {
11
+ old_val.f = target;
12
+ new_val.f = old_val.f + (float)source;
13
+ } while (InterlockedCompareExchange((LONG*)&target, (LONG)new_val.i, (LONG)old_val.i) != old_val.i);
14
+ return old_val.f;
15
+ }
16
+
17
+ double win_atomic_add(double &target, double source) {
18
+ union { int64_t i; double f; } old_val;
19
+ union { int64_t i; double f; } new_val;
20
+ do {
21
+ old_val.f = target;
22
+ new_val.f = old_val.f + (double)source;
23
+ } while (InterlockedCompareExchange64((LONG64*)&target, (LONG64)new_val.i, (LONG64)old_val.i) != old_val.i);
24
+ return old_val.f;
25
+ }
26
+
27
+ #endif
DiffVG/atomic.h ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "diffvg.h"
4
+ #include "vector.h"
5
+ #include "matrix.h"
6
+
7
+ // https://stackoverflow.com/questions/39274472/error-function-atomicadddouble-double-has-already-been-defined
8
+ #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
9
+ #else
10
+ static inline DEVICE double atomicAdd(double *address, double val) {
11
+ unsigned long long int* address_as_ull = (unsigned long long int*)address;
12
+ unsigned long long int old = *address_as_ull, assumed;
13
+ if (val == 0.0)
14
+ return __longlong_as_double(old);
15
+ do {
16
+ assumed = old;
17
+ old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val +__longlong_as_double(assumed)));
18
+ } while (assumed != old);
19
+ return __longlong_as_double(old);
20
+ }
21
+ #endif
22
+
23
+ #ifndef WIN32
24
+ template <typename T0, typename T1>
25
+ DEVICE
26
+ inline T0 atomic_add_(T0 &target, T1 source) {
27
+ #ifdef __CUDA_ARCH__
28
+ return atomicAdd(&target, (T0)source);
29
+ #else
30
+ T0 old_val;
31
+ T0 new_val;
32
+ do {
33
+ old_val = target;
34
+ new_val = old_val + source;
35
+ } while (!__atomic_compare_exchange(&target, &old_val, &new_val, true,
36
+ std::memory_order::memory_order_seq_cst,
37
+ std::memory_order::memory_order_seq_cst));
38
+ return old_val;
39
+ #endif
40
+ }
41
+
42
+ DEVICE
43
+ inline
44
+ float atomic_add(float &target, float source) {
45
+ return atomic_add_(target, source);
46
+ }
47
+ DEVICE
48
+ inline
49
+ double atomic_add(double &target, double source) {
50
+ return atomic_add_(target, source);
51
+ }
52
+ #else
53
+ float win_atomic_add(float &target, float source);
54
+ double win_atomic_add(double &target, double source);
55
+ DEVICE
56
+ static float atomic_add(float &target, float source) {
57
+ #ifdef __CUDA_ARCH__
58
+ return atomicAdd(&target, source);
59
+ #else
60
+ return win_atomic_add(target, source);
61
+ #endif
62
+ }
63
+ DEVICE
64
+ static double atomic_add(double &target, double source) {
65
+ #ifdef __CUDA_ARCH__
66
+ return atomicAdd(&target, (double)source);
67
+ #else
68
+ return win_atomic_add(target, source);
69
+ #endif
70
+ }
71
+ #endif
72
+
73
+ template <typename T0, typename T1>
74
+ DEVICE
75
+ inline T0 atomic_add(T0 *target, T1 source) {
76
+ return atomic_add(*target, (T0)source);
77
+ }
78
+
79
+ template <typename T0, typename T1>
80
+ DEVICE
81
+ inline TVector2<T0> atomic_add(TVector2<T0> &target, const TVector2<T1> &source) {
82
+ atomic_add(target[0], source[0]);
83
+ atomic_add(target[1], source[1]);
84
+ return target;
85
+ }
86
+
87
+ template <typename T0, typename T1>
88
+ DEVICE
89
+ inline void atomic_add(T0 *target, const TVector2<T1> &source) {
90
+ atomic_add(target[0], (T0)source[0]);
91
+ atomic_add(target[1], (T0)source[1]);
92
+ }
93
+
94
+ template <typename T0, typename T1>
95
+ DEVICE
96
+ inline TVector3<T0> atomic_add(TVector3<T0> &target, const TVector3<T1> &source) {
97
+ atomic_add(target[0], source[0]);
98
+ atomic_add(target[1], source[1]);
99
+ atomic_add(target[2], source[2]);
100
+ return target;
101
+ }
102
+
103
+ template <typename T0, typename T1>
104
+ DEVICE
105
+ inline void atomic_add(T0 *target, const TVector3<T1> &source) {
106
+ atomic_add(target[0], (T0)source[0]);
107
+ atomic_add(target[1], (T0)source[1]);
108
+ atomic_add(target[2], (T0)source[2]);
109
+ }
110
+
111
+ template <typename T0, typename T1>
112
+ DEVICE
113
+ inline TVector4<T0> atomic_add(TVector4<T0> &target, const TVector4<T1> &source) {
114
+ atomic_add(target[0], source[0]);
115
+ atomic_add(target[1], source[1]);
116
+ atomic_add(target[2], source[2]);
117
+ atomic_add(target[3], source[3]);
118
+ return target;
119
+ }
120
+
121
+ template <typename T0, typename T1>
122
+ DEVICE
123
+ inline void atomic_add(T0 *target, const TVector4<T1> &source) {
124
+ atomic_add(target[0], (T0)source[0]);
125
+ atomic_add(target[1], (T0)source[1]);
126
+ atomic_add(target[2], (T0)source[2]);
127
+ atomic_add(target[3], (T0)source[3]);
128
+ }
129
+
130
+ template <typename T0, typename T1>
131
+ DEVICE
132
+ inline void atomic_add(T0 *target, const TMatrix3x3<T1> &source) {
133
+ for (int i = 0; i < 3; i++) {
134
+ for (int j = 0; j < 3; j++) {
135
+ atomic_add(target[3 * i + j], (T0)source(i, j));
136
+ }
137
+ }
138
+ }
139
+
DiffVG/cdf.h ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "diffvg.h"
4
+
5
+ DEVICE int sample(const float *cdf, int num_entries, float u, float *updated_u = nullptr) {
6
+ // Binary search the cdf
7
+ auto lb = 0;
8
+ auto len = num_entries - 1 - lb;
9
+ while (len > 0) {
10
+ auto half_len = len / 2;
11
+ auto mid = lb + half_len;
12
+ assert(mid >= 0 && mid < num_entries);
13
+ if (u < cdf[mid]) {
14
+ len = half_len;
15
+ } else {
16
+ lb = mid + 1;
17
+ len = len - half_len - 1;
18
+ }
19
+ }
20
+ lb = clamp(lb, 0, num_entries - 1);
21
+ if (updated_u != nullptr) {
22
+ if (lb > 0) {
23
+ *updated_u = (u - cdf[lb - 1]) / (cdf[lb] - cdf[lb - 1]);
24
+ } else {
25
+ *updated_u = u / cdf[lb];
26
+ }
27
+ }
28
+ return lb;
29
+ }
DiffVG/cmake/FindTensorFlow.cmake ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://github.com/PatWie/tensorflow-cmake/blob/master/cmake/modules/FindTensorFlow.cmake
2
+
3
+ execute_process(
4
+ COMMAND python -c "exec(\"try:\\n import tensorflow as tf; print(tf.__version__); print(tf.__cxx11_abi_flag__);print(tf.sysconfig.get_include()); print(tf.sysconfig.get_lib())\\nexcept ImportError:\\n exit(1)\")"
5
+ OUTPUT_VARIABLE TF_INFORMATION_STRING
6
+ OUTPUT_STRIP_TRAILING_WHITESPACE
7
+ RESULT_VARIABLE retcode)
8
+
9
+ if("${retcode}" STREQUAL "0")
10
+ string(REPLACE "\n" ";" TF_INFORMATION_LIST ${TF_INFORMATION_STRING})
11
+ list(GET TF_INFORMATION_LIST 0 TF_DETECTED_VERSION)
12
+ list(GET TF_INFORMATION_LIST 1 TF_DETECTED_ABI)
13
+ list(GET TF_INFORMATION_LIST 2 TF_DETECTED_INCLUDE_DIR)
14
+ list(GET TF_INFORMATION_LIST 3 TF_DETECTED_LIBRARY_DIR)
15
+ if(WIN32)
16
+ find_library(TF_DETECTED_LIBRARY NAMES _pywrap_tensorflow_internal PATHS
17
+ ${TF_DETECTED_LIBRARY_DIR}/python)
18
+ else()
19
+ # For some reason my tensorflow doesn't have a .so file
20
+ list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES .so.1)
21
+ list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES .so.2)
22
+ find_library(TF_DETECTED_LIBRARY NAMES tensorflow_framework PATHS
23
+ ${TF_DETECTED_LIBRARY_DIR})
24
+ endif()
25
+ set(TensorFlow_VERSION ${TF_DETECTED_VERSION})
26
+ set(TensorFlow_ABI ${TF_DETECTED_ABI})
27
+ set(TensorFlow_INCLUDE_DIR ${TF_DETECTED_INCLUDE_DIR})
28
+ set(TensorFlow_LIBRARY ${TF_DETECTED_LIBRARY})
29
+ if(TensorFlow_LIBRARY AND TensorFlow_INCLUDE_DIR)
30
+ set(TensorFlow_FOUND TRUE)
31
+ else()
32
+ set(TensorFlow_FOUND FALSE)
33
+ endif()
34
+ endif()
DiffVG/cmake/FindThrust.cmake ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##=============================================================================
2
+ ##
3
+ ## Copyright (c) Kitware, Inc.
4
+ ## All rights reserved.
5
+ ## See LICENSE.txt for details.
6
+ ##
7
+ ## This software is distributed WITHOUT ANY WARRANTY; without even
8
+ ## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
9
+ ## PURPOSE. See the above copyright notice for more information.
10
+ ##
11
+ ## Copyright 2012 Sandia Corporation.
12
+ ## Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
13
+ ## the U.S. Government retains certain rights in this software.
14
+ ##
15
+ ##=============================================================================
16
+
17
+ #
18
+ # FindThrust
19
+ #
20
+ # This module finds the Thrust header files and extracts their version. It
21
+ # sets the following variables.
22
+ #
23
+ # THRUST_INCLUDE_DIR - Include directory for thrust header files. (All header
24
+ # files will actually be in the thrust subdirectory.)
25
+ # THRUST_VERSION - Version of thrust in the form "major.minor.patch".
26
+ #
27
+
28
+ find_path(THRUST_INCLUDE_DIR
29
+ HINTS /usr/include/cuda
30
+ /usr/local/include
31
+ /usr/local/cuda/include
32
+ ${CUDA_INCLUDE_DIRS}
33
+ ./thrust
34
+ ../thrust
35
+ NAMES thrust/version.h
36
+ )
37
+
38
+ if (THRUST_INCLUDE_DIR)
39
+ set(THRUST_FOUND TRUE)
40
+ endif ()
DiffVG/color.cpp ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "color.h"
2
+
3
+ void LinearGradient::copy_to(ptr<float> stop_offsets,
4
+ ptr<float> stop_colors) const {
5
+ float *o = stop_offsets.get();
6
+ float *c = stop_colors.get();
7
+ for (int i = 0; i < num_stops; i++) {
8
+ o[i] = this->stop_offsets[i];
9
+ }
10
+ for (int i = 0; i < 4 * num_stops; i++) {
11
+ c[i] = this->stop_colors[i];
12
+ }
13
+ }
14
+
15
+ void RadialGradient::copy_to(ptr<float> stop_offsets,
16
+ ptr<float> stop_colors) const {
17
+ float *o = stop_offsets.get();
18
+ float *c = stop_colors.get();
19
+ for (int i = 0; i < num_stops; i++) {
20
+ o[i] = this->stop_offsets[i];
21
+ }
22
+ for (int i = 0; i < 4 * num_stops; i++) {
23
+ c[i] = this->stop_colors[i];
24
+ }
25
+ }
DiffVG/color.h ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "diffvg.h"
4
+ #include "vector.h"
5
+ #include "ptr.h"
6
+
7
+ enum class ColorType {
8
+ Constant,
9
+ LinearGradient,
10
+ RadialGradient
11
+ };
12
+
13
+ struct Constant {
14
+ Vector4f color;
15
+
16
+ ptr<void> get_ptr() {
17
+ return ptr<void>(this);
18
+ }
19
+ };
20
+
21
+ struct LinearGradient {
22
+ LinearGradient(const Vector2f &begin,
23
+ const Vector2f &end,
24
+ int num_stops,
25
+ ptr<float> stop_offsets,
26
+ ptr<float> stop_colors)
27
+ : begin(begin), end(end), num_stops(num_stops),
28
+ stop_offsets(stop_offsets.get()), stop_colors(stop_colors.get()) {}
29
+
30
+ ptr<void> get_ptr() {
31
+ return ptr<void>(this);
32
+ }
33
+
34
+ void copy_to(ptr<float> stop_offset,
35
+ ptr<float> stop_colors) const;
36
+
37
+ Vector2f begin, end;
38
+ int num_stops;
39
+ float *stop_offsets;
40
+ float *stop_colors; // rgba
41
+ };
42
+
43
+ struct RadialGradient {
44
+ RadialGradient(const Vector2f &center,
45
+ const Vector2f &radius,
46
+ int num_stops,
47
+ ptr<float> stop_offsets,
48
+ ptr<float> stop_colors)
49
+ : center(center), radius(radius), num_stops(num_stops),
50
+ stop_offsets(stop_offsets.get()), stop_colors(stop_colors.get()) {}
51
+
52
+ ptr<void> get_ptr() {
53
+ return ptr<void>(this);
54
+ }
55
+
56
+ void copy_to(ptr<float> stop_offset,
57
+ ptr<float> stop_colors) const;
58
+
59
+ Vector2f center, radius;
60
+ int num_stops;
61
+ float *stop_offsets;
62
+ float *stop_colors; // rgba
63
+ };
DiffVG/compute_distance.h ADDED
@@ -0,0 +1,949 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "diffvg.h"
4
+ #include "edge_query.h"
5
+ #include "scene.h"
6
+ #include "shape.h"
7
+ #include "solve.h"
8
+ #include "vector.h"
9
+
10
+ #include <cassert>
11
+
12
+ struct ClosestPointPathInfo {
13
+ int base_point_id;
14
+ int point_id;
15
+ float t_root;
16
+ };
17
+
18
+ DEVICE
19
+ inline
20
+ bool closest_point(const Circle &circle, const Vector2f &pt,
21
+ Vector2f *result) {
22
+ *result = circle.center + circle.radius * normalize(pt - circle.center);
23
+ return false;
24
+ }
25
+
26
+ DEVICE
27
+ inline
28
+ bool closest_point(const Path &path, const BVHNode *bvh_nodes, const Vector2f &pt, float max_radius,
29
+ ClosestPointPathInfo *path_info,
30
+ Vector2f *result) {
31
+ auto min_dist = max_radius;
32
+ auto ret_pt = Vector2f{0, 0};
33
+ auto found = false;
34
+ auto num_segments = path.num_base_points;
35
+ constexpr auto max_bvh_size = 128;
36
+ int bvh_stack[max_bvh_size];
37
+ auto stack_size = 0;
38
+ bvh_stack[stack_size++] = 2 * num_segments - 2;
39
+ while (stack_size > 0) {
40
+ const BVHNode &node = bvh_nodes[bvh_stack[--stack_size]];
41
+ if (node.child1 < 0) {
42
+ // leaf
43
+ auto base_point_id = node.child0;
44
+ auto point_id = - node.child1 - 1;
45
+ assert(base_point_id < num_segments);
46
+ assert(point_id < path.num_points);
47
+ auto dist = 0.f;
48
+ auto closest_pt = Vector2f{0, 0};
49
+ auto t_root = 0.f;
50
+ if (path.num_control_points[base_point_id] == 0) {
51
+ // Straight line
52
+ auto i0 = point_id;
53
+ auto i1 = (point_id + 1) % path.num_points;
54
+ auto p0 = Vector2f{path.points[2 * i0], path.points[2 * i0 + 1]};
55
+ auto p1 = Vector2f{path.points[2 * i1], path.points[2 * i1 + 1]};
56
+ // project pt to line
57
+ auto t = dot(pt - p0, p1 - p0) / dot(p1 - p0, p1 - p0);
58
+ if (t < 0) {
59
+ dist = distance(p0, pt);
60
+ closest_pt = p0;
61
+ t_root = 0;
62
+ } else if (t > 1) {
63
+ dist = distance(p1, pt);
64
+ closest_pt = p1;
65
+ t_root = 1;
66
+ } else {
67
+ dist = distance(p0 + t * (p1 - p0), pt);
68
+ closest_pt = p0 + t * (p1 - p0);
69
+ t_root = t;
70
+ }
71
+ } else if (path.num_control_points[base_point_id] == 1) {
72
+ // Quadratic Bezier curve
73
+ auto i0 = point_id;
74
+ auto i1 = point_id + 1;
75
+ auto i2 = (point_id + 2) % path.num_points;
76
+ auto p0 = Vector2f{path.points[2 * i0], path.points[2 * i0 + 1]};
77
+ auto p1 = Vector2f{path.points[2 * i1], path.points[2 * i1 + 1]};
78
+ auto p2 = Vector2f{path.points[2 * i2], path.points[2 * i2 + 1]};
79
+ if (path.use_distance_approx) {
80
+ closest_pt = quadratic_closest_pt_approx(p0, p1, p2, pt, &t_root);
81
+ dist = distance(closest_pt, pt);
82
+ } else {
83
+ auto eval = [&](float t) -> Vector2f {
84
+ auto tt = 1 - t;
85
+ return (tt*tt)*p0 + (2*tt*t)*p1 + (t*t)*p2;
86
+ };
87
+ auto pt0 = eval(0);
88
+ auto pt1 = eval(1);
89
+ auto dist0 = distance(pt0, pt);
90
+ auto dist1 = distance(pt1, pt);
91
+ {
92
+ dist = dist0;
93
+ closest_pt = pt0;
94
+ t_root = 0;
95
+ }
96
+ if (dist1 < dist) {
97
+ dist = dist1;
98
+ closest_pt = pt1;
99
+ t_root = 1;
100
+ }
101
+ // The curve is (1-t)^2p0 + 2(1-t)tp1 + t^2p2
102
+ // = (p0-2p1+p2)t^2+(-2p0+2p1)t+p0 = q
103
+ // Want to solve (q - pt) dot q' = 0
104
+ // q' = (p0-2p1+p2)t + (-p0+p1)
105
+ // Expanding (p0-2p1+p2)^2 t^3 +
106
+ // 3(p0-2p1+p2)(-p0+p1) t^2 +
107
+ // (2(-p0+p1)^2+(p0-2p1+p2)(p0-pt))t +
108
+ // (-p0+p1)(p0-pt) = 0
109
+ auto A = sum((p0-2*p1+p2)*(p0-2*p1+p2));
110
+ auto B = sum(3*(p0-2*p1+p2)*(-p0+p1));
111
+ auto C = sum(2*(-p0+p1)*(-p0+p1)+(p0-2*p1+p2)*(p0-pt));
112
+ auto D = sum((-p0+p1)*(p0-pt));
113
+ float t[3];
114
+ int num_sol = solve_cubic(A, B, C, D, t);
115
+ for (int j = 0; j < num_sol; j++) {
116
+ if (t[j] >= 0 && t[j] <= 1) {
117
+ auto p = eval(t[j]);
118
+ auto distp = distance(p, pt);
119
+ if (distp < dist) {
120
+ dist = distp;
121
+ closest_pt = p;
122
+ t_root = t[j];
123
+ }
124
+ }
125
+ }
126
+ }
127
+ } else if (path.num_control_points[base_point_id] == 2) {
128
+ // Cubic Bezier curve
129
+ auto i0 = point_id;
130
+ auto i1 = point_id + 1;
131
+ auto i2 = point_id + 2;
132
+ auto i3 = (point_id + 3) % path.num_points;
133
+ auto p0 = Vector2f{path.points[2 * i0], path.points[2 * i0 + 1]};
134
+ auto p1 = Vector2f{path.points[2 * i1], path.points[2 * i1 + 1]};
135
+ auto p2 = Vector2f{path.points[2 * i2], path.points[2 * i2 + 1]};
136
+ auto p3 = Vector2f{path.points[2 * i3], path.points[2 * i3 + 1]};
137
+ auto eval = [&](float t) -> Vector2f {
138
+ auto tt = 1 - t;
139
+ return (tt*tt*tt)*p0 + (3*tt*tt*t)*p1 + (3*tt*t*t)*p2 + (t*t*t)*p3;
140
+ };
141
+ auto pt0 = eval(0);
142
+ auto pt1 = eval(1);
143
+ auto dist0 = distance(pt0, pt);
144
+ auto dist1 = distance(pt1, pt);
145
+ {
146
+ dist = dist0;
147
+ closest_pt = pt0;
148
+ t_root = 0;
149
+ }
150
+ if (dist1 < dist) {
151
+ dist = dist1;
152
+ closest_pt = pt1;
153
+ t_root = 1;
154
+ }
155
+ // The curve is (1 - t)^3 p0 + 3 * (1 - t)^2 t p1 + 3 * (1 - t) t^2 p2 + t^3 p3
156
+ // = (-p0+3p1-3p2+p3) t^3 + (3p0-6p1+3p2) t^2 + (-3p0+3p1) t + p0
157
+ // Want to solve (q - pt) dot q' = 0
158
+ // q' = 3*(-p0+3p1-3p2+p3)t^2 + 2*(3p0-6p1+3p2)t + (-3p0+3p1)
159
+ // Expanding
160
+ // 3*(-p0+3p1-3p2+p3)^2 t^5
161
+ // 5*(-p0+3p1-3p2+p3)(3p0-6p1+3p2) t^4
162
+ // 4*(-p0+3p1-3p2+p3)(-3p0+3p1) + 2*(3p0-6p1+3p2)^2 t^3
163
+ // 3*(3p0-6p1+3p2)(-3p0+3p1) + 3*(-p0+3p1-3p2+p3)(p0-pt) t^2
164
+ // (-3p0+3p1)^2+2(p0-pt)(3p0-6p1+3p2) t
165
+ // (p0-pt)(-3p0+3p1)
166
+ double A = 3*sum((-p0+3*p1-3*p2+p3)*(-p0+3*p1-3*p2+p3));
167
+ double B = 5*sum((-p0+3*p1-3*p2+p3)*(3*p0-6*p1+3*p2));
168
+ double C = 4*sum((-p0+3*p1-3*p2+p3)*(-3*p0+3*p1)) + 2*sum((3*p0-6*p1+3*p2)*(3*p0-6*p1+3*p2));
169
+ double D = 3*(sum((3*p0-6*p1+3*p2)*(-3*p0+3*p1)) + sum((-p0+3*p1-3*p2+p3)*(p0-pt)));
170
+ double E = sum((-3*p0+3*p1)*(-3*p0+3*p1)) + 2*sum((p0-pt)*(3*p0-6*p1+3*p2));
171
+ double F = sum((p0-pt)*(-3*p0+3*p1));
172
+ // normalize the polynomial
173
+ B /= A;
174
+ C /= A;
175
+ D /= A;
176
+ E /= A;
177
+ F /= A;
178
+ // Isolator Polynomials:
179
+ // https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.133.2233&rep=rep1&type=pdf
180
+ // x/5 + B/25
181
+ // /-----------------------------------------------------
182
+ // 5x^4 + 4B x^3 + 3C x^2 + 2D x + E / x^5 + B x^4 + C x^3 + D x^2 + E x + F
183
+ // x^5 + 4B/5 x^4 + 3C/5 x^3 + 2D/5 x^2 + E/5 x
184
+ // ----------------------------------------------------
185
+ // B/5 x^4 + 2C/5 x^3 + 3D/5 x^2 + 4E/5 x + F
186
+ // B/5 x^4 + 4B^2/25 x^3 + 3BC/25 x^2 + 2BD/25 x + BE/25
187
+ // ----------------------------------------------------
188
+ // (2C/5 - 4B^2/25)x^3 + (3D/5-3BC/25)x^2 + (4E/5-2BD/25)x + (F-BE/25)
189
+ auto p1A = ((2 / 5.f) * C - (4 / 25.f) * B * B);
190
+ auto p1B = ((3 / 5.f) * D - (3 / 25.f) * B * C);
191
+ auto p1C = ((4 / 5.f) * E - (2 / 25.f) * B * D);
192
+ auto p1D = F - B * E / 25.f;
193
+ // auto q1A = 1 / 5.f;
194
+ // auto q1B = B / 25.f;
195
+ // x/5 + B/25 = 0
196
+ // x = -B/5
197
+ auto q_root = -B/5.f;
198
+ double p_roots[3];
199
+ int num_sol = solve_cubic(p1A, p1B, p1C, p1D, p_roots);
200
+ float intervals[4];
201
+ if (q_root >= 0 && q_root <= 1) {
202
+ intervals[0] = q_root;
203
+ }
204
+ for (int j = 0; j < num_sol; j++) {
205
+ intervals[j + 1] = p_roots[j];
206
+ }
207
+ auto num_intervals = 1 + num_sol;
208
+ // sort intervals
209
+ for (int j = 1; j < num_intervals; j++) {
210
+ for (int k = j; k > 0 && intervals[k - 1] > intervals[k]; k--) {
211
+ auto tmp = intervals[k];
212
+ intervals[k] = intervals[k - 1];
213
+ intervals[k - 1] = tmp;
214
+ }
215
+ }
216
+ auto eval_polynomial = [&] (double t) {
217
+ return t*t*t*t*t+
218
+ B*t*t*t*t+
219
+ C*t*t*t+
220
+ D*t*t+
221
+ E*t+
222
+ F;
223
+ };
224
+ auto eval_polynomial_deriv = [&] (double t) {
225
+ return 5*t*t*t*t+
226
+ 4*B*t*t*t+
227
+ 3*C*t*t+
228
+ 2*D*t+
229
+ E;
230
+ };
231
+ auto lower_bound = 0.f;
232
+ for (int j = 0; j < num_intervals + 1; j++) {
233
+ if (j < num_intervals && intervals[j] < 0.f) {
234
+ continue;
235
+ }
236
+ auto upper_bound = j < num_intervals ?
237
+ min(intervals[j], 1.f) : 1.f;
238
+ auto lb = lower_bound;
239
+ auto ub = upper_bound;
240
+ auto lb_eval = eval_polynomial(lb);
241
+ auto ub_eval = eval_polynomial(ub);
242
+ if (lb_eval * ub_eval > 0) {
243
+ // Doesn't have root
244
+ continue;
245
+ }
246
+ if (lb_eval > ub_eval) {
247
+ swap_(lb, ub);
248
+ }
249
+ auto t = 0.5f * (lb + ub);
250
+ auto num_iter = 20;
251
+ for (int it = 0; it < num_iter; it++) {
252
+ if (!(t >= lb && t <= ub)) {
253
+ t = 0.5f * (lb + ub);
254
+ }
255
+ auto value = eval_polynomial(t);
256
+ if (fabs(value) < 1e-5f || it == num_iter - 1) {
257
+ break;
258
+ }
259
+ // The derivative may not be entirely accurate,
260
+ // but the bisection is going to handle this
261
+ if (value > 0.f) {
262
+ ub = t;
263
+ } else {
264
+ lb = t;
265
+ }
266
+ auto derivative = eval_polynomial_deriv(t);
267
+ t -= value / derivative;
268
+ }
269
+ auto p = eval(t);
270
+ auto distp = distance(p, pt);
271
+ if (distp < dist) {
272
+ dist = distp;
273
+ closest_pt = p;
274
+ t_root = t;
275
+ }
276
+ if (upper_bound >= 1.f) {
277
+ break;
278
+ }
279
+ lower_bound = upper_bound;
280
+ }
281
+ } else {
282
+ assert(false);
283
+ }
284
+ if (dist < min_dist) {
285
+ min_dist = dist;
286
+ ret_pt = closest_pt;
287
+ path_info->base_point_id = base_point_id;
288
+ path_info->point_id = point_id;
289
+ path_info->t_root = t_root;
290
+ found = true;
291
+ }
292
+ } else {
293
+ assert(node.child0 >= 0 && node.child1 >= 0);
294
+ const AABB &b0 = bvh_nodes[node.child0].box;
295
+ if (within_distance(b0, pt, min_dist)) {
296
+ bvh_stack[stack_size++] = node.child0;
297
+ }
298
+ const AABB &b1 = bvh_nodes[node.child1].box;
299
+ if (within_distance(b1, pt, min_dist)) {
300
+ bvh_stack[stack_size++] = node.child1;
301
+ }
302
+ assert(stack_size <= max_bvh_size);
303
+ }
304
+ }
305
+ if (found) {
306
+ assert(path_info->base_point_id < num_segments);
307
+ }
308
+ *result = ret_pt;
309
+ return found;
310
+ }
311
+
312
+ DEVICE
313
+ inline
314
+ bool closest_point(const Rect &rect, const Vector2f &pt,
315
+ Vector2f *result) {
316
+ auto min_dist = 0.f;
317
+ auto closest_pt = Vector2f{0, 0};
318
+ auto update = [&](const Vector2f &p0, const Vector2f &p1, bool first) {
319
+ // project pt to line
320
+ auto t = dot(pt - p0, p1 - p0) / dot(p1 - p0, p1 - p0);
321
+ if (t < 0) {
322
+ auto d = distance(p0, pt);
323
+ if (first || d < min_dist) {
324
+ min_dist = d;
325
+ closest_pt = p0;
326
+ }
327
+ } else if (t > 1) {
328
+ auto d = distance(p1, pt);
329
+ if (first || d < min_dist) {
330
+ min_dist = d;
331
+ closest_pt = p1;
332
+ }
333
+ } else {
334
+ auto p = p0 + t * (p1 - p0);
335
+ auto d = distance(p, pt);
336
+ if (first || d < min_dist) {
337
+ min_dist = d;
338
+ closest_pt = p0;
339
+ }
340
+ }
341
+ };
342
+ auto left_top = rect.p_min;
343
+ auto right_top = Vector2f{rect.p_max.x, rect.p_min.y};
344
+ auto left_bottom = Vector2f{rect.p_min.x, rect.p_max.y};
345
+ auto right_bottom = rect.p_max;
346
+ update(left_top, left_bottom, true);
347
+ update(left_top, right_top, false);
348
+ update(right_top, right_bottom, false);
349
+ update(left_bottom, right_bottom, false);
350
+ *result = closest_pt;
351
+ return true;
352
+ }
353
+
354
+ DEVICE
355
+ inline
356
+ bool closest_point(const Shape &shape, const BVHNode *bvh_nodes, const Vector2f &pt, float max_radius,
357
+ ClosestPointPathInfo *path_info,
358
+ Vector2f *result) {
359
+ switch (shape.type) {
360
+ case ShapeType::Circle:
361
+ return closest_point(*(const Circle *)shape.ptr, pt, result);
362
+ case ShapeType::Ellipse:
363
+ // https://www.geometrictools.com/Documentation/DistancePointEllipseEllipsoid.pdf
364
+ assert(false);
365
+ return false;
366
+ case ShapeType::Path:
367
+ return closest_point(*(const Path *)shape.ptr, bvh_nodes, pt, max_radius, path_info, result);
368
+ case ShapeType::Rect:
369
+ return closest_point(*(const Rect *)shape.ptr, pt, result);
370
+ }
371
+ assert(false);
372
+ return false;
373
+ }
374
+
375
+ DEVICE
376
+ inline
377
+ bool compute_distance(const SceneData &scene,
378
+ int shape_group_id,
379
+ const Vector2f &pt,
380
+ float max_radius,
381
+ int *min_shape_id,
382
+ Vector2f *closest_pt_,
383
+ ClosestPointPathInfo *path_info,
384
+ float *result) {
385
+ const ShapeGroup &shape_group = scene.shape_groups[shape_group_id];
386
+ // pt is in canvas space, transform it to shape's local space
387
+ auto local_pt = xform_pt(shape_group.canvas_to_shape, pt);
388
+
389
+ constexpr auto max_bvh_stack_size = 64;
390
+ int bvh_stack[max_bvh_stack_size];
391
+ auto stack_size = 0;
392
+ bvh_stack[stack_size++] = 2 * shape_group.num_shapes - 2;
393
+ const auto &bvh_nodes = scene.shape_groups_bvh_nodes[shape_group_id];
394
+
395
+ auto min_dist = max_radius;
396
+ auto found = false;
397
+
398
+ while (stack_size > 0) {
399
+ const BVHNode &node = bvh_nodes[bvh_stack[--stack_size]];
400
+ if (node.child1 < 0) {
401
+ // leaf
402
+ auto shape_id = node.child0;
403
+ const auto &shape = scene.shapes[shape_id];
404
+ ClosestPointPathInfo local_path_info{-1, -1};
405
+ auto local_closest_pt = Vector2f{0, 0};
406
+ if (closest_point(shape, scene.path_bvhs[shape_id], local_pt, max_radius, &local_path_info, &local_closest_pt)) {
407
+ auto closest_pt = xform_pt(shape_group.shape_to_canvas, local_closest_pt);
408
+ auto dist = distance(closest_pt, pt);
409
+ if (!found || dist < min_dist) {
410
+ found = true;
411
+ min_dist = dist;
412
+ if (min_shape_id != nullptr) {
413
+ *min_shape_id = shape_id;
414
+ }
415
+ if (closest_pt_ != nullptr) {
416
+ *closest_pt_ = closest_pt;
417
+ }
418
+ if (path_info != nullptr) {
419
+ *path_info = local_path_info;
420
+ }
421
+ }
422
+ }
423
+ } else {
424
+ assert(node.child0 >= 0 && node.child1 >= 0);
425
+ const AABB &b0 = bvh_nodes[node.child0].box;
426
+ if (inside(b0, local_pt, max_radius)) {
427
+ bvh_stack[stack_size++] = node.child0;
428
+ }
429
+ const AABB &b1 = bvh_nodes[node.child1].box;
430
+ if (inside(b1, local_pt, max_radius)) {
431
+ bvh_stack[stack_size++] = node.child1;
432
+ }
433
+ assert(stack_size <= max_bvh_stack_size);
434
+ }
435
+ }
436
+
437
+ *result = min_dist;
438
+ return found;
439
+ }
440
+
441
+
442
+ DEVICE
443
+ inline
444
+ void d_closest_point(const Circle &circle,
445
+ const Vector2f &pt,
446
+ const Vector2f &d_closest_pt,
447
+ Circle &d_circle,
448
+ Vector2f &d_pt) {
449
+ // return circle.center + circle.radius * normalize(pt - circle.center);
450
+ auto d_center = d_closest_pt *
451
+ (1 + d_normalize(pt - circle.center, circle.radius * d_closest_pt));
452
+ atomic_add(&d_circle.center.x, d_center);
453
+ atomic_add(&d_circle.radius, dot(d_closest_pt, normalize(pt - circle.center)));
454
+ }
455
+
456
+ DEVICE
457
+ inline
458
+ void d_closest_point(const Path &path,
459
+ const Vector2f &pt,
460
+ const Vector2f &d_closest_pt,
461
+ const ClosestPointPathInfo &path_info,
462
+ Path &d_path,
463
+ Vector2f &d_pt) {
464
+ auto base_point_id = path_info.base_point_id;
465
+ auto point_id = path_info.point_id;
466
+ auto min_t_root = path_info.t_root;
467
+
468
+ if (path.num_control_points[base_point_id] == 0) {
469
+ // Straight line
470
+ auto i0 = point_id;
471
+ auto i1 = (point_id + 1) % path.num_points;
472
+ auto p0 = Vector2f{path.points[2 * i0], path.points[2 * i0 + 1]};
473
+ auto p1 = Vector2f{path.points[2 * i1], path.points[2 * i1 + 1]};
474
+ // project pt to line
475
+ auto t = dot(pt - p0, p1 - p0) / dot(p1 - p0, p1 - p0);
476
+ auto d_p0 = Vector2f{0, 0};
477
+ auto d_p1 = Vector2f{0, 0};
478
+ if (t < 0) {
479
+ d_p0 += d_closest_pt;
480
+ } else if (t > 1) {
481
+ d_p1 += d_closest_pt;
482
+ } else {
483
+ auto d_p = d_closest_pt;
484
+ // p = p0 + t * (p1 - p0)
485
+ d_p0 += d_p * (1 - t);
486
+ d_p1 += d_p * t;
487
+ }
488
+ atomic_add(d_path.points + 2 * i0, d_p0);
489
+ atomic_add(d_path.points + 2 * i1, d_p1);
490
+ } else if (path.num_control_points[base_point_id] == 1) {
491
+ // Quadratic Bezier curve
492
+ auto i0 = point_id;
493
+ auto i1 = point_id + 1;
494
+ auto i2 = (point_id + 2) % path.num_points;
495
+ auto p0 = Vector2f{path.points[2 * i0], path.points[2 * i0 + 1]};
496
+ auto p1 = Vector2f{path.points[2 * i1], path.points[2 * i1 + 1]};
497
+ auto p2 = Vector2f{path.points[2 * i2], path.points[2 * i2 + 1]};
498
+ // auto eval = [&](float t) -> Vector2f {
499
+ // auto tt = 1 - t;
500
+ // return (tt*tt)*p0 + (2*tt*t)*p1 + (t*t)*p2;
501
+ // };
502
+ // auto dist0 = distance(eval(0), pt);
503
+ // auto dist1 = distance(eval(1), pt);
504
+ auto d_p0 = Vector2f{0, 0};
505
+ auto d_p1 = Vector2f{0, 0};
506
+ auto d_p2 = Vector2f{0, 0};
507
+ auto t = min_t_root;
508
+ if (t == 0) {
509
+ d_p0 += d_closest_pt;
510
+ } else if (t == 1) {
511
+ d_p2 += d_closest_pt;
512
+ } else {
513
+ // The curve is (1-t)^2p0 + 2(1-t)tp1 + t^2p2
514
+ // = (p0-2p1+p2)t^2+(-2p0+2p1)t+p0 = q
515
+ // Want to solve (q - pt) dot q' = 0
516
+ // q' = (p0-2p1+p2)t + (-p0+p1)
517
+ // Expanding (p0-2p1+p2)^2 t^3 +
518
+ // 3(p0-2p1+p2)(-p0+p1) t^2 +
519
+ // (2(-p0+p1)^2+(p0-2p1+p2)(p0-pt))t +
520
+ // (-p0+p1)(p0-pt) = 0
521
+ auto A = sum((p0-2*p1+p2)*(p0-2*p1+p2));
522
+ auto B = sum(3*(p0-2*p1+p2)*(-p0+p1));
523
+ auto C = sum(2*(-p0+p1)*(-p0+p1)+(p0-2*p1+p2)*(p0-pt));
524
+ // auto D = sum((-p0+p1)*(p0-pt));
525
+ auto d_p = d_closest_pt;
526
+ // p = eval(t)
527
+ auto tt = 1 - t;
528
+ // (tt*tt)*p0 + (2*tt*t)*p1 + (t*t)*p2
529
+ auto d_tt = 2 * tt * dot(d_p, p0) + 2 * t * dot(d_p, p1);
530
+ auto d_t = -d_tt + 2 * tt * dot(d_p, p1) + 2 * t * dot(d_p, p2);
531
+ auto d_p0 = d_p * tt * tt;
532
+ auto d_p1 = 2 * d_p * tt * t;
533
+ auto d_p2 = d_p * t * t;
534
+ // implicit function theorem: dt/dA = -1/(p'(t)) * dp/dA
535
+ auto poly_deriv_t = 3 * A * t * t + 2 * B * t + C;
536
+ if (fabs(poly_deriv_t) > 1e-6f) {
537
+ auto d_A = - (d_t / poly_deriv_t) * t * t * t;
538
+ auto d_B = - (d_t / poly_deriv_t) * t * t;
539
+ auto d_C = - (d_t / poly_deriv_t) * t;
540
+ auto d_D = - (d_t / poly_deriv_t);
541
+ // A = sum((p0-2*p1+p2)*(p0-2*p1+p2))
542
+ // B = sum(3*(p0-2*p1+p2)*(-p0+p1))
543
+ // C = sum(2*(-p0+p1)*(-p0+p1)+(p0-2*p1+p2)*(p0-pt))
544
+ // D = sum((-p0+p1)*(p0-pt))
545
+ d_p0 += 2*d_A*(p0-2*p1+p2)+
546
+ 3*d_B*((-p0+p1)-(p0-2*p1+p2))+
547
+ 2*d_C*(-2*(-p0+p1))+
548
+ d_C*((p0-pt)+(p0-2*p1+p2))+
549
+ 2*d_D*(-(p0-pt)+(-p0+p1));
550
+ d_p1 += (-2)*2*d_A*(p0-2*p1+p2)+
551
+ 3*d_B*(-2*(-p0+p1)+(p0-2*p1+p2))+
552
+ 2*d_C*(2*(-p0+p1))+
553
+ d_C*((-2)*(p0-pt))+
554
+ d_D*(p0-pt);
555
+ d_p2 += 2*d_A*(p0-2*p1+p2)+
556
+ 3*d_B*(-p0+p1)+
557
+ d_C*(p0-pt);
558
+ d_pt += d_C*(-(p0-2*p1+p2))+
559
+ d_D*(-(-p0+p1));
560
+ }
561
+ }
562
+ atomic_add(d_path.points + 2 * i0, d_p0);
563
+ atomic_add(d_path.points + 2 * i1, d_p1);
564
+ atomic_add(d_path.points + 2 * i2, d_p2);
565
+ } else if (path.num_control_points[base_point_id] == 2) {
566
+ // Cubic Bezier curve
567
+ auto i0 = point_id;
568
+ auto i1 = point_id + 1;
569
+ auto i2 = point_id + 2;
570
+ auto i3 = (point_id + 3) % path.num_points;
571
+ auto p0 = Vector2f{path.points[2 * i0], path.points[2 * i0 + 1]};
572
+ auto p1 = Vector2f{path.points[2 * i1], path.points[2 * i1 + 1]};
573
+ auto p2 = Vector2f{path.points[2 * i2], path.points[2 * i2 + 1]};
574
+ auto p3 = Vector2f{path.points[2 * i3], path.points[2 * i3 + 1]};
575
+ // auto eval = [&](float t) -> Vector2f {
576
+ // auto tt = 1 - t;
577
+ // return (tt*tt*tt)*p0 + (3*tt*tt*t)*p1 + (3*tt*t*t)*p2 + (t*t*t)*p3;
578
+ // };
579
+ auto d_p0 = Vector2f{0, 0};
580
+ auto d_p1 = Vector2f{0, 0};
581
+ auto d_p2 = Vector2f{0, 0};
582
+ auto d_p3 = Vector2f{0, 0};
583
+ auto t = min_t_root;
584
+ if (t == 0) {
585
+ // closest_pt = p0
586
+ d_p0 += d_closest_pt;
587
+ } else if (t == 1) {
588
+ // closest_pt = p1
589
+ d_p3 += d_closest_pt;
590
+ } else {
591
+ // The curve is (1 - t)^3 p0 + 3 * (1 - t)^2 t p1 + 3 * (1 - t) t^2 p2 + t^3 p3
592
+ // = (-p0+3p1-3p2+p3) t^3 + (3p0-6p1+3p2) t^2 + (-3p0+3p1) t + p0
593
+ // Want to solve (q - pt) dot q' = 0
594
+ // q' = 3*(-p0+3p1-3p2+p3)t^2 + 2*(3p0-6p1+3p2)t + (-3p0+3p1)
595
+ // Expanding
596
+ // 3*(-p0+3p1-3p2+p3)^2 t^5
597
+ // 5*(-p0+3p1-3p2+p3)(3p0-6p1+3p2) t^4
598
+ // 4*(-p0+3p1-3p2+p3)(-3p0+3p1) + 2*(3p0-6p1+3p2)^2 t^3
599
+ // 3*(3p0-6p1+3p2)(-3p0+3p1) + 3*(-p0+3p1-3p2+p3)(p0-pt) t^2
600
+ // (-3p0+3p1)^2+2(p0-pt)(3p0-6p1+3p2) t
601
+ // (p0-pt)(-3p0+3p1)
602
+ double A = 3*sum((-p0+3*p1-3*p2+p3)*(-p0+3*p1-3*p2+p3));
603
+ double B = 5*sum((-p0+3*p1-3*p2+p3)*(3*p0-6*p1+3*p2));
604
+ double C = 4*sum((-p0+3*p1-3*p2+p3)*(-3*p0+3*p1)) + 2*sum((3*p0-6*p1+3*p2)*(3*p0-6*p1+3*p2));
605
+ double D = 3*(sum((3*p0-6*p1+3*p2)*(-3*p0+3*p1)) + sum((-p0+3*p1-3*p2+p3)*(p0-pt)));
606
+ double E = sum((-3*p0+3*p1)*(-3*p0+3*p1)) + 2*sum((p0-pt)*(3*p0-6*p1+3*p2));
607
+ double F = sum((p0-pt)*(-3*p0+3*p1));
608
+ B /= A;
609
+ C /= A;
610
+ D /= A;
611
+ E /= A;
612
+ F /= A;
613
+ // auto eval_polynomial = [&] (double t) {
614
+ // return t*t*t*t*t+
615
+ // B*t*t*t*t+
616
+ // C*t*t*t+
617
+ // D*t*t+
618
+ // E*t+
619
+ // F;
620
+ // };
621
+ auto eval_polynomial_deriv = [&] (double t) {
622
+ return 5*t*t*t*t+
623
+ 4*B*t*t*t+
624
+ 3*C*t*t+
625
+ 2*D*t+
626
+ E;
627
+ };
628
+
629
+ // auto p = eval(t);
630
+ auto d_p = d_closest_pt;
631
+ // (tt*tt*tt)*p0 + (3*tt*tt*t)*p1 + (3*tt*t*t)*p2 + (t*t*t)*p3
632
+ auto tt = 1 - t;
633
+ auto d_tt = 3 * tt * tt * dot(d_p, p0) +
634
+ 6 * tt * t * dot(d_p, p1) +
635
+ 3 * t * t * dot(d_p, p2);
636
+ auto d_t = -d_tt +
637
+ 3 * tt * tt * dot(d_p, p1) +
638
+ 6 * tt * t * dot(d_p, p2) +
639
+ 3 * t * t * dot(d_p, p3);
640
+ d_p0 += d_p * (tt * tt * tt);
641
+ d_p1 += d_p * (3 * tt * tt * t);
642
+ d_p2 += d_p * (3 * tt * t * t);
643
+ d_p3 += d_p * (t * t * t);
644
+ // implicit function theorem: dt/dA = -1/(p'(t)) * dp/dA
645
+ auto poly_deriv_t = eval_polynomial_deriv(t);
646
+ if (fabs(poly_deriv_t) > 1e-10f) {
647
+ auto d_B = -(d_t / poly_deriv_t) * t * t * t * t;
648
+ auto d_C = -(d_t / poly_deriv_t) * t * t * t;
649
+ auto d_D = -(d_t / poly_deriv_t) * t * t;
650
+ auto d_E = -(d_t / poly_deriv_t) * t;
651
+ auto d_F = -(d_t / poly_deriv_t);
652
+ // B = B' / A
653
+ // C = C' / A
654
+ // D = D' / A
655
+ // E = E' / A
656
+ // F = F' / A
657
+ auto d_A = -d_B * B / A
658
+ -d_C * C / A
659
+ -d_D * D / A
660
+ -d_E * E / A
661
+ -d_F * F / A;
662
+ d_B /= A;
663
+ d_C /= A;
664
+ d_D /= A;
665
+ d_E /= A;
666
+ d_F /= A;
667
+ {
668
+ double A = 3*sum((-p0+3*p1-3*p2+p3)*(-p0+3*p1-3*p2+p3)) + 1e-3;
669
+ double B = 5*sum((-p0+3*p1-3*p2+p3)*(3*p0-6*p1+3*p2));
670
+ double C = 4*sum((-p0+3*p1-3*p2+p3)*(-3*p0+3*p1)) + 2*sum((3*p0-6*p1+3*p2)*(3*p0-6*p1+3*p2));
671
+ double D = 3*(sum((3*p0-6*p1+3*p2)*(-3*p0+3*p1)) + sum((-p0+3*p1-3*p2+p3)*(p0-pt)));
672
+ double E = sum((-3*p0+3*p1)*(-3*p0+3*p1)) + 2*sum((p0-pt)*(3*p0-6*p1+3*p2));
673
+ double F = sum((p0-pt)*(-3*p0+3*p1));
674
+ B /= A;
675
+ C /= A;
676
+ D /= A;
677
+ E /= A;
678
+ F /= A;
679
+ auto eval_polynomial = [&] (double t) {
680
+ return t*t*t*t*t+
681
+ B*t*t*t*t+
682
+ C*t*t*t+
683
+ D*t*t+
684
+ E*t+
685
+ F;
686
+ };
687
+ auto eval_polynomial_deriv = [&] (double t) {
688
+ return 5*t*t*t*t+
689
+ 4*B*t*t*t+
690
+ 3*C*t*t+
691
+ 2*D*t+
692
+ E;
693
+ };
694
+ auto lb = t - 1e-2f;
695
+ auto ub = t + 1e-2f;
696
+ auto lb_eval = eval_polynomial(lb);
697
+ auto ub_eval = eval_polynomial(ub);
698
+ if (lb_eval > ub_eval) {
699
+ swap_(lb, ub);
700
+ }
701
+ auto t_ = 0.5f * (lb + ub);
702
+ auto num_iter = 20;
703
+ for (int it = 0; it < num_iter; it++) {
704
+ if (!(t_ >= lb && t_ <= ub)) {
705
+ t_ = 0.5f * (lb + ub);
706
+ }
707
+ auto value = eval_polynomial(t_);
708
+ if (fabs(value) < 1e-5f || it == num_iter - 1) {
709
+ break;
710
+ }
711
+ // The derivative may not be entirely accurate,
712
+ // but the bisection is going to handle this
713
+ if (value > 0.f) {
714
+ ub = t_;
715
+ } else {
716
+ lb = t_;
717
+ }
718
+ auto derivative = eval_polynomial_deriv(t);
719
+ t_ -= value / derivative;
720
+ }
721
+ }
722
+ // A = 3*sum((-p0+3*p1-3*p2+p3)*(-p0+3*p1-3*p2+p3))
723
+ d_p0 += d_A * 3 * (-1) * 2 * (-p0+3*p1-3*p2+p3);
724
+ d_p1 += d_A * 3 * 3 * 2 * (-p0+3*p1-3*p2+p3);
725
+ d_p2 += d_A * 3 * (-3) * 2 * (-p0+3*p1-3*p2+p3);
726
+ d_p3 += d_A * 3 * 1 * 2 * (-p0+3*p1-3*p2+p3);
727
+ // B = 5*sum((-p0+3*p1-3*p2+p3)*(3*p0-6*p1+3*p2))
728
+ d_p0 += d_B * 5 * ((-1) * (3*p0-6*p1+3*p2) + 3 * (-p0+3*p1-3*p2+p3));
729
+ d_p1 += d_B * 5 * (3 * (3*p0-6*p1+3*p2) + (-6) * (-p0+3*p1-3*p2+p3));
730
+ d_p2 += d_B * 5 * ((-3) * (3*p0-6*p1+3*p2) + 3 * (-p0+3*p1-3*p2+p3));
731
+ d_p3 += d_B * 5 * (3*p0-6*p1+3*p2);
732
+ // C = 4*sum((-p0+3*p1-3*p2+p3)*(-3*p0+3*p1)) + 2*sum((3*p0-6*p1+3*p2)*(3*p0-6*p1+3*p2))
733
+ d_p0 += d_C * 4 * ((-1) * (-3*p0+3*p1) + (-3) * (-p0+3*p1-3*p2+p3)) +
734
+ d_C * 2 * (3 * 2 * (3*p0-6*p1+3*p2));
735
+ d_p1 += d_C * 4 * (3 * (-3*p0+3*p1) + 3 * (-p0+3*p1-3*p2+p3)) +
736
+ d_C * 2 * ((-6) * 2 * (3*p0-6*p1+3*p2));
737
+ d_p2 += d_C * 4 * ((-3) * (-3*p0+3*p1)) +
738
+ d_C * 2 * (3 * 2 * (3*p0-6*p1+3*p2));
739
+ d_p3 += d_C * 4 * (-3*p0+3*p1);
740
+ // D = 3*(sum((3*p0-6*p1+3*p2)*(-3*p0+3*p1)) + sum((-p0+3*p1-3*p2+p3)*(p0-pt)))
741
+ d_p0 += d_D * 3 * (3 * (-3*p0+3*p1) + (-3) * (3*p0-6*p1+3*p2)) +
742
+ d_D * 3 * ((-1) * (p0-pt) + 1 * (-p0+3*p1-3*p2+p3));
743
+ d_p1 += d_D * 3 * ((-6) * (-3*p0+3*p1) + (3) * (3*p0-6*p1+3*p2)) +
744
+ d_D * 3 * (3 * (p0-pt));
745
+ d_p2 += d_D * 3 * (3 * (-3*p0+3*p1)) +
746
+ d_D * 3 * ((-3) * (p0-pt));
747
+ d_pt += d_D * 3 * ((-1) * (-p0+3*p1-3*p2+p3));
748
+ // E = sum((-3*p0+3*p1)*(-3*p0+3*p1)) + 2*sum((p0-pt)*(3*p0-6*p1+3*p2))
749
+ d_p0 += d_E * ((-3) * 2 * (-3*p0+3*p1)) +
750
+ d_E * 2 * (1 * (3*p0-6*p1+3*p2) + 3 * (p0-pt));
751
+ d_p1 += d_E * ( 3 * 2 * (-3*p0+3*p1)) +
752
+ d_E * 2 * ((-6) * (p0-pt));
753
+ d_p2 += d_E * 2 * ( 3 * (p0-pt));
754
+ d_pt += d_E * 2 * ((-1) * (3*p0-6*p1+3*p2));
755
+ // F = sum((p0-pt)*(-3*p0+3*p1))
756
+ d_p0 += d_F * (1 * (-3*p0+3*p1)) +
757
+ d_F * ((-3) * (p0-pt));
758
+ d_p1 += d_F * (3 * (p0-pt));
759
+ d_pt += d_F * ((-1) * (-3*p0+3*p1));
760
+ }
761
+ }
762
+ atomic_add(d_path.points + 2 * i0, d_p0);
763
+ atomic_add(d_path.points + 2 * i1, d_p1);
764
+ atomic_add(d_path.points + 2 * i2, d_p2);
765
+ atomic_add(d_path.points + 2 * i3, d_p3);
766
+ } else {
767
+ assert(false);
768
+ }
769
+ }
770
+
771
+ DEVICE
772
+ inline
773
+ void d_closest_point(const Rect &rect,
774
+ const Vector2f &pt,
775
+ const Vector2f &d_closest_pt,
776
+ Rect &d_rect,
777
+ Vector2f &d_pt) {
778
+ auto dist = [&](const Vector2f &p0, const Vector2f &p1) -> float {
779
+ // project pt to line
780
+ auto t = dot(pt - p0, p1 - p0) / dot(p1 - p0, p1 - p0);
781
+ if (t < 0) {
782
+ return distance(p0, pt);
783
+ } else if (t > 1) {
784
+ return distance(p1, pt);
785
+ } else {
786
+ return distance(p0 + t * (p1 - p0), pt);
787
+ }
788
+ // return 0;
789
+ };
790
+ auto left_top = rect.p_min;
791
+ auto right_top = Vector2f{rect.p_max.x, rect.p_min.y};
792
+ auto left_bottom = Vector2f{rect.p_min.x, rect.p_max.y};
793
+ auto right_bottom = rect.p_max;
794
+ auto left_dist = dist(left_top, left_bottom);
795
+ auto top_dist = dist(left_top, right_top);
796
+ auto right_dist = dist(right_top, right_bottom);
797
+ auto bottom_dist = dist(left_bottom, right_bottom);
798
+ int min_id = 0;
799
+ auto min_dist = left_dist;
800
+ if (top_dist < min_dist) { min_dist = top_dist; min_id = 1; }
801
+ if (right_dist < min_dist) { min_dist = right_dist; min_id = 2; }
802
+ if (bottom_dist < min_dist) { min_dist = bottom_dist; min_id = 3; }
803
+
804
+ auto d_update = [&](const Vector2f &p0, const Vector2f &p1,
805
+ const Vector2f &d_closest_pt,
806
+ Vector2f &d_p0, Vector2f &d_p1) {
807
+ // project pt to line
808
+ auto t = dot(pt - p0, p1 - p0) / dot(p1 - p0, p1 - p0);
809
+ if (t < 0) {
810
+ d_p0 += d_closest_pt;
811
+ } else if (t > 1) {
812
+ d_p1 += d_closest_pt;
813
+ } else {
814
+ // p = p0 + t * (p1 - p0)
815
+ auto d_p = d_closest_pt;
816
+ d_p0 += d_p * (1 - t);
817
+ d_p1 += d_p * t;
818
+ auto d_t = sum(d_p * (p1 - p0));
819
+ // t = dot(pt - p0, p1 - p0) / dot(p1 - p0, p1 - p0)
820
+ auto d_numerator = d_t / dot(p1 - p0, p1 - p0);
821
+ auto d_denominator = d_t * (-t) / dot(p1 - p0, p1 - p0);
822
+ // numerator = dot(pt - p0, p1 - p0)
823
+ d_pt += (p1 - p0) * d_numerator;
824
+ d_p1 += (pt - p0) * d_numerator;
825
+ d_p0 += ((p0 - p1) + (p0 - pt)) * d_numerator;
826
+ // denominator = dot(p1 - p0, p1 - p0)
827
+ d_p1 += 2 * (p1 - p0) * d_denominator;
828
+ d_p0 += 2 * (p0 - p1) * d_denominator;
829
+ }
830
+ };
831
+ auto d_left_top = Vector2f{0, 0};
832
+ auto d_right_top = Vector2f{0, 0};
833
+ auto d_left_bottom = Vector2f{0, 0};
834
+ auto d_right_bottom = Vector2f{0, 0};
835
+ if (min_id == 0) {
836
+ d_update(left_top, left_bottom, d_closest_pt, d_left_top, d_left_bottom);
837
+ } else if (min_id == 1) {
838
+ d_update(left_top, right_top, d_closest_pt, d_left_top, d_right_top);
839
+ } else if (min_id == 2) {
840
+ d_update(right_top, right_bottom, d_closest_pt, d_right_top, d_right_bottom);
841
+ } else {
842
+ assert(min_id == 3);
843
+ d_update(left_bottom, right_bottom, d_closest_pt, d_left_bottom, d_right_bottom);
844
+ }
845
+ auto d_p_min = Vector2f{0, 0};
846
+ auto d_p_max = Vector2f{0, 0};
847
+ // left_top = rect.p_min
848
+ // right_top = Vector2f{rect.p_max.x, rect.p_min.y}
849
+ // left_bottom = Vector2f{rect.p_min.x, rect.p_max.y}
850
+ // right_bottom = rect.p_max
851
+ d_p_min += d_left_top;
852
+ d_p_max.x += d_right_top.x;
853
+ d_p_min.y += d_right_top.y;
854
+ d_p_min.x += d_left_bottom.x;
855
+ d_p_max.y += d_left_bottom.y;
856
+ d_p_max += d_right_bottom;
857
+ atomic_add(d_rect.p_min, d_p_min);
858
+ atomic_add(d_rect.p_max, d_p_max);
859
+ }
860
+
861
+ DEVICE
862
+ inline
863
+ void d_closest_point(const Shape &shape,
864
+ const Vector2f &pt,
865
+ const Vector2f &d_closest_pt,
866
+ const ClosestPointPathInfo &path_info,
867
+ Shape &d_shape,
868
+ Vector2f &d_pt) {
869
+ switch (shape.type) {
870
+ case ShapeType::Circle:
871
+ d_closest_point(*(const Circle *)shape.ptr,
872
+ pt,
873
+ d_closest_pt,
874
+ *(Circle *)d_shape.ptr,
875
+ d_pt);
876
+ break;
877
+ case ShapeType::Ellipse:
878
+ // https://www.geometrictools.com/Documentation/DistancePointEllipseEllipsoid.pdf
879
+ assert(false);
880
+ break;
881
+ case ShapeType::Path:
882
+ d_closest_point(*(const Path *)shape.ptr,
883
+ pt,
884
+ d_closest_pt,
885
+ path_info,
886
+ *(Path *)d_shape.ptr,
887
+ d_pt);
888
+ break;
889
+ case ShapeType::Rect:
890
+ d_closest_point(*(const Rect *)shape.ptr,
891
+ pt,
892
+ d_closest_pt,
893
+ *(Rect *)d_shape.ptr,
894
+ d_pt);
895
+ break;
896
+ }
897
+ }
898
+
899
+ DEVICE
900
+ inline
901
+ void d_compute_distance(const Matrix3x3f &canvas_to_shape,
902
+ const Matrix3x3f &shape_to_canvas,
903
+ const Shape &shape,
904
+ const Vector2f &pt,
905
+ const Vector2f &closest_pt,
906
+ const ClosestPointPathInfo &path_info,
907
+ float d_dist,
908
+ Matrix3x3f &d_shape_to_canvas,
909
+ Shape &d_shape,
910
+ float *d_translation) {
911
+ if (distance_squared(pt, closest_pt) < 1e-10f) {
912
+ // The derivative at distance=0 is undefined
913
+ return;
914
+ }
915
+ assert(isfinite(d_dist));
916
+ // pt is in canvas space, transform it to shape's local space
917
+ auto local_pt = xform_pt(canvas_to_shape, pt);
918
+ auto local_closest_pt = xform_pt(canvas_to_shape, closest_pt);
919
+ // auto local_closest_pt = closest_point(shape, local_pt);
920
+ // auto closest_pt = xform_pt(shape_group.shape_to_canvas, local_closest_pt);
921
+ // auto dist = distance(closest_pt, pt);
922
+ auto d_pt = Vector2f{0, 0};
923
+ auto d_closest_pt = Vector2f{0, 0};
924
+ d_distance(closest_pt, pt, d_dist, d_closest_pt, d_pt);
925
+ assert(isfinite(d_pt));
926
+ assert(isfinite(d_closest_pt));
927
+ // auto closest_pt = xform_pt(shape_group.shape_to_canvas, local_closest_pt);
928
+ auto d_local_closest_pt = Vector2f{0, 0};
929
+ auto d_shape_to_canvas_ = Matrix3x3f();
930
+ d_xform_pt(shape_to_canvas, local_closest_pt, d_closest_pt,
931
+ d_shape_to_canvas_, d_local_closest_pt);
932
+ assert(isfinite(d_local_closest_pt));
933
+ auto d_local_pt = Vector2f{0, 0};
934
+ d_closest_point(shape, local_pt, d_local_closest_pt, path_info, d_shape, d_local_pt);
935
+ assert(isfinite(d_local_pt));
936
+ auto d_canvas_to_shape = Matrix3x3f();
937
+ d_xform_pt(canvas_to_shape,
938
+ pt,
939
+ d_local_pt,
940
+ d_canvas_to_shape,
941
+ d_pt);
942
+ // http://jack.valmadre.net/notes/2016/09/04/back-prop-differentials/#back-propagation-using-differentials
943
+ auto tc2s = transpose(canvas_to_shape);
944
+ d_shape_to_canvas_ += -tc2s * d_canvas_to_shape * tc2s;
945
+ atomic_add(&d_shape_to_canvas(0, 0), d_shape_to_canvas_);
946
+ if (d_translation != nullptr) {
947
+ atomic_add(d_translation, -d_pt);
948
+ }
949
+ }
DiffVG/cuda_utils.h ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #ifdef __CUDACC__
4
+ #include <cuda.h>
5
+ #include <cuda_runtime.h>
6
+ #endif
7
+ #include <cstdio>
8
+ #include <cassert>
9
+ #include <limits>
10
+
11
#ifdef __CUDACC__
// Check the status of a CUDA runtime call. The argument is evaluated exactly
// once and its status stored, so a failing call is not executed a second time
// when the error string is looked up. On failure the error string and the
// source location are printed and the process exits with status 1.
#define checkCuda(x) do { \
    const cudaError_t checkCuda_status_ = (x); \
    if (checkCuda_status_ != cudaSuccess) { \
        printf("CUDA Runtime Error: %s at %s:%d\n", \
               cudaGetErrorString(checkCuda_status_), __FILE__, __LINE__); \
        exit(1); \
    } } while(0)
#endif
17
+
18
+ template <typename T>
19
+ DEVICE
20
+ inline T infinity() {
21
+ #ifdef __CUDA_ARCH__
22
+ const unsigned long long ieee754inf = 0x7ff0000000000000;
23
+ return __longlong_as_double(ieee754inf);
24
+ #else
25
+ return std::numeric_limits<T>::infinity();
26
+ #endif
27
+ }
28
+
29
+ template <>
30
+ DEVICE
31
+ inline double infinity() {
32
+ #ifdef __CUDA_ARCH__
33
+ return __longlong_as_double(0x7ff0000000000000ULL);
34
+ #else
35
+ return std::numeric_limits<double>::infinity();
36
+ #endif
37
+ }
38
+
39
+ template <>
40
+ DEVICE
41
+ inline float infinity() {
42
+ #ifdef __CUDA_ARCH__
43
+ return __int_as_float(0x7f800000);
44
+ #else
45
+ return std::numeric_limits<float>::infinity();
46
+ #endif
47
+ }
48
+
49
// Wait for the CUDA device to finish its outstanding work, checking the
// returned status with checkCuda. Compiles to an empty function when the
// translation unit is not built by a CUDA compiler.
inline void cuda_synchronize() {
#if defined(__CUDACC__)
    checkCuda(cudaDeviceSynchronize());
#endif
}
DiffVG/diffvg.cpp ADDED
@@ -0,0 +1,1792 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "diffvg.h"
2
+ #include "aabb.h"
3
+ #include "shape.h"
4
+ #include "sample_boundary.h"
5
+ #include "atomic.h"
6
+ #include "cdf.h"
7
+ #include "compute_distance.h"
8
+ #include "cuda_utils.h"
9
+ #include "edge_query.h"
10
+ #include "filter.h"
11
+ #include "matrix.h"
12
+ #include "parallel.h"
13
+ #include "pcg.h"
14
+ #include "ptr.h"
15
+ #include "scene.h"
16
+ #include "vector.h"
17
+ #include "winding_number.h"
18
+ #include "within_distance.h"
19
+ #include <cassert>
20
+ #include <pybind11/pybind11.h>
21
+ #include <pybind11/stl.h>
22
+ #include <thrust/execution_policy.h>
23
+ #include <thrust/sort.h>
24
+
25
+ namespace py = pybind11;
26
+
27
+ struct Command { // Plain record of three integer ids; how it is consumed is not visible in this part of the file.
28
+ int shape_group_id; // presumably an index into SceneData::shape_groups -- TODO confirm against users
29
+ int shape_id; // presumably an index into SceneData::shapes -- TODO confirm against users
30
+ int point_id; // Only used by path
31
+ };
32
+
33
+ DEVICE
34
+ bool is_inside(const SceneData &scene_data,
35
+ int shape_group_id,
36
+ const Vector2f &pt,
37
+ EdgeQuery *edge_query) {
38
+ const ShapeGroup &shape_group = scene_data.shape_groups[shape_group_id];
39
+ // pt is in canvas space, transform it to shape's local space
40
+ auto local_pt = xform_pt(shape_group.canvas_to_shape, pt);
41
+ const auto &bvh_nodes = scene_data.shape_groups_bvh_nodes[shape_group_id];
42
+ const AABB &bbox = bvh_nodes[2 * shape_group.num_shapes - 2].box;
43
+ if (!inside(bbox, local_pt)) {
44
+ return false;
45
+ }
46
+ auto winding_number = 0;
47
+ // Traverse the shape group BVH
48
+ constexpr auto max_bvh_stack_size = 64;
49
+ int bvh_stack[max_bvh_stack_size];
50
+ auto stack_size = 0;
51
+ bvh_stack[stack_size++] = 2 * shape_group.num_shapes - 2;
52
+ while (stack_size > 0) {
53
+ const BVHNode &node = bvh_nodes[bvh_stack[--stack_size]];
54
+ if (node.child1 < 0) {
55
+ // leaf
56
+ auto shape_id = node.child0;
57
+ auto w = compute_winding_number(
58
+ scene_data.shapes[shape_id], scene_data.path_bvhs[shape_id], local_pt);
59
+ winding_number += w;
60
+ if (edge_query != nullptr) {
61
+ if (edge_query->shape_group_id == shape_group_id &&
62
+ edge_query->shape_id == shape_id) {
63
+ if ((shape_group.use_even_odd_rule && abs(w) % 2 == 1) ||
64
+ (!shape_group.use_even_odd_rule && w != 0)) {
65
+ edge_query->hit = true;
66
+ }
67
+ }
68
+ }
69
+ } else {
70
+ assert(node.child0 >= 0 && node.child1 >= 0);
71
+ const AABB &b0 = bvh_nodes[node.child0].box;
72
+ if (inside(b0, local_pt)) {
73
+ bvh_stack[stack_size++] = node.child0;
74
+ }
75
+ const AABB &b1 = bvh_nodes[node.child1].box;
76
+ if (inside(b1, local_pt)) {
77
+ bvh_stack[stack_size++] = node.child1;
78
+ }
79
+ assert(stack_size <= max_bvh_stack_size);
80
+ }
81
+ }
82
+ if (shape_group.use_even_odd_rule) {
83
+ return abs(winding_number) % 2 == 1;
84
+ } else {
85
+ return winding_number != 0;
86
+ }
87
+ }
88
+
89
+ DEVICE void accumulate_boundary_gradient(const Shape &shape,
90
+ float contrib,
91
+ float t,
92
+ const Vector2f &normal,
93
+ const BoundaryData &boundary_data,
94
+ Shape &d_shape,
95
+ const Matrix3x3f &shape_to_canvas,
96
+ const Vector2f &local_boundary_pt,
97
+ Matrix3x3f &d_shape_to_canvas) {
98
+ assert(isfinite(contrib));
99
+ assert(isfinite(normal));
100
+ // According to Reynold transport theorem,
101
+ // the Jacobian of the boundary integral is dot(velocity, normal),
102
+ // where the velocity depends on the variable being differentiated with.
103
+ if (boundary_data.is_stroke) {
104
+ auto has_path_thickness = false;
105
+ if (shape.type == ShapeType::Path) {
106
+ const Path &path = *(const Path *)shape.ptr;
107
+ has_path_thickness = path.thickness != nullptr;
108
+ }
109
+ // differentiate stroke width: velocity is the same as normal
110
+ if (has_path_thickness) {
111
+ Path *d_p = (Path*)d_shape.ptr;
112
+ auto base_point_id = boundary_data.path.base_point_id;
113
+ auto point_id = boundary_data.path.point_id;
114
+ auto t = boundary_data.path.t;
115
+ const Path &path = *(const Path *)shape.ptr;
116
+ if (path.num_control_points[base_point_id] == 0) {
117
+ // Straight line
118
+ auto i0 = point_id;
119
+ auto i1 = (point_id + 1) % path.num_points;
120
+ // r = r0 + t * (r1 - r0)
121
+ atomic_add(&d_p->thickness[i0], (1 - t) * contrib);
122
+ atomic_add(&d_p->thickness[i1], ( t) * contrib);
123
+ } else if (path.num_control_points[base_point_id] == 1) {
124
+ // Quadratic Bezier curve
125
+ auto i0 = point_id;
126
+ auto i1 = point_id + 1;
127
+ auto i2 = (point_id + 2) % path.num_points;
128
+ // r = (1-t)^2r0 + 2(1-t)t r1 + t^2 r2
129
+ atomic_add(&d_p->thickness[i0], square(1 - t) * contrib);
130
+ atomic_add(&d_p->thickness[i1], (2*(1-t)*t) * contrib);
131
+ atomic_add(&d_p->thickness[i2], (t*t) * contrib);
132
+ } else if (path.num_control_points[base_point_id] == 2) {
133
+ auto i0 = point_id;
134
+ auto i1 = point_id + 1;
135
+ auto i2 = point_id + 2;
136
+ auto i3 = (point_id + 3) % path.num_points;
137
+ // r = (1-t)^3r0 + 3*(1-t)^2tr1 + 3*(1-t)t^2r2 + t^3r3
138
+ atomic_add(&d_p->thickness[i0], cubic(1 - t) * contrib);
139
+ atomic_add(&d_p->thickness[i1], 3 * square(1 - t) * t * contrib);
140
+ atomic_add(&d_p->thickness[i2], 3 * (1 - t) * t * t * contrib);
141
+ atomic_add(&d_p->thickness[i3], t * t * t * contrib);
142
+ } else {
143
+ assert(false);
144
+ }
145
+ } else {
146
+ atomic_add(&d_shape.stroke_width, contrib);
147
+ }
148
+ }
149
+ switch (shape.type) {
150
+ case ShapeType::Circle: {
151
+ Circle *d_p = (Circle*)d_shape.ptr;
152
+ // velocity for the center is (1, 0) for x and (0, 1) for y
153
+ atomic_add(&d_p->center[0], normal * contrib);
154
+ // velocity for the radius is the same as the normal
155
+ atomic_add(&d_p->radius, contrib);
156
+ break;
157
+ } case ShapeType::Ellipse: {
158
+ Ellipse *d_p = (Ellipse*)d_shape.ptr;
159
+ // velocity for the center is (1, 0) for x and (0, 1) for y
160
+ atomic_add(&d_p->center[0], normal * contrib);
161
+ // velocity for the radius:
162
+ // x = center.x + r.x * cos(2pi * t)
163
+ // y = center.y + r.y * sin(2pi * t)
164
+ // for r.x: (cos(2pi * t), 0)
165
+ // for r.y: (0, sin(2pi * t))
166
+ atomic_add(&d_p->radius.x, cos(2 * float(M_PI) * t) * normal.x * contrib);
167
+ atomic_add(&d_p->radius.y, sin(2 * float(M_PI) * t) * normal.y * contrib);
168
+ break;
169
+ } case ShapeType::Path: {
170
+ Path *d_p = (Path*)d_shape.ptr;
171
+ auto base_point_id = boundary_data.path.base_point_id;
172
+ auto point_id = boundary_data.path.point_id;
173
+ auto t = boundary_data.path.t;
174
+ const Path &path = *(const Path *)shape.ptr;
175
+ if (path.num_control_points[base_point_id] == 0) {
176
+ // Straight line
177
+ auto i0 = point_id;
178
+ auto i1 = (point_id + 1) % path.num_points;
179
+ // pt = p0 + t * (p1 - p0)
180
+ // velocity for p0.x: (1 - t, 0)
181
+ // p0.y: ( 0, 1 - t)
182
+ // p1.x: ( t, 0)
183
+ // p1.y: ( 0, t)
184
+ atomic_add(&d_p->points[2 * i0 + 0], (1 - t) * normal.x * contrib);
185
+ atomic_add(&d_p->points[2 * i0 + 1], (1 - t) * normal.y * contrib);
186
+ atomic_add(&d_p->points[2 * i1 + 0], ( t) * normal.x * contrib);
187
+ atomic_add(&d_p->points[2 * i1 + 1], ( t) * normal.y * contrib);
188
+ } else if (path.num_control_points[base_point_id] == 1) {
189
+ // Quadratic Bezier curve
190
+ auto i0 = point_id;
191
+ auto i1 = point_id + 1;
192
+ auto i2 = (point_id + 2) % path.num_points;
193
+ // pt = (1-t)^2p0 + 2(1-t)t p1 + t^2 p2
194
+ // velocity for p0.x: ((1-t)^2, 0)
195
+ // p0.y: ( 0, (1-t)^2)
196
+ // p1.x: (2(1-t)t, 0)
197
+ // p1.y: ( 0, 2(1-t)t)
198
+ // p1.x: ( t^2, 0)
199
+ // p1.y: ( 0, t^2)
200
+ atomic_add(&d_p->points[2 * i0 + 0], square(1 - t) * normal.x * contrib);
201
+ atomic_add(&d_p->points[2 * i0 + 1], square(1 - t) * normal.y * contrib);
202
+ atomic_add(&d_p->points[2 * i1 + 0], (2*(1-t)*t) * normal.x * contrib);
203
+ atomic_add(&d_p->points[2 * i1 + 1], (2*(1-t)*t) * normal.y * contrib);
204
+ atomic_add(&d_p->points[2 * i2 + 0], (t*t) * normal.x * contrib);
205
+ atomic_add(&d_p->points[2 * i2 + 1], (t*t) * normal.y * contrib);
206
+ } else if (path.num_control_points[base_point_id] == 2) {
207
+ auto i0 = point_id;
208
+ auto i1 = point_id + 1;
209
+ auto i2 = point_id + 2;
210
+ auto i3 = (point_id + 3) % path.num_points;
211
+ // pt = (1-t)^3p0 + 3*(1-t)^2tp1 + 3*(1-t)t^2p2 + t^3p3
212
+ // velocity for p0.x: ( (1-t)^3, 0)
213
+ // p0.y: ( 0, (1-t)^3)
214
+ // p1.x: (3*(1-t)^2t, 0)
215
+ // p1.y: ( 0, 3*(1-t)^2t)
216
+ // p2.x: (3*(1-t)t^2, 0)
217
+ // p2.y: ( 0, 3*(1-t)t^2)
218
+ // p2.x: ( t^3, 0)
219
+ // p2.y: ( 0, t^3)
220
+ atomic_add(&d_p->points[2 * i0 + 0], cubic(1 - t) * normal.x * contrib);
221
+ atomic_add(&d_p->points[2 * i0 + 1], cubic(1 - t) * normal.y * contrib);
222
+ atomic_add(&d_p->points[2 * i1 + 0], 3 * square(1 - t) * t * normal.x * contrib);
223
+ atomic_add(&d_p->points[2 * i1 + 1], 3 * square(1 - t) * t * normal.y * contrib);
224
+ atomic_add(&d_p->points[2 * i2 + 0], 3 * (1 - t) * t * t * normal.x * contrib);
225
+ atomic_add(&d_p->points[2 * i2 + 1], 3 * (1 - t) * t * t * normal.y * contrib);
226
+ atomic_add(&d_p->points[2 * i3 + 0], t * t * t * normal.x * contrib);
227
+ atomic_add(&d_p->points[2 * i3 + 1], t * t * t * normal.y * contrib);
228
+ } else {
229
+ assert(false);
230
+ }
231
+ break;
232
+ } case ShapeType::Rect: {
233
+ Rect *d_p = (Rect*)d_shape.ptr;
234
+ // The velocity depends on the position of the boundary
235
+ if (normal == Vector2f{-1, 0}) {
236
+ // left
237
+ // velocity for p_min is (1, 0) for x and (0, 0) for y
238
+ atomic_add(&d_p->p_min.x, -contrib);
239
+ } else if (normal == Vector2f{1, 0}) {
240
+ // right
241
+ // velocity for p_max is (1, 0) for x and (0, 0) for y
242
+ atomic_add(&d_p->p_max.x, contrib);
243
+ } else if (normal == Vector2f{0, -1}) {
244
+ // top
245
+ // velocity for p_min is (0, 0) for x and (0, 1) for y
246
+ atomic_add(&d_p->p_min.y, -contrib);
247
+ } else if (normal == Vector2f{0, 1}) {
248
+ // bottom
249
+ // velocity for p_max is (0, 0) for x and (0, 1) for y
250
+ atomic_add(&d_p->p_max.y, contrib);
251
+ } else {
252
+ // incorrect normal assignment?
253
+ assert(false);
254
+ }
255
+ break;
256
+ } default: {
257
+ assert(false);
258
+ break;
259
+ }
260
+ }
261
+ // for shape_to_canvas we have the following relationship:
262
+ // boundary_pt = xform_pt(shape_to_canvas, local_pt)
263
+ // the velocity is the derivative of boundary_pt with respect to shape_to_canvas
264
+ // we can use reverse-mode AD to compute the dot product of the velocity and the Jacobian
265
+ // by passing the normal in d_xform_pt
266
+ auto d_shape_to_canvas_ = Matrix3x3f();
267
+ auto d_local_boundary_pt = Vector2f{0, 0};
268
+ d_xform_pt(shape_to_canvas,
269
+ local_boundary_pt,
270
+ normal * contrib,
271
+ d_shape_to_canvas_,
272
+ d_local_boundary_pt);
273
+ atomic_add(&d_shape_to_canvas(0, 0), d_shape_to_canvas_);
274
+ }
275
+
276
+ DEVICE
277
+ Vector4f sample_color(const ColorType &color_type,
278
+ void *color,
279
+ const Vector2f &pt) {
280
+ switch (color_type) {
281
+ case ColorType::Constant: {
282
+ auto c = (const Constant*)color;
283
+ assert(isfinite(c->color));
284
+ return c->color;
285
+ } case ColorType::LinearGradient: {
286
+ auto c = (const LinearGradient*)color;
287
+ // Project pt to (c->begin, c->end)
288
+ auto beg = c->begin;
289
+ auto end = c->end;
290
+ auto t = dot(pt - beg, end - beg) / max(dot(end - beg, end - beg), 1e-3f);
291
+ // Find the correponding stop:
292
+ if (t < c->stop_offsets[0]) {
293
+ return Vector4f{c->stop_colors[0],
294
+ c->stop_colors[1],
295
+ c->stop_colors[2],
296
+ c->stop_colors[3]};
297
+ }
298
+ for (int i = 0; i < c->num_stops - 1; i++) {
299
+ auto offset_curr = c->stop_offsets[i];
300
+ auto offset_next = c->stop_offsets[i + 1];
301
+ assert(offset_next > offset_curr);
302
+ if (t >= offset_curr && t < offset_next) {
303
+ auto color_curr = Vector4f{
304
+ c->stop_colors[4 * i + 0],
305
+ c->stop_colors[4 * i + 1],
306
+ c->stop_colors[4 * i + 2],
307
+ c->stop_colors[4 * i + 3]};
308
+ auto color_next = Vector4f{
309
+ c->stop_colors[4 * (i + 1) + 0],
310
+ c->stop_colors[4 * (i + 1) + 1],
311
+ c->stop_colors[4 * (i + 1) + 2],
312
+ c->stop_colors[4 * (i + 1) + 3]};
313
+ auto tt = (t - offset_curr) / (offset_next - offset_curr);
314
+ assert(isfinite(tt));
315
+ assert(isfinite(color_curr));
316
+ assert(isfinite(color_next));
317
+ return color_curr * (1 - tt) + color_next * tt;
318
+ }
319
+ }
320
+ return Vector4f{c->stop_colors[4 * (c->num_stops - 1) + 0],
321
+ c->stop_colors[4 * (c->num_stops - 1) + 1],
322
+ c->stop_colors[4 * (c->num_stops - 1) + 2],
323
+ c->stop_colors[4 * (c->num_stops - 1) + 3]};
324
+ } case ColorType::RadialGradient: {
325
+ auto c = (const RadialGradient*)color;
326
+ // Distance from pt to center
327
+ auto offset = pt - c->center;
328
+ auto normalized_offset = offset / c->radius;
329
+ auto t = length(normalized_offset);
330
+ // Find the correponding stop:
331
+ if (t < c->stop_offsets[0]) {
332
+ return Vector4f{c->stop_colors[0],
333
+ c->stop_colors[1],
334
+ c->stop_colors[2],
335
+ c->stop_colors[3]};
336
+ }
337
+ for (int i = 0; i < c->num_stops - 1; i++) {
338
+ auto offset_curr = c->stop_offsets[i];
339
+ auto offset_next = c->stop_offsets[i + 1];
340
+ assert(offset_next > offset_curr);
341
+ if (t >= offset_curr && t < offset_next) {
342
+ auto color_curr = Vector4f{
343
+ c->stop_colors[4 * i + 0],
344
+ c->stop_colors[4 * i + 1],
345
+ c->stop_colors[4 * i + 2],
346
+ c->stop_colors[4 * i + 3]};
347
+ auto color_next = Vector4f{
348
+ c->stop_colors[4 * (i + 1) + 0],
349
+ c->stop_colors[4 * (i + 1) + 1],
350
+ c->stop_colors[4 * (i + 1) + 2],
351
+ c->stop_colors[4 * (i + 1) + 3]};
352
+ auto tt = (t - offset_curr) / (offset_next - offset_curr);
353
+ assert(isfinite(tt));
354
+ assert(isfinite(color_curr));
355
+ assert(isfinite(color_next));
356
+ return color_curr * (1 - tt) + color_next * tt;
357
+ }
358
+ }
359
+ return Vector4f{c->stop_colors[4 * (c->num_stops - 1) + 0],
360
+ c->stop_colors[4 * (c->num_stops - 1) + 1],
361
+ c->stop_colors[4 * (c->num_stops - 1) + 2],
362
+ c->stop_colors[4 * (c->num_stops - 1) + 3]};
363
+ } default: {
364
+ assert(false);
365
+ }
366
+ }
367
+ return Vector4f{};
368
+ }
369
+
370
+ DEVICE
371
+ void d_sample_color(const ColorType &color_type,
372
+ void *color_ptr,
373
+ const Vector2f &pt,
374
+ const Vector4f &d_color,
375
+ void *d_color_ptr,
376
+ float *d_translation) {
377
+ switch (color_type) {
378
+ case ColorType::Constant: {
379
+ auto d_c = (Constant*)d_color_ptr;
380
+ atomic_add(&d_c->color[0], d_color);
381
+ return;
382
+ } case ColorType::LinearGradient: {
383
+ auto c = (const LinearGradient*)color_ptr;
384
+ auto d_c = (LinearGradient*)d_color_ptr;
385
+ // Project pt to (c->begin, c->end)
386
+ auto beg = c->begin;
387
+ auto end = c->end;
388
+ auto t = dot(pt - beg, end - beg) / max(dot(end - beg, end - beg), 1e-3f);
389
+ // Find the correponding stop:
390
+ if (t < c->stop_offsets[0]) {
391
+ atomic_add(&d_c->stop_colors[0], d_color);
392
+ return;
393
+ }
394
+ for (int i = 0; i < c->num_stops - 1; i++) {
395
+ auto offset_curr = c->stop_offsets[i];
396
+ auto offset_next = c->stop_offsets[i + 1];
397
+ assert(offset_next > offset_curr);
398
+ if (t >= offset_curr && t < offset_next) {
399
+ auto color_curr = Vector4f{
400
+ c->stop_colors[4 * i + 0],
401
+ c->stop_colors[4 * i + 1],
402
+ c->stop_colors[4 * i + 2],
403
+ c->stop_colors[4 * i + 3]};
404
+ auto color_next = Vector4f{
405
+ c->stop_colors[4 * (i + 1) + 0],
406
+ c->stop_colors[4 * (i + 1) + 1],
407
+ c->stop_colors[4 * (i + 1) + 2],
408
+ c->stop_colors[4 * (i + 1) + 3]};
409
+ auto tt = (t - offset_curr) / (offset_next - offset_curr);
410
+ // return color_curr * (1 - tt) + color_next * tt;
411
+ auto d_color_curr = d_color * (1 - tt);
412
+ auto d_color_next = d_color * tt;
413
+ auto d_tt = sum(d_color * (color_next - color_curr));
414
+ auto d_offset_next = -d_tt * tt / (offset_next - offset_curr);
415
+ auto d_offset_curr = d_tt * ((tt - 1.f) / (offset_next - offset_curr));
416
+ auto d_t = d_tt / (offset_next - offset_curr);
417
+ assert(isfinite(d_tt));
418
+ atomic_add(&d_c->stop_colors[4 * i], d_color_curr);
419
+ atomic_add(&d_c->stop_colors[4 * (i + 1)], d_color_next);
420
+ atomic_add(&d_c->stop_offsets[i], d_offset_curr);
421
+ atomic_add(&d_c->stop_offsets[i + 1], d_offset_next);
422
+ // auto t = dot(pt - beg, end - beg) / max(dot(end - beg, end - beg), 1e-6f);
423
+ // l = max(dot(end - beg, end - beg), 1e-3f)
424
+ // t = dot(pt - beg, end - beg) / l;
425
+ auto l = max(dot(end - beg, end - beg), 1e-3f);
426
+ auto d_beg = d_t * (-(pt - beg)-(end - beg)) / l;
427
+ auto d_end = d_t * (pt - beg) / l;
428
+ auto d_l = -d_t * t / l;
429
+ if (dot(end - beg, end - beg) > 1e-3f) {
430
+ d_beg += 2 * d_l * (beg - end);
431
+ d_end += 2 * d_l * (end - beg);
432
+ }
433
+ atomic_add(&d_c->begin[0], d_beg);
434
+ atomic_add(&d_c->end[0], d_end);
435
+ if (d_translation != nullptr) {
436
+ atomic_add(d_translation, (d_beg + d_end));
437
+ }
438
+ return;
439
+ }
440
+ }
441
+ atomic_add(&d_c->stop_colors[4 * (c->num_stops - 1)], d_color);
442
+ return;
443
+ } case ColorType::RadialGradient: {
444
+ auto c = (const RadialGradient*)color_ptr;
445
+ auto d_c = (RadialGradient*)d_color_ptr;
446
+ // Distance from pt to center
447
+ auto offset = pt - c->center;
448
+ auto normalized_offset = offset / c->radius;
449
+ auto t = length(normalized_offset);
450
+ // Find the correponding stop:
451
+ if (t < c->stop_offsets[0]) {
452
+ atomic_add(&d_c->stop_colors[0], d_color);
453
+ return;
454
+ }
455
+ for (int i = 0; i < c->num_stops - 1; i++) {
456
+ auto offset_curr = c->stop_offsets[i];
457
+ auto offset_next = c->stop_offsets[i + 1];
458
+ assert(offset_next > offset_curr);
459
+ if (t >= offset_curr && t < offset_next) {
460
+ auto color_curr = Vector4f{
461
+ c->stop_colors[4 * i + 0],
462
+ c->stop_colors[4 * i + 1],
463
+ c->stop_colors[4 * i + 2],
464
+ c->stop_colors[4 * i + 3]};
465
+ auto color_next = Vector4f{
466
+ c->stop_colors[4 * (i + 1) + 0],
467
+ c->stop_colors[4 * (i + 1) + 1],
468
+ c->stop_colors[4 * (i + 1) + 2],
469
+ c->stop_colors[4 * (i + 1) + 3]};
470
+ auto tt = (t - offset_curr) / (offset_next - offset_curr);
471
+ assert(isfinite(tt));
472
+ // return color_curr * (1 - tt) + color_next * tt;
473
+ auto d_color_curr = d_color * (1 - tt);
474
+ auto d_color_next = d_color * tt;
475
+ auto d_tt = sum(d_color * (color_next - color_curr));
476
+ auto d_offset_next = -d_tt * tt / (offset_next - offset_curr);
477
+ auto d_offset_curr = d_tt * ((tt - 1.f) / (offset_next - offset_curr));
478
+ auto d_t = d_tt / (offset_next - offset_curr);
479
+ assert(isfinite(d_t));
480
+ atomic_add(&d_c->stop_colors[4 * i], d_color_curr);
481
+ atomic_add(&d_c->stop_colors[4 * (i + 1)], d_color_next);
482
+ atomic_add(&d_c->stop_offsets[i], d_offset_curr);
483
+ atomic_add(&d_c->stop_offsets[i + 1], d_offset_next);
484
+ // offset = pt - c->center
485
+ // normalized_offset = offset / c->radius
486
+ // t = length(normalized_offset)
487
+ auto d_normalized_offset = d_length(normalized_offset, d_t);
488
+ auto d_offset = d_normalized_offset / c->radius;
489
+ auto d_radius = -d_normalized_offset * offset / (c->radius * c->radius);
490
+ auto d_center = -d_offset;
491
+ atomic_add(&d_c->center[0], d_center);
492
+ atomic_add(&d_c->radius[0], d_radius);
493
+ if (d_translation != nullptr) {
494
+ atomic_add(d_translation, d_center);
495
+ }
496
+ }
497
+ }
498
+ atomic_add(&d_c->stop_colors[4 * (c->num_stops - 1)], d_color);
499
+ return;
500
+ } default: {
501
+ assert(false);
502
+ }
503
+ }
504
+ }
505
+
506
+ struct Fragment {
507
+ Vector3f color;
508
+ float alpha;
509
+ int group_id;
510
+ bool is_stroke;
511
+ };
512
+
513
+ struct PrefilterFragment {
514
+ Vector3f color;
515
+ float alpha;
516
+ int group_id;
517
+ bool is_stroke;
518
+ int shape_id;
519
+ float distance;
520
+ Vector2f closest_pt;
521
+ ClosestPointPathInfo path_info;
522
+ bool within_distance;
523
+ };
524
+
525
+ DEVICE
526
+ Vector4f sample_color(const SceneData &scene,
527
+ const Vector4f *background_color,
528
+ const Vector2f &screen_pt,
529
+ const Vector4f *d_color = nullptr,
530
+ EdgeQuery *edge_query = nullptr,
531
+ Vector4f *d_background_color = nullptr,
532
+ float *d_translation = nullptr) {
533
+ if (edge_query != nullptr) {
534
+ edge_query->hit = false;
535
+ }
536
+
537
+ // screen_pt is in screen space ([0, 1), [0, 1)),
538
+ // need to transform to canvas space
539
+ auto pt = screen_pt;
540
+ pt.x *= scene.canvas_width;
541
+ pt.y *= scene.canvas_height;
542
+ constexpr auto max_hit_shapes = 256;
543
+ constexpr auto max_bvh_stack_size = 64;
544
+ Fragment fragments[max_hit_shapes];
545
+ int bvh_stack[max_bvh_stack_size];
546
+ auto stack_size = 0;
547
+ auto num_fragments = 0;
548
+ bvh_stack[stack_size++] = 2 * scene.num_shape_groups - 2;
549
+ while (stack_size > 0) {
550
+ const BVHNode &node = scene.bvh_nodes[bvh_stack[--stack_size]];
551
+ if (node.child1 < 0) {
552
+ // leaf
553
+ auto group_id = node.child0;
554
+ const ShapeGroup &shape_group = scene.shape_groups[group_id];
555
+ if (shape_group.stroke_color != nullptr) {
556
+ if (within_distance(scene, group_id, pt, edge_query)) {
557
+ auto color_alpha = sample_color(shape_group.stroke_color_type,
558
+ shape_group.stroke_color,
559
+ pt);
560
+ Fragment f;
561
+ f.color = Vector3f{color_alpha[0], color_alpha[1], color_alpha[2]};
562
+ f.alpha = color_alpha[3];
563
+ f.group_id = group_id;
564
+ f.is_stroke = true;
565
+ assert(num_fragments < max_hit_shapes);
566
+ fragments[num_fragments++] = f;
567
+ }
568
+ }
569
+ if (shape_group.fill_color != nullptr) {
570
+ if (is_inside(scene, group_id, pt, edge_query)) {
571
+ auto color_alpha = sample_color(shape_group.fill_color_type,
572
+ shape_group.fill_color,
573
+ pt);
574
+ Fragment f;
575
+ f.color = Vector3f{color_alpha[0], color_alpha[1], color_alpha[2]};
576
+ f.alpha = color_alpha[3];
577
+ f.group_id = group_id;
578
+ f.is_stroke = false;
579
+ assert(num_fragments < max_hit_shapes);
580
+ fragments[num_fragments++] = f;
581
+ }
582
+ }
583
+ } else {
584
+ assert(node.child0 >= 0 && node.child1 >= 0);
585
+ const AABB &b0 = scene.bvh_nodes[node.child0].box;
586
+ if (inside(b0, pt, scene.bvh_nodes[node.child0].max_radius)) {
587
+ bvh_stack[stack_size++] = node.child0;
588
+ }
589
+ const AABB &b1 = scene.bvh_nodes[node.child1].box;
590
+ if (inside(b1, pt, scene.bvh_nodes[node.child1].max_radius)) {
591
+ bvh_stack[stack_size++] = node.child1;
592
+ }
593
+ assert(stack_size <= max_bvh_stack_size);
594
+ }
595
+ }
596
+ if (num_fragments <= 0) {
597
+ if (background_color != nullptr) {
598
+ if (d_background_color != nullptr) {
599
+ *d_background_color = *d_color;
600
+ }
601
+ return *background_color;
602
+ }
603
+ return Vector4f{0, 0, 0, 0};
604
+ }
605
+ // Sort the fragments from back to front (i.e. increasing order of group id)
606
+ // https://github.com/frigaut/yorick-imutil/blob/master/insort.c#L37
607
+ for (int i = 1; i < num_fragments; i++) {
608
+ auto j = i;
609
+ auto temp = fragments[j];
610
+ while (j > 0 && fragments[j - 1].group_id > temp.group_id) {
611
+ fragments[j] = fragments[j - 1];
612
+ j--;
613
+ }
614
+ fragments[j] = temp;
615
+ }
616
+ // Blend the color
617
+ Vector3f accum_color[max_hit_shapes];
618
+ float accum_alpha[max_hit_shapes];
619
+ // auto hit_opaque = false;
620
+ auto first_alpha = 0.f;
621
+ auto first_color = Vector3f{0, 0, 0};
622
+ if (background_color != nullptr) {
623
+ first_alpha = background_color->w;
624
+ first_color = Vector3f{background_color->x,
625
+ background_color->y,
626
+ background_color->z};
627
+ }
628
+ for (int i = 0; i < num_fragments; i++) {
629
+ const Fragment &fragment = fragments[i];
630
+ auto new_color = fragment.color;
631
+ auto new_alpha = fragment.alpha;
632
+ auto prev_alpha = i > 0 ? accum_alpha[i - 1] : first_alpha;
633
+ auto prev_color = i > 0 ? accum_color[i - 1] : first_color;
634
+ if (edge_query != nullptr) {
635
+ // Do we hit the target shape?
636
+ if (new_alpha >= 1.f && edge_query->hit) {
637
+ // A fully opaque shape in front of the target occludes it
638
+ edge_query->hit = false;
639
+ }
640
+ if (edge_query->shape_group_id == fragment.group_id) {
641
+ edge_query->hit = true;
642
+ }
643
+ }
644
+ // prev_color is alpha premultiplied, don't need to multiply with
645
+ // prev_alpha
646
+ accum_color[i] = prev_color * (1 - new_alpha) + new_alpha * new_color;
647
+ accum_alpha[i] = prev_alpha * (1 - new_alpha) + new_alpha;
648
+ }
649
+ auto final_color = accum_color[num_fragments - 1];
650
+ auto final_alpha = accum_alpha[num_fragments - 1];
651
+ if (final_alpha > 1e-6f) {
652
+ final_color /= final_alpha;
653
+ }
654
+ assert(isfinite(final_color));
655
+ assert(isfinite(final_alpha));
656
+ if (d_color != nullptr) {
657
+ // Backward pass
658
+ auto d_final_color = Vector3f{(*d_color)[0], (*d_color)[1], (*d_color)[2]};
659
+ auto d_final_alpha = (*d_color)[3];
660
+ auto d_curr_color = d_final_color;
661
+ auto d_curr_alpha = d_final_alpha;
662
+ if (final_alpha > 1e-6f) {
663
+ // final_color = curr_color / final_alpha
664
+ d_curr_color = d_final_color / final_alpha;
665
+ d_curr_alpha -= sum(d_final_color * final_color) / final_alpha;
666
+ }
667
+ assert(isfinite(*d_color));
668
+ assert(isfinite(d_curr_color));
669
+ assert(isfinite(d_curr_alpha));
670
+ for (int i = num_fragments - 1; i >= 0; i--) {
671
+ // color[n] = prev_color * (1 - new_alpha) + new_alpha * new_color;
672
+ // alpha[n] = prev_alpha * (1 - new_alpha) + new_alpha;
673
+ auto prev_alpha = i > 0 ? accum_alpha[i - 1] : first_alpha;
674
+ auto prev_color = i > 0 ? accum_color[i - 1] : first_color;
675
+ auto d_prev_alpha = d_curr_alpha * (1.f - fragments[i].alpha);
676
+ auto d_alpha_i = d_curr_alpha * (1.f - prev_alpha);
677
+ d_alpha_i += sum(d_curr_color * (fragments[i].color - prev_color));
678
+ auto d_prev_color = d_curr_color * (1 - fragments[i].alpha);
679
+ auto d_color_i = d_curr_color * fragments[i].alpha;
680
+ auto group_id = fragments[i].group_id;
681
+ if (fragments[i].is_stroke) {
682
+ d_sample_color(scene.shape_groups[group_id].stroke_color_type,
683
+ scene.shape_groups[group_id].stroke_color,
684
+ pt,
685
+ Vector4f{d_color_i[0], d_color_i[1], d_color_i[2], d_alpha_i},
686
+ scene.d_shape_groups[group_id].stroke_color,
687
+ d_translation);
688
+ } else {
689
+ d_sample_color(scene.shape_groups[group_id].fill_color_type,
690
+ scene.shape_groups[group_id].fill_color,
691
+ pt,
692
+ Vector4f{d_color_i[0], d_color_i[1], d_color_i[2], d_alpha_i},
693
+ scene.d_shape_groups[group_id].fill_color,
694
+ d_translation);
695
+ }
696
+ d_curr_color = d_prev_color;
697
+ d_curr_alpha = d_prev_alpha;
698
+ }
699
+ if (d_background_color != nullptr) {
700
+ d_background_color->x += d_curr_color.x;
701
+ d_background_color->y += d_curr_color.y;
702
+ d_background_color->z += d_curr_color.z;
703
+ d_background_color->w += d_curr_alpha;
704
+ }
705
+ }
706
+ return Vector4f{final_color[0], final_color[1], final_color[2], final_alpha};
707
+ }
708
+
709
+ DEVICE
710
+ float sample_distance(const SceneData &scene,
711
+ const Vector2f &screen_pt,
712
+ float weight,
713
+ const float *d_dist = nullptr,
714
+ float *d_translation = nullptr) {
715
+ // screen_pt is in screen space ([0, 1), [0, 1)),
716
+ // need to transform to canvas space
717
+ auto pt = screen_pt;
718
+ pt.x *= scene.canvas_width;
719
+ pt.y *= scene.canvas_height;
720
+ // for each shape
721
+ auto min_group_id = -1;
722
+ auto min_distance = 0.f;
723
+ auto min_shape_id = -1;
724
+ auto closest_pt = Vector2f{0, 0};
725
+ auto min_path_info = ClosestPointPathInfo{-1, -1, 0};
726
+ for (int group_id = scene.num_shape_groups - 1; group_id >= 0; group_id--) {
727
+ auto s = -1;
728
+ auto p = Vector2f{0, 0};
729
+ ClosestPointPathInfo local_path_info;
730
+ auto d = infinity<float>();
731
+ if (compute_distance(scene, group_id, pt, infinity<float>(), &s, &p, &local_path_info, &d)) {
732
+ if (min_group_id == -1 || d < min_distance) {
733
+ min_distance = d;
734
+ min_group_id = group_id;
735
+ min_shape_id = s;
736
+ closest_pt = p;
737
+ min_path_info = local_path_info;
738
+ }
739
+ }
740
+ }
741
+ if (min_group_id == -1) {
742
+ return min_distance;
743
+ }
744
+ min_distance *= weight;
745
+ auto inside = false;
746
+ const ShapeGroup &shape_group = scene.shape_groups[min_group_id];
747
+ if (shape_group.fill_color != nullptr) {
748
+ inside = is_inside(scene,
749
+ min_group_id,
750
+ pt,
751
+ nullptr);
752
+ if (inside) {
753
+ min_distance = -min_distance;
754
+ }
755
+ }
756
+ assert((min_group_id >= 0 && min_shape_id >= 0) || scene.num_shape_groups == 0);
757
+ if (d_dist != nullptr) {
758
+ auto d_abs_dist = inside ? -(*d_dist) : (*d_dist);
759
+ const ShapeGroup &shape_group = scene.shape_groups[min_group_id];
760
+ const Shape &shape = scene.shapes[min_shape_id];
761
+ ShapeGroup &d_shape_group = scene.d_shape_groups[min_group_id];
762
+ Shape &d_shape = scene.d_shapes[min_shape_id];
763
+ d_compute_distance(shape_group.canvas_to_shape,
764
+ shape_group.shape_to_canvas,
765
+ shape,
766
+ pt,
767
+ closest_pt,
768
+ min_path_info,
769
+ d_abs_dist,
770
+ d_shape_group.shape_to_canvas,
771
+ d_shape,
772
+ d_translation);
773
+ }
774
+ return min_distance;
775
+ }
776
+
777
+ // Gather d_color from d_image inside the filter kernel, normalize by
778
+ // weight_image.
779
+ DEVICE
780
+ Vector4f gather_d_color(const Filter &filter,
781
+ const float *d_color_image,
782
+ const float *weight_image,
783
+ int width,
784
+ int height,
785
+ const Vector2f &pt) {
786
+ auto x = int(pt.x);
787
+ auto y = int(pt.y);
788
+ auto radius = filter.radius;
789
+ assert(radius > 0);
790
+ auto ri = (int)ceil(radius);
791
+ auto d_color = Vector4f{0, 0, 0, 0};
792
+ for (int dy = -ri; dy <= ri; dy++) {
793
+ for (int dx = -ri; dx <= ri; dx++) {
794
+ auto xx = x + dx;
795
+ auto yy = y + dy;
796
+ if (xx >= 0 && xx < width && yy >= 0 && yy < height) {
797
+ auto xc = xx + 0.5f;
798
+ auto yc = yy + 0.5f;
799
+ auto filter_weight =
800
+ compute_filter_weight(filter, xc - pt.x, yc - pt.y);
801
+ // pixel = \sum weight * color / \sum weight
802
+ auto weight_sum = weight_image[yy * width + xx];
803
+ if (weight_sum > 0) {
804
+ d_color += (filter_weight / weight_sum) * Vector4f{
805
+ d_color_image[4 * (yy * width + xx) + 0],
806
+ d_color_image[4 * (yy * width + xx) + 1],
807
+ d_color_image[4 * (yy * width + xx) + 2],
808
+ d_color_image[4 * (yy * width + xx) + 3],
809
+ };
810
+ }
811
+ }
812
+ }
813
+ }
814
+ return d_color;
815
+ }
816
+
817
+ DEVICE
818
+ float smoothstep(float d) {
819
+ auto t = clamp((d + 1.f) / 2.f, 0.f, 1.f);
820
+ return t * t * (3 - 2 * t);
821
+ }
822
+
823
+ DEVICE
824
+ float d_smoothstep(float d, float d_ret) {
825
+ if (d < -1.f || d > 1.f) {
826
+ return 0.f;
827
+ }
828
+ auto t = (d + 1.f) / 2.f;
829
+ // ret = t * t * (3 - 2 * t)
830
+ // = 3 * t * t - 2 * t * t * t
831
+ auto d_t = d_ret * (6 * t - 6 * t * t);
832
+ return d_t / 2.f;
833
+ }
834
+
835
+ DEVICE
836
+ Vector4f sample_color_prefiltered(const SceneData &scene,
837
+ const Vector4f *background_color,
838
+ const Vector2f &screen_pt,
839
+ const Vector4f *d_color = nullptr,
840
+ Vector4f *d_background_color = nullptr,
841
+ float *d_translation = nullptr) {
842
+ // screen_pt is in screen space ([0, 1), [0, 1)),
843
+ // need to transform to canvas space
844
+ auto pt = screen_pt;
845
+ pt.x *= scene.canvas_width;
846
+ pt.y *= scene.canvas_height;
847
+ constexpr auto max_hit_shapes = 64;
848
+ constexpr auto max_bvh_stack_size = 64;
849
+ PrefilterFragment fragments[max_hit_shapes];
850
+ int bvh_stack[max_bvh_stack_size];
851
+ auto stack_size = 0;
852
+ auto num_fragments = 0;
853
+ bvh_stack[stack_size++] = 2 * scene.num_shape_groups - 2;
854
+ while (stack_size > 0) {
855
+ const BVHNode &node = scene.bvh_nodes[bvh_stack[--stack_size]];
856
+ if (node.child1 < 0) {
857
+ // leaf
858
+ auto group_id = node.child0;
859
+ const ShapeGroup &shape_group = scene.shape_groups[group_id];
860
+ if (shape_group.stroke_color != nullptr) {
861
+ auto min_shape_id = -1;
862
+ auto closest_pt = Vector2f{0, 0};
863
+ auto local_path_info = ClosestPointPathInfo{-1, -1, 0};
864
+ auto d = infinity<float>();
865
+ compute_distance(scene, group_id, pt, infinity<float>(),
866
+ &min_shape_id, &closest_pt, &local_path_info, &d);
867
+ assert(min_shape_id != -1);
868
+ const auto &shape = scene.shapes[min_shape_id];
869
+ auto w = smoothstep(fabs(d) + shape.stroke_width) -
870
+ smoothstep(fabs(d) - shape.stroke_width);
871
+ if (w > 0) {
872
+ auto color_alpha = sample_color(shape_group.stroke_color_type,
873
+ shape_group.stroke_color,
874
+ pt);
875
+ color_alpha[3] *= w;
876
+
877
+ PrefilterFragment f;
878
+ f.color = Vector3f{color_alpha[0], color_alpha[1], color_alpha[2]};
879
+ f.alpha = color_alpha[3];
880
+ f.group_id = group_id;
881
+ f.shape_id = min_shape_id;
882
+ f.distance = d;
883
+ f.closest_pt = closest_pt;
884
+ f.is_stroke = true;
885
+ f.path_info = local_path_info;
886
+ f.within_distance = true;
887
+ assert(num_fragments < max_hit_shapes);
888
+ fragments[num_fragments++] = f;
889
+ }
890
+ }
891
+ if (shape_group.fill_color != nullptr) {
892
+ auto min_shape_id = -1;
893
+ auto closest_pt = Vector2f{0, 0};
894
+ auto local_path_info = ClosestPointPathInfo{-1, -1, 0};
895
+ auto d = infinity<float>();
896
+ auto found = compute_distance(scene,
897
+ group_id,
898
+ pt,
899
+ 1.f,
900
+ &min_shape_id,
901
+ &closest_pt,
902
+ &local_path_info,
903
+ &d);
904
+ auto inside = is_inside(scene, group_id, pt, nullptr);
905
+ if (found || inside) {
906
+ if (!inside) {
907
+ d = -d;
908
+ }
909
+ auto w = smoothstep(d);
910
+ if (w > 0) {
911
+ auto color_alpha = sample_color(shape_group.fill_color_type,
912
+ shape_group.fill_color,
913
+ pt);
914
+ color_alpha[3] *= w;
915
+
916
+ PrefilterFragment f;
917
+ f.color = Vector3f{color_alpha[0], color_alpha[1], color_alpha[2]};
918
+ f.alpha = color_alpha[3];
919
+ f.group_id = group_id;
920
+ f.shape_id = min_shape_id;
921
+ f.distance = d;
922
+ f.closest_pt = closest_pt;
923
+ f.is_stroke = false;
924
+ f.path_info = local_path_info;
925
+ f.within_distance = found;
926
+ assert(num_fragments < max_hit_shapes);
927
+ fragments[num_fragments++] = f;
928
+ }
929
+ }
930
+ }
931
+ } else {
932
+ assert(node.child0 >= 0 && node.child1 >= 0);
933
+ const AABB &b0 = scene.bvh_nodes[node.child0].box;
934
+ if (inside(b0, pt, scene.bvh_nodes[node.child0].max_radius)) {
935
+ bvh_stack[stack_size++] = node.child0;
936
+ }
937
+ const AABB &b1 = scene.bvh_nodes[node.child1].box;
938
+ if (inside(b1, pt, scene.bvh_nodes[node.child1].max_radius)) {
939
+ bvh_stack[stack_size++] = node.child1;
940
+ }
941
+ assert(stack_size <= max_bvh_stack_size);
942
+ }
943
+ }
944
+ if (num_fragments <= 0) {
945
+ if (background_color != nullptr) {
946
+ if (d_background_color != nullptr) {
947
+ *d_background_color = *d_color;
948
+ }
949
+ return *background_color;
950
+ }
951
+ return Vector4f{0, 0, 0, 0};
952
+ }
953
+ // Sort the fragments from back to front (i.e. increasing order of group id)
954
+ // https://github.com/frigaut/yorick-imutil/blob/master/insort.c#L37
955
+ for (int i = 1; i < num_fragments; i++) {
956
+ auto j = i;
957
+ auto temp = fragments[j];
958
+ while (j > 0 && fragments[j - 1].group_id > temp.group_id) {
959
+ fragments[j] = fragments[j - 1];
960
+ j--;
961
+ }
962
+ fragments[j] = temp;
963
+ }
964
+ // Blend the color
965
+ Vector3f accum_color[max_hit_shapes];
966
+ float accum_alpha[max_hit_shapes];
967
+ auto first_alpha = 0.f;
968
+ auto first_color = Vector3f{0, 0, 0};
969
+ if (background_color != nullptr) {
970
+ first_alpha = background_color->w;
971
+ first_color = Vector3f{background_color->x,
972
+ background_color->y,
973
+ background_color->z};
974
+ }
975
+ for (int i = 0; i < num_fragments; i++) {
976
+ const PrefilterFragment &fragment = fragments[i];
977
+ auto new_color = fragment.color;
978
+ auto new_alpha = fragment.alpha;
979
+ auto prev_alpha = i > 0 ? accum_alpha[i - 1] : first_alpha;
980
+ auto prev_color = i > 0 ? accum_color[i - 1] : first_color;
981
+ // prev_color is alpha premultiplied, don't need to multiply with
982
+ // prev_alpha
983
+ accum_color[i] = prev_color * (1 - new_alpha) + new_alpha * new_color;
984
+ accum_alpha[i] = prev_alpha * (1 - new_alpha) + new_alpha;
985
+ }
986
+ auto final_color = accum_color[num_fragments - 1];
987
+ auto final_alpha = accum_alpha[num_fragments - 1];
988
+ if (final_alpha > 1e-6f) {
989
+ final_color /= final_alpha;
990
+ }
991
+ assert(isfinite(final_color));
992
+ assert(isfinite(final_alpha));
993
+ if (d_color != nullptr) {
994
+ // Backward pass
995
+ auto d_final_color = Vector3f{(*d_color)[0], (*d_color)[1], (*d_color)[2]};
996
+ auto d_final_alpha = (*d_color)[3];
997
+ auto d_curr_color = d_final_color;
998
+ auto d_curr_alpha = d_final_alpha;
999
+ if (final_alpha > 1e-6f) {
1000
+ // final_color = curr_color / final_alpha
1001
+ d_curr_color = d_final_color / final_alpha;
1002
+ d_curr_alpha -= sum(d_final_color * final_color) / final_alpha;
1003
+ }
1004
+ assert(isfinite(*d_color));
1005
+ assert(isfinite(d_curr_color));
1006
+ assert(isfinite(d_curr_alpha));
1007
+ for (int i = num_fragments - 1; i >= 0; i--) {
1008
+ // color[n] = prev_color * (1 - new_alpha) + new_alpha * new_color;
1009
+ // alpha[n] = prev_alpha * (1 - new_alpha) + new_alpha;
1010
+ auto prev_alpha = i > 0 ? accum_alpha[i - 1] : first_alpha;
1011
+ auto prev_color = i > 0 ? accum_color[i - 1] : first_color;
1012
+ auto d_prev_alpha = d_curr_alpha * (1.f - fragments[i].alpha);
1013
+ auto d_alpha_i = d_curr_alpha * (1.f - prev_alpha);
1014
+ d_alpha_i += sum(d_curr_color * (fragments[i].color - prev_color));
1015
+ auto d_prev_color = d_curr_color * (1 - fragments[i].alpha);
1016
+ auto d_color_i = d_curr_color * fragments[i].alpha;
1017
+ auto group_id = fragments[i].group_id;
1018
+ if (fragments[i].is_stroke) {
1019
+ const auto &shape = scene.shapes[fragments[i].shape_id];
1020
+ auto d = fragments[i].distance;
1021
+ auto abs_d_plus_width = fabs(d) + shape.stroke_width;
1022
+ auto abs_d_minus_width = fabs(d) - shape.stroke_width;
1023
+ auto w = smoothstep(abs_d_plus_width) -
1024
+ smoothstep(abs_d_minus_width);
1025
+ if (w != 0) {
1026
+ auto d_w = w > 0 ? (fragments[i].alpha / w) * d_alpha_i : 0.f;
1027
+ d_alpha_i *= w;
1028
+
1029
+ // Backprop to color
1030
+ d_sample_color(scene.shape_groups[group_id].stroke_color_type,
1031
+ scene.shape_groups[group_id].stroke_color,
1032
+ pt,
1033
+ Vector4f{d_color_i[0], d_color_i[1], d_color_i[2], d_alpha_i},
1034
+ scene.d_shape_groups[group_id].stroke_color,
1035
+ d_translation);
1036
+
1037
+ auto d_abs_d_plus_width = d_smoothstep(abs_d_plus_width, d_w);
1038
+ auto d_abs_d_minus_width = -d_smoothstep(abs_d_minus_width, d_w);
1039
+
1040
+ auto d_d = d_abs_d_plus_width + d_abs_d_minus_width;
1041
+ if (d < 0) {
1042
+ d_d = -d_d;
1043
+ }
1044
+ auto d_stroke_width = d_abs_d_plus_width - d_abs_d_minus_width;
1045
+
1046
+ const auto &shape_group = scene.shape_groups[group_id];
1047
+ ShapeGroup &d_shape_group = scene.d_shape_groups[group_id];
1048
+ Shape &d_shape = scene.d_shapes[fragments[i].shape_id];
1049
+ if (fabs(d_d) > 1e-10f) {
1050
+ d_compute_distance(shape_group.canvas_to_shape,
1051
+ shape_group.shape_to_canvas,
1052
+ shape,
1053
+ pt,
1054
+ fragments[i].closest_pt,
1055
+ fragments[i].path_info,
1056
+ d_d,
1057
+ d_shape_group.shape_to_canvas,
1058
+ d_shape,
1059
+ d_translation);
1060
+ }
1061
+ atomic_add(&d_shape.stroke_width, d_stroke_width);
1062
+ }
1063
+ } else {
1064
+ const auto &shape = scene.shapes[fragments[i].shape_id];
1065
+ auto d = fragments[i].distance;
1066
+ auto w = smoothstep(d);
1067
+ if (w != 0) {
1068
+ // color_alpha[3] = color_alpha[3] * w;
1069
+ auto d_w = w > 0 ? (fragments[i].alpha / w) * d_alpha_i : 0.f;
1070
+ d_alpha_i *= w;
1071
+
1072
+ d_sample_color(scene.shape_groups[group_id].fill_color_type,
1073
+ scene.shape_groups[group_id].fill_color,
1074
+ pt,
1075
+ Vector4f{d_color_i[0], d_color_i[1], d_color_i[2], d_alpha_i},
1076
+ scene.d_shape_groups[group_id].fill_color,
1077
+ d_translation);
1078
+
1079
+ // w = smoothstep(d)
1080
+ auto d_d = d_smoothstep(d, d_w);
1081
+ if (d < 0) {
1082
+ d_d = -d_d;
1083
+ }
1084
+
1085
+ const auto &shape_group = scene.shape_groups[group_id];
1086
+ ShapeGroup &d_shape_group = scene.d_shape_groups[group_id];
1087
+ Shape &d_shape = scene.d_shapes[fragments[i].shape_id];
1088
+ if (fabs(d_d) > 1e-10f && fragments[i].within_distance) {
1089
+ d_compute_distance(shape_group.canvas_to_shape,
1090
+ shape_group.shape_to_canvas,
1091
+ shape,
1092
+ pt,
1093
+ fragments[i].closest_pt,
1094
+ fragments[i].path_info,
1095
+ d_d,
1096
+ d_shape_group.shape_to_canvas,
1097
+ d_shape,
1098
+ d_translation);
1099
+ }
1100
+ }
1101
+ }
1102
+ d_curr_color = d_prev_color;
1103
+ d_curr_alpha = d_prev_alpha;
1104
+ }
1105
+ if (d_background_color != nullptr) {
1106
+ d_background_color->x += d_curr_color.x;
1107
+ d_background_color->y += d_curr_color.y;
1108
+ d_background_color->z += d_curr_color.z;
1109
+ d_background_color->w += d_curr_alpha;
1110
+ }
1111
+ }
1112
+ return Vector4f{final_color[0], final_color[1], final_color[2], final_alpha};
1113
+ }
1114
+
1115
+ struct weight_kernel {
1116
+ DEVICE void operator()(int idx) {
1117
+ auto rng_state = init_pcg32(idx, seed);
1118
+ // height * width * num_samples_y * num_samples_x
1119
+ auto sx = idx % num_samples_x;
1120
+ auto sy = (idx / num_samples_x) % num_samples_y;
1121
+ auto x = (idx / (num_samples_x * num_samples_y)) % width;
1122
+ auto y = (idx / (num_samples_x * num_samples_y * width));
1123
+ assert(y < height);
1124
+ auto rx = next_pcg32_float(&rng_state);
1125
+ auto ry = next_pcg32_float(&rng_state);
1126
+ if (use_prefiltering) {
1127
+ rx = ry = 0.5f;
1128
+ }
1129
+ auto pt = Vector2f{x + ((float)sx + rx) / num_samples_x,
1130
+ y + ((float)sy + ry) / num_samples_y};
1131
+ auto radius = scene.filter->radius;
1132
+ assert(radius >= 0);
1133
+ auto ri = (int)ceil(radius);
1134
+ for (int dy = -ri; dy <= ri; dy++) {
1135
+ for (int dx = -ri; dx <= ri; dx++) {
1136
+ auto xx = x + dx;
1137
+ auto yy = y + dy;
1138
+ if (xx >= 0 && xx < width && yy >= 0 && yy < height) {
1139
+ auto xc = xx + 0.5f;
1140
+ auto yc = yy + 0.5f;
1141
+ auto filter_weight = compute_filter_weight(*scene.filter,
1142
+ xc - pt.x,
1143
+ yc - pt.y);
1144
+ atomic_add(weight_image[yy * width + xx], filter_weight);
1145
+ }
1146
+ }
1147
+ }
1148
+ }
1149
+
1150
+ SceneData scene;
1151
+ float *weight_image;
1152
+ int width;
1153
+ int height;
1154
+ int num_samples_x;
1155
+ int num_samples_y;
1156
+ uint64_t seed;
1157
+ bool use_prefiltering;
1158
+ };
1159
+
1160
+ // We use a "mega kernel" for rendering
1161
+ struct render_kernel {
1162
+ DEVICE void operator()(int idx) {
1163
+ // height * width * num_samples_y * num_samples_x
1164
+ auto pt = Vector2f{0, 0};
1165
+ auto x = 0;
1166
+ auto y = 0;
1167
+ if (eval_positions == nullptr) {
1168
+ auto rng_state = init_pcg32(idx, seed);
1169
+ auto sx = idx % num_samples_x;
1170
+ auto sy = (idx / num_samples_x) % num_samples_y;
1171
+ x = (idx / (num_samples_x * num_samples_y)) % width;
1172
+ y = (idx / (num_samples_x * num_samples_y * width));
1173
+ assert(x < width && y < height);
1174
+ auto rx = next_pcg32_float(&rng_state);
1175
+ auto ry = next_pcg32_float(&rng_state);
1176
+ if (use_prefiltering) {
1177
+ rx = ry = 0.5f;
1178
+ }
1179
+ pt = Vector2f{x + ((float)sx + rx) / num_samples_x,
1180
+ y + ((float)sy + ry) / num_samples_y};
1181
+ } else {
1182
+ pt = Vector2f{eval_positions[2 * idx],
1183
+ eval_positions[2 * idx + 1]};
1184
+ x = int(pt.x);
1185
+ y = int(pt.y);
1186
+ }
1187
+
1188
+ // normalize pt to [0, 1]
1189
+ auto npt = pt;
1190
+ npt.x /= width;
1191
+ npt.y /= height;
1192
+ auto num_samples = num_samples_x * num_samples_y;
1193
+ if (render_image != nullptr || d_render_image != nullptr) {
1194
+ Vector4f d_color = Vector4f{0, 0, 0, 0};
1195
+ if (d_render_image != nullptr) {
1196
+ // Gather d_color from d_render_image inside the filter kernel
1197
+ // normalize using weight_image
1198
+ d_color = gather_d_color(*scene.filter,
1199
+ d_render_image,
1200
+ weight_image,
1201
+ width,
1202
+ height,
1203
+ pt);
1204
+ }
1205
+ auto color = Vector4f{0, 0, 0, 0};
1206
+ if (use_prefiltering) {
1207
+ color = sample_color_prefiltered(scene,
1208
+ background_image != nullptr ? (const Vector4f*)&background_image[4 * ((y * width) + x)] : nullptr,
1209
+ npt,
1210
+ d_render_image != nullptr ? &d_color : nullptr,
1211
+ d_background_image != nullptr ? (Vector4f*)&d_background_image[4 * ((y * width) + x)] : nullptr,
1212
+ d_translation != nullptr ? &d_translation[2 * (y * width + x)] : nullptr);
1213
+ } else {
1214
+ color = sample_color(scene,
1215
+ background_image != nullptr ? (const Vector4f*)&background_image[4 * ((y * width) + x)] : nullptr,
1216
+ npt,
1217
+ d_render_image != nullptr ? &d_color : nullptr,
1218
+ nullptr,
1219
+ d_background_image != nullptr ? (Vector4f*)&d_background_image[4 * ((y * width) + x)] : nullptr,
1220
+ d_translation != nullptr ? &d_translation[2 * (y * width + x)] : nullptr);
1221
+ }
1222
+ assert(isfinite(color));
1223
+ // Splat color onto render_image
1224
+ auto radius = scene.filter->radius;
1225
+ assert(radius >= 0);
1226
+ auto ri = (int)ceil(radius);
1227
+ for (int dy = -ri; dy <= ri; dy++) {
1228
+ for (int dx = -ri; dx <= ri; dx++) {
1229
+ auto xx = x + dx;
1230
+ auto yy = y + dy;
1231
+ if (xx >= 0 && xx < width && yy >= 0 && yy < height &&
1232
+ weight_image[yy * width + xx] > 0) {
1233
+ auto weight_sum = weight_image[yy * width + xx];
1234
+ auto xc = xx + 0.5f;
1235
+ auto yc = yy + 0.5f;
1236
+ auto filter_weight = compute_filter_weight(*scene.filter,
1237
+ xc - pt.x,
1238
+ yc - pt.y);
1239
+ auto weighted_color = filter_weight * color / weight_sum;
1240
+ if (render_image != nullptr) {
1241
+ atomic_add(render_image[4 * (yy * width + xx) + 0],
1242
+ weighted_color[0]);
1243
+ atomic_add(render_image[4 * (yy * width + xx) + 1],
1244
+ weighted_color[1]);
1245
+ atomic_add(render_image[4 * (yy * width + xx) + 2],
1246
+ weighted_color[2]);
1247
+ atomic_add(render_image[4 * (yy * width + xx) + 3],
1248
+ weighted_color[3]);
1249
+ }
1250
+ if (d_render_image != nullptr) {
1251
+ // Backprop to filter_weight
1252
+ // pixel = \sum weight * color / \sum weight
1253
+ auto d_pixel = Vector4f{
1254
+ d_render_image[4 * (yy * width + xx) + 0],
1255
+ d_render_image[4 * (yy * width + xx) + 1],
1256
+ d_render_image[4 * (yy * width + xx) + 2],
1257
+ d_render_image[4 * (yy * width + xx) + 3],
1258
+ };
1259
+ auto d_weight =
1260
+ (dot(d_pixel, color) * weight_sum -
1261
+ filter_weight * dot(d_pixel, color) * (weight_sum - filter_weight)) /
1262
+ square(weight_sum);
1263
+ d_compute_filter_weight(*scene.filter,
1264
+ xc - pt.x,
1265
+ yc - pt.y,
1266
+ d_weight,
1267
+ scene.d_filter);
1268
+ }
1269
+ }
1270
+ }
1271
+ }
1272
+ }
1273
+ if (sdf_image != nullptr || d_sdf_image != nullptr) {
1274
+ float d_dist = 0.f;
1275
+ if (d_sdf_image != nullptr) {
1276
+ if (eval_positions == nullptr) {
1277
+ d_dist = d_sdf_image[y * width + x];
1278
+ } else {
1279
+ d_dist = d_sdf_image[idx];
1280
+ }
1281
+ }
1282
+ auto weight = eval_positions == nullptr ? 1.f / num_samples : 1.f;
1283
+ auto dist = sample_distance(scene, npt, weight,
1284
+ d_sdf_image != nullptr ? &d_dist : nullptr,
1285
+ d_translation != nullptr ? &d_translation[2 * (y * width + x)] : nullptr);
1286
+ if (sdf_image != nullptr) {
1287
+ if (eval_positions == nullptr) {
1288
+ atomic_add(sdf_image[y * width + x], dist);
1289
+ } else {
1290
+ atomic_add(sdf_image[idx], dist);
1291
+ }
1292
+ }
1293
+ }
1294
+ }
1295
+
1296
+ SceneData scene;
1297
+ float *background_image;
1298
+ float *render_image;
1299
+ float *weight_image;
1300
+ float *sdf_image;
1301
+ float *d_background_image;
1302
+ float *d_render_image;
1303
+ float *d_sdf_image;
1304
+ float *d_translation;
1305
+ int width;
1306
+ int height;
1307
+ int num_samples_x;
1308
+ int num_samples_y;
1309
+ uint64_t seed;
1310
+ bool use_prefiltering;
1311
+ float *eval_positions;
1312
+ };
1313
+
1314
+ struct BoundarySample { // One point sampled on a shape boundary; written by sample_boundary_kernel, read by render_edge_kernel.
1315
+ Vector2f pt; // Boundary point in canvas space, divided by the canvas width/height.
1316
+ Vector2f local_pt; // The same point in the shape's local space, as returned by sample_boundary().
1317
+ Vector2f normal; // Boundary normal after xform_normal() into canvas space.
1318
+ int shape_group_id; // Index of the shape group the sampled shape belongs to.
1319
+ int shape_id; // Index of the sampled shape; -1 marks a sample that must be skipped.
1320
+ float t; // Random number handed to sample_boundary() to pick the point.
1321
+ BoundaryData data; // Extra per-sample data filled in by sample_boundary().
1322
+ float pdf; // shape_pmf * boundary_pdf: probability density of this sample.
1323
+ };
1324
+
1325
+ struct sample_boundary_kernel {
1326
+ DEVICE void operator()(int idx) {
1327
+ boundary_samples[idx].pt = Vector2f{0, 0};
1328
+ boundary_samples[idx].shape_id = -1;
1329
+ boundary_ids[idx] = idx;
1330
+ morton_codes[idx] = 0;
1331
+
1332
+ auto rng_state = init_pcg32(idx, seed);
1333
+ auto u = next_pcg32_float(&rng_state);
1334
+ // Sample a shape
1335
+ auto sample_id = sample(scene.sample_shapes_cdf,
1336
+ scene.num_total_shapes,
1337
+ u);
1338
+ assert(sample_id >= 0 && sample_id < scene.num_total_shapes);
1339
+ auto shape_id = scene.sample_shape_id[sample_id];
1340
+ assert(shape_id >= 0 && shape_id < scene.num_shapes);
1341
+ auto shape_group_id = scene.sample_group_id[sample_id];
1342
+ assert(shape_group_id >= 0 && shape_group_id < scene.num_shape_groups);
1343
+ auto shape_pmf = scene.sample_shapes_pmf[shape_id];
1344
+ if (shape_pmf <= 0) {
1345
+ return;
1346
+ }
1347
+ // Sample a point on the boundary of the shape
1348
+ auto boundary_pdf = 0.f;
1349
+ auto normal = Vector2f{0, 0};
1350
+ auto t = next_pcg32_float(&rng_state);
1351
+ BoundaryData boundary_data;
1352
+ const ShapeGroup &shape_group = scene.shape_groups[shape_group_id];
1353
+ auto local_boundary_pt = sample_boundary(
1354
+ scene, shape_group_id, shape_id,
1355
+ t, normal, boundary_pdf, boundary_data);
1356
+ if (boundary_pdf <= 0) {
1357
+ return;
1358
+ }
1359
+
1360
+ // local_boundary_pt & normal are in shape's local space,
1361
+ // transform them to canvas space
1362
+ auto boundary_pt = xform_pt(shape_group.shape_to_canvas, local_boundary_pt);
1363
+ normal = xform_normal(shape_group.canvas_to_shape, normal);
1364
+ // Normalize boundary_pt to [0, 1)
1365
+ boundary_pt.x /= scene.canvas_width;
1366
+ boundary_pt.y /= scene.canvas_height;
1367
+
1368
+ boundary_samples[idx].pt = boundary_pt;
1369
+ boundary_samples[idx].local_pt = local_boundary_pt;
1370
+ boundary_samples[idx].normal = normal;
1371
+ boundary_samples[idx].shape_group_id = shape_group_id;
1372
+ boundary_samples[idx].shape_id = shape_id;
1373
+ boundary_samples[idx].t = t;
1374
+ boundary_samples[idx].data = boundary_data;
1375
+ boundary_samples[idx].pdf = shape_pmf * boundary_pdf;
1376
+ TVector2<uint32_t> p_i{boundary_pt.x * 1023, boundary_pt.y * 1023};
1377
+ morton_codes[idx] = (expand_bits(p_i.x) << 1u) |
1378
+ (expand_bits(p_i.y) << 0u);
1379
+ }
1380
+
1381
+ SceneData scene;
1382
+ uint64_t seed;
1383
+ BoundarySample *boundary_samples;
1384
+ int *boundary_ids;
1385
+ uint32_t *morton_codes;
1386
+ };
1387
+
1388
+ struct render_edge_kernel {
1389
+ DEVICE void operator()(int idx) {
1390
+ auto bid = boundary_ids[idx];
1391
+ if (boundary_samples[bid].shape_id == -1) {
1392
+ return;
1393
+ }
1394
+ auto boundary_pt = boundary_samples[bid].pt;
1395
+ auto local_boundary_pt = boundary_samples[bid].local_pt;
1396
+ auto normal = boundary_samples[bid].normal;
1397
+ auto shape_group_id = boundary_samples[bid].shape_group_id;
1398
+ auto shape_id = boundary_samples[bid].shape_id;
1399
+ auto t = boundary_samples[bid].t;
1400
+ auto boundary_data = boundary_samples[bid].data;
1401
+ auto pdf = boundary_samples[bid].pdf;
1402
+
1403
+ const ShapeGroup &shape_group = scene.shape_groups[shape_group_id];
1404
+
1405
+ auto bx = int(boundary_pt.x * width);
1406
+ auto by = int(boundary_pt.y * height);
1407
+ if (bx < 0 || bx >= width || by < 0 || by >= height) {
1408
+ return;
1409
+ }
1410
+
1411
+ // Sample the two sides of the boundary
1412
+ auto inside_query = EdgeQuery{shape_group_id, shape_id, false};
1413
+ auto outside_query = EdgeQuery{shape_group_id, shape_id, false};
1414
+ auto color_inside = sample_color(scene,
1415
+ background_image != nullptr ? (const Vector4f *)&background_image[4 * ((by * width) + bx)] : nullptr,
1416
+ boundary_pt - 1e-4f * normal,
1417
+ nullptr, &inside_query);
1418
+ auto color_outside = sample_color(scene,
1419
+ background_image != nullptr ? (const Vector4f *)&background_image[4 * ((by * width) + bx)] : nullptr,
1420
+ boundary_pt + 1e-4f * normal,
1421
+ nullptr, &outside_query);
1422
+ if (!inside_query.hit && !outside_query.hit) {
1423
+ // occluded
1424
+ return;
1425
+ }
1426
+ if (!inside_query.hit) {
1427
+ normal = -normal;
1428
+ swap_(inside_query, outside_query);
1429
+ swap_(color_inside, color_outside);
1430
+ }
1431
+ // Boundary point in screen space
1432
+ auto sboundary_pt = boundary_pt;
1433
+ sboundary_pt.x *= width;
1434
+ sboundary_pt.y *= height;
1435
+ auto d_color = gather_d_color(*scene.filter,
1436
+ d_render_image,
1437
+ weight_image,
1438
+ width,
1439
+ height,
1440
+ sboundary_pt);
1441
+ // Normalization factor
1442
+ d_color /= float(scene.canvas_width * scene.canvas_height);
1443
+
1444
+ assert(isfinite(d_color));
1445
+ assert(isfinite(pdf) && pdf > 0);
1446
+ auto contrib = dot(color_inside - color_outside, d_color) / pdf;
1447
+ ShapeGroup &d_shape_group = scene.d_shape_groups[shape_group_id];
1448
+ accumulate_boundary_gradient(scene.shapes[shape_id],
1449
+ contrib, t, normal, boundary_data, scene.d_shapes[shape_id],
1450
+ shape_group.shape_to_canvas, local_boundary_pt, d_shape_group.shape_to_canvas);
1451
+ // Don't need to backprop to filter weights:
1452
+ // \int f'(x) g(x) dx doesn't contain discontinuities
1453
+ // if f is continuous, even if g is discontinuous
1454
+ if (d_translation != nullptr) {
1455
+ // According to Reynold transport theorem,
1456
+ // the Jacobian of the boundary integral is dot(velocity, normal)
1457
+ // The velocity of the object translating x is (1, 0)
1458
+ // The velocity of the object translating y is (0, 1)
1459
+ atomic_add(&d_translation[2 * (by * width + bx) + 0], normal.x * contrib);
1460
+ atomic_add(&d_translation[2 * (by * width + bx) + 1], normal.y * contrib);
1461
+ }
1462
+ }
1463
+
1464
+ SceneData scene;
1465
+ const float *background_image;
1466
+ const BoundarySample *boundary_samples;
1467
+ const int *boundary_ids;
1468
+ float *weight_image;
1469
+ float *d_render_image;
1470
+ float *d_translation;
1471
+ int width;
1472
+ int height;
1473
+ int num_samples_x;
1474
+ int num_samples_y;
1475
+ };
1476
+
1477
+ void render(std::shared_ptr<Scene> scene,
1478
+ ptr<float> background_image,
1479
+ ptr<float> render_image,
1480
+ ptr<float> render_sdf,
1481
+ int width,
1482
+ int height,
1483
+ int num_samples_x,
1484
+ int num_samples_y,
1485
+ uint64_t seed,
1486
+ ptr<float> d_background_image,
1487
+ ptr<float> d_render_image,
1488
+ ptr<float> d_render_sdf,
1489
+ ptr<float> d_translation,
1490
+ bool use_prefiltering,
1491
+ ptr<float> eval_positions,
1492
+ int num_eval_positions) {
1493
+ #ifdef __NVCC__
1494
+ int old_device_id = -1;
1495
+ if (scene->use_gpu) {
1496
+ checkCuda(cudaGetDevice(&old_device_id));
1497
+ if (scene->gpu_index != -1) {
1498
+ checkCuda(cudaSetDevice(scene->gpu_index));
1499
+ }
1500
+ }
1501
+ #endif
1502
+ parallel_init();
1503
+
1504
+ float *weight_image = nullptr;
1505
+ // Allocate and zero the weight image
1506
+ if (scene->use_gpu) {
1507
+ #ifdef __CUDACC__
1508
+ if (eval_positions.get() == nullptr) {
1509
+ checkCuda(cudaMallocManaged(&weight_image, width * height * sizeof(float)));
1510
+ cudaMemset(weight_image, 0, width * height * sizeof(float));
1511
+ }
1512
+ #else
1513
+ assert(false);
1514
+ #endif
1515
+ } else {
1516
+ if (eval_positions.get() == nullptr) {
1517
+ weight_image = (float*)malloc(width * height * sizeof(float));
1518
+ memset(weight_image, 0, width * height * sizeof(float));
1519
+ }
1520
+ }
1521
+
1522
+ if (render_image.get() != nullptr || d_render_image.get() != nullptr ||
1523
+ render_sdf.get() != nullptr || d_render_sdf.get() != nullptr) {
1524
+ if (weight_image != nullptr) {
1525
+ parallel_for(weight_kernel{
1526
+ get_scene_data(*scene.get()),
1527
+ weight_image,
1528
+ width,
1529
+ height,
1530
+ num_samples_x,
1531
+ num_samples_y,
1532
+ seed
1533
+ }, width * height * num_samples_x * num_samples_y, scene->use_gpu);
1534
+ }
1535
+
1536
+ auto num_samples = eval_positions.get() == nullptr ?
1537
+ width * height * num_samples_x * num_samples_y : num_eval_positions;
1538
+ parallel_for(render_kernel{
1539
+ get_scene_data(*scene.get()),
1540
+ background_image.get(),
1541
+ render_image.get(),
1542
+ weight_image,
1543
+ render_sdf.get(),
1544
+ d_background_image.get(),
1545
+ d_render_image.get(),
1546
+ d_render_sdf.get(),
1547
+ d_translation.get(),
1548
+ width,
1549
+ height,
1550
+ num_samples_x,
1551
+ num_samples_y,
1552
+ seed,
1553
+ use_prefiltering,
1554
+ eval_positions.get()
1555
+ }, num_samples, scene->use_gpu);
1556
+ }
1557
+
1558
+ // Boundary sampling
1559
+ if (!use_prefiltering && d_render_image.get() != nullptr) {
1560
+ auto num_samples = width * height * num_samples_x * num_samples_y;
1561
+ BoundarySample *boundary_samples = nullptr;
1562
+ int *boundary_ids = nullptr; // for sorting
1563
+ uint32_t *morton_codes = nullptr; // for sorting
1564
+ // Allocate boundary samples
1565
+ if (scene->use_gpu) {
1566
+ #ifdef __CUDACC__
1567
+ checkCuda(cudaMallocManaged(&boundary_samples,
1568
+ num_samples * sizeof(BoundarySample)));
1569
+ checkCuda(cudaMallocManaged(&boundary_ids,
1570
+ num_samples * sizeof(int)));
1571
+ checkCuda(cudaMallocManaged(&morton_codes,
1572
+ num_samples * sizeof(uint32_t)));
1573
+ #else
1574
+ assert(false);
1575
+ #endif
1576
+ } else {
1577
+ boundary_samples = (BoundarySample*)malloc(
1578
+ num_samples * sizeof(BoundarySample));
1579
+ boundary_ids = (int*)malloc(
1580
+ num_samples * sizeof(int));
1581
+ morton_codes = (uint32_t*)malloc(
1582
+ num_samples * sizeof(uint32_t));
1583
+ }
1584
+
1585
+ // Edge sampling
1586
+ // We sort the boundary samples for better thread coherency
1587
+ parallel_for(sample_boundary_kernel{
1588
+ get_scene_data(*scene.get()),
1589
+ seed,
1590
+ boundary_samples,
1591
+ boundary_ids,
1592
+ morton_codes
1593
+ }, num_samples, scene->use_gpu);
1594
+ if (scene->use_gpu) {
1595
+ thrust::sort_by_key(thrust::device, morton_codes, morton_codes + num_samples, boundary_ids);
1596
+ } else {
1597
+ // Don't need to sort for CPU, we are not using SIMD hardware anyway.
1598
+ // thrust::sort_by_key(thrust::host, morton_codes, morton_codes + num_samples, boundary_ids);
1599
+ }
1600
+ parallel_for(render_edge_kernel{
1601
+ get_scene_data(*scene.get()),
1602
+ background_image.get(),
1603
+ boundary_samples,
1604
+ boundary_ids,
1605
+ weight_image,
1606
+ d_render_image.get(),
1607
+ d_translation.get(),
1608
+ width,
1609
+ height,
1610
+ num_samples_x,
1611
+ num_samples_y
1612
+ }, num_samples, scene->use_gpu);
1613
+ if (scene->use_gpu) {
1614
+ #ifdef __CUDACC__
1615
+ checkCuda(cudaFree(boundary_samples));
1616
+ checkCuda(cudaFree(boundary_ids));
1617
+ checkCuda(cudaFree(morton_codes));
1618
+ #else
1619
+ assert(false);
1620
+ #endif
1621
+ } else {
1622
+ free(boundary_samples);
1623
+ free(boundary_ids);
1624
+ free(morton_codes);
1625
+ }
1626
+ }
1627
+
1628
+ // Clean up weight image
1629
+ if (scene->use_gpu) {
1630
+ #ifdef __CUDACC__
1631
+ checkCuda(cudaFree(weight_image));
1632
+ #else
1633
+ assert(false);
1634
+ #endif
1635
+ } else {
1636
+ free(weight_image);
1637
+ }
1638
+
1639
+ if (scene->use_gpu) {
1640
+ cuda_synchronize();
1641
+ }
1642
+
1643
+ parallel_cleanup();
1644
+ #ifdef __NVCC__
1645
+ if (old_device_id != -1) {
1646
+ checkCuda(cudaSetDevice(old_device_id));
1647
+ }
1648
+ #endif
1649
+ }
1650
+
1651
+ PYBIND11_MODULE(diffvg, m) {
1652
+ m.doc() = "Differential Vector Graphics";
1653
+
1654
+ py::class_<ptr<void>>(m, "void_ptr")
1655
+ .def(py::init<std::size_t>())
1656
+ .def("as_size_t", &ptr<void>::as_size_t);
1657
+ py::class_<ptr<float>>(m, "float_ptr")
1658
+ .def(py::init<std::size_t>());
1659
+ py::class_<ptr<int>>(m, "int_ptr")
1660
+ .def(py::init<std::size_t>());
1661
+
1662
+ py::class_<Vector2f>(m, "Vector2f")
1663
+ .def(py::init<float, float>())
1664
+ .def_readwrite("x", &Vector2f::x)
1665
+ .def_readwrite("y", &Vector2f::y);
1666
+
1667
+ py::class_<Vector3f>(m, "Vector3f")
1668
+ .def(py::init<float, float, float>())
1669
+ .def_readwrite("x", &Vector3f::x)
1670
+ .def_readwrite("y", &Vector3f::y)
1671
+ .def_readwrite("z", &Vector3f::z);
1672
+
1673
+ py::class_<Vector4f>(m, "Vector4f")
1674
+ .def(py::init<float, float, float, float>())
1675
+ .def_readwrite("x", &Vector4f::x)
1676
+ .def_readwrite("y", &Vector4f::y)
1677
+ .def_readwrite("z", &Vector4f::z)
1678
+ .def_readwrite("w", &Vector4f::w);
1679
+
1680
+ py::enum_<ShapeType>(m, "ShapeType")
1681
+ .value("circle", ShapeType::Circle)
1682
+ .value("ellipse", ShapeType::Ellipse)
1683
+ .value("path", ShapeType::Path)
1684
+ .value("rect", ShapeType::Rect);
1685
+
1686
+ py::class_<Circle>(m, "Circle")
1687
+ .def(py::init<float, Vector2f>())
1688
+ .def("get_ptr", &Circle::get_ptr)
1689
+ .def_readonly("radius", &Circle::radius)
1690
+ .def_readonly("center", &Circle::center);
1691
+
1692
+ py::class_<Ellipse>(m, "Ellipse")
1693
+ .def(py::init<Vector2f, Vector2f>())
1694
+ .def("get_ptr", &Ellipse::get_ptr)
1695
+ .def_readonly("radius", &Ellipse::radius)
1696
+ .def_readonly("center", &Ellipse::center);
1697
+
1698
+ py::class_<Path>(m, "Path")
1699
+ .def(py::init<ptr<int>, ptr<float>, ptr<float>, int, int, bool, bool>())
1700
+ .def("get_ptr", &Path::get_ptr)
1701
+ .def("has_thickness", &Path::has_thickness)
1702
+ .def("copy_to", &Path::copy_to)
1703
+ .def_readonly("num_points", &Path::num_points);
1704
+
1705
+ py::class_<Rect>(m, "Rect")
1706
+ .def(py::init<Vector2f, Vector2f>())
1707
+ .def("get_ptr", &Rect::get_ptr)
1708
+ .def_readonly("p_min", &Rect::p_min)
1709
+ .def_readonly("p_max", &Rect::p_max);
1710
+
1711
+ py::enum_<ColorType>(m, "ColorType")
1712
+ .value("constant", ColorType::Constant)
1713
+ .value("linear_gradient", ColorType::LinearGradient)
1714
+ .value("radial_gradient", ColorType::RadialGradient);
1715
+
1716
+ py::class_<Constant>(m, "Constant")
1717
+ .def(py::init<Vector4f>())
1718
+ .def("get_ptr", &Constant::get_ptr)
1719
+ .def_readonly("color", &Constant::color);
1720
+
1721
+ py::class_<LinearGradient>(m, "LinearGradient")
1722
+ .def(py::init<Vector2f, Vector2f, int, ptr<float>, ptr<float>>())
1723
+ .def("get_ptr", &LinearGradient::get_ptr)
1724
+ .def("copy_to", &LinearGradient::copy_to)
1725
+ .def_readonly("begin", &LinearGradient::begin)
1726
+ .def_readonly("end", &LinearGradient::end)
1727
+ .def_readonly("num_stops", &LinearGradient::num_stops);
1728
+
1729
+ py::class_<RadialGradient>(m, "RadialGradient")
1730
+ .def(py::init<Vector2f, Vector2f, int, ptr<float>, ptr<float>>())
1731
+ .def("get_ptr", &RadialGradient::get_ptr)
1732
+ .def("copy_to", &RadialGradient::copy_to)
1733
+ .def_readonly("center", &RadialGradient::center)
1734
+ .def_readonly("radius", &RadialGradient::radius)
1735
+ .def_readonly("num_stops", &RadialGradient::num_stops);
1736
+
1737
+ py::class_<Shape>(m, "Shape")
1738
+ .def(py::init<ShapeType, ptr<void>, float>())
1739
+ .def("as_circle", &Shape::as_circle)
1740
+ .def("as_ellipse", &Shape::as_ellipse)
1741
+ .def("as_path", &Shape::as_path)
1742
+ .def("as_rect", &Shape::as_rect)
1743
+ .def_readonly("type", &Shape::type)
1744
+ .def_readonly("stroke_width", &Shape::stroke_width);
1745
+
1746
+ py::class_<ShapeGroup>(m, "ShapeGroup")
1747
+ .def(py::init<ptr<int>,
1748
+ int,
1749
+ ColorType,
1750
+ ptr<void>,
1751
+ ColorType,
1752
+ ptr<void>,
1753
+ bool,
1754
+ ptr<float>>())
1755
+ .def("fill_color_as_constant", &ShapeGroup::fill_color_as_constant)
1756
+ .def("fill_color_as_linear_gradient", &ShapeGroup::fill_color_as_linear_gradient)
1757
+ .def("fill_color_as_radial_gradient", &ShapeGroup::fill_color_as_radial_gradient)
1758
+ .def("stroke_color_as_constant", &ShapeGroup::stroke_color_as_constant)
1759
+ .def("stroke_color_as_linear_gradient", &ShapeGroup::stroke_color_as_linear_gradient)
1760
+ .def("stroke_color_as_radial_gradient", &ShapeGroup::fill_color_as_radial_gradient)
1761
+ .def("has_fill_color", &ShapeGroup::has_fill_color)
1762
+ .def("has_stroke_color", &ShapeGroup::has_stroke_color)
1763
+ .def("copy_to", &ShapeGroup::copy_to)
1764
+ .def_readonly("fill_color_type", &ShapeGroup::fill_color_type)
1765
+ .def_readonly("stroke_color_type", &ShapeGroup::stroke_color_type);
1766
+
1767
+ py::enum_<FilterType>(m, "FilterType")
1768
+ .value("box", FilterType::Box)
1769
+ .value("tent", FilterType::Tent)
1770
+ .value("parabolic", FilterType::RadialParabolic)
1771
+ .value("hann", FilterType::Hann);
1772
+
1773
+ py::class_<Filter>(m, "Filter")
1774
+ .def(py::init<FilterType,
1775
+ float>());
1776
+
1777
+ py::class_<Scene, std::shared_ptr<Scene>>(m, "Scene")
1778
+ .def(py::init<int,
1779
+ int,
1780
+ const std::vector<const Shape*> &,
1781
+ const std::vector<const ShapeGroup*> &,
1782
+ const Filter &,
1783
+ bool,
1784
+ int>())
1785
+ .def("get_d_shape", &Scene::get_d_shape)
1786
+ .def("get_d_shape_group", &Scene::get_d_shape_group)
1787
+ .def("get_d_filter_radius", &Scene::get_d_filter_radius)
1788
+ .def_readonly("num_shapes", &Scene::num_shapes)
1789
+ .def_readonly("num_shape_groups", &Scene::num_shape_groups);
1790
+
1791
+ m.def("render", &render, "");
1792
+ }
DiffVG/diffvg.h ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #ifdef __NVCC__
4
+ #define DEVICE __device__ __host__
5
+ #else
6
+ #define DEVICE
7
+ #endif
8
+
9
+ #ifndef __NVCC__
10
+ #include <cmath>
11
+ namespace {
12
+ inline float fmodf(float a, float b) {
13
+ return std::fmod(a, b);
14
+ }
15
+ inline double fmod(double a, double b) {
16
+ return std::fmod(a, b);
17
+ }
18
+ }
19
+ using std::isfinite;
20
+ #endif
21
+
22
+ #ifndef M_PI
23
+ #define M_PI 3.14159265358979323846
24
+ #endif
25
+
26
+ #include <cstdint>
27
+ #include <atomic>
28
+
29
+ // We use Real for most of the internal computation.
30
+ // However, for PyTorch interfaces, Optix Prime and Embree queries
31
+ // we use float
32
+ using Real = float;
33
+
34
+ template <typename T>
35
+ DEVICE
36
+ inline T square(const T &x) {
37
+ return x * x;
38
+ }
39
+
40
+ template <typename T>
41
+ DEVICE
42
+ inline T cubic(const T &x) {
43
+ return x * x * x;
44
+ }
45
+
46
+ template <typename T>
47
+ DEVICE
48
+ inline T clamp(const T &v, const T &lo, const T &hi) {
49
+ if (v < lo) return lo;
50
+ else if (v > hi) return hi;
51
+ else return v;
52
+ }
53
+
54
+ DEVICE
55
+ inline int modulo(int a, int b) {
56
+ auto r = a % b;
57
+ return (r < 0) ? r+b : r;
58
+ }
59
+
60
+ DEVICE
61
+ inline float modulo(float a, float b) {
62
+ float r = ::fmodf(a, b);
63
+ return (r < 0.0f) ? r+b : r;
64
+ }
65
+
66
+ DEVICE
67
+ inline double modulo(double a, double b) {
68
+ double r = ::fmod(a, b);
69
+ return (r < 0.0) ? r+b : r;
70
+ }
71
+
72
+ template <typename T>
73
+ DEVICE
74
+ inline T max(const T &a, const T &b) {
75
+ return a > b ? a : b;
76
+ }
77
+
78
+ template <typename T>
79
+ DEVICE
80
+ inline T min(const T &a, const T &b) {
81
+ return a < b ? a : b;
82
+ }
83
+
84
/// Integer division rounded up: computes (x + y - 1) / y.
/// This equals ceil(x / y) for non-negative x and positive y.
inline int idiv_ceil(int x, int y) {
    const int biased = x + y - 1;
    return biased / y;
}
88
+
89
+ template <typename T>
90
+ DEVICE
91
+ inline void swap_(T &a, T &b) {
92
+ T tmp = a;
93
+ a = b;
94
+ b = tmp;
95
+ }
96
+
97
+ inline double log2(double x) {
98
+ return log(x) / log(Real(2));
99
+ }
100
+
101
+ template <typename T>
102
+ DEVICE
103
+ inline T safe_acos(const T &x) {
104
+ if (x >= 1) return T(0);
105
+ else if(x <= -1) return T(M_PI);
106
+ return acos(x);
107
+ }
108
+
109
+ // For Morton code computation. This can be made faster.
110
+ DEVICE
111
+ inline uint32_t expand_bits(uint32_t x) {
112
+ // Insert one zero after every bit given a 10-bit integer
113
+ constexpr uint64_t mask = 0x1u;
114
+ // We start from LSB (bit 31)
115
+ auto result = (x & (mask << 0u));
116
+ result |= ((x & (mask << 1u)) << 1u);
117
+ result |= ((x & (mask << 2u)) << 2u);
118
+ result |= ((x & (mask << 3u)) << 3u);
119
+ result |= ((x & (mask << 4u)) << 4u);
120
+ result |= ((x & (mask << 5u)) << 5u);
121
+ result |= ((x & (mask << 6u)) << 6u);
122
+ result |= ((x & (mask << 7u)) << 7u);
123
+ result |= ((x & (mask << 8u)) << 8u);
124
+ result |= ((x & (mask << 9u)) << 9u);
125
+ return result;
126
+ }
127
+
128
+ // DEVICE
129
+ // inline int clz(uint64_t x) {
130
+ // #ifdef __CUDA_ARCH__
131
+ // return __clzll(x);
132
+ // #else
133
+ // // TODO: use _BitScanReverse in windows
134
+ // return x == 0 ? 64 : __builtin_clzll(x);
135
+ // #endif
136
+ // }
137
+
138
+ // DEVICE
139
+ // inline int ffs(uint8_t x) {
140
+ // #ifdef __CUDA_ARCH__
141
+ // return __ffs(x);
142
+ // #else
143
+ // // TODO: use _BitScanReverse in windows
144
+ // return __builtin_ffs(x);
145
+ // #endif
146
+ // }
147
+
148
+ // DEVICE
149
+ // inline int popc(uint8_t x) {
150
+ // #ifdef __CUDA_ARCH__
151
+ // return __popc(x);
152
+ // #else
153
+ // // TODO: use _popcnt in windows
154
+ // return __builtin_popcount(x);
155
+ // #endif
156
+ // }
DiffVG/edge_query.h ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ struct EdgeQuery {
4
+ int shape_group_id;
5
+ int shape_id;
6
+ bool hit; // Do we hit the specified shape_group_id & shape_id?
7
+ };
DiffVG/filter.h ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "diffvg.h"
4
+ #include "atomic.h"
5
+
6
/// Shape of the pixel reconstruction filter.
enum class FilterType {
    Box,             // constant weight inside the support
    Tent,            // per-axis linear falloff (radius - |d|)
    RadialParabolic, // per-axis 4/3 * (1 - (d/r)^2)
    Hann             // https://en.wikipedia.org/wiki/Window_function#Hann_and_Hamming_windows
};

/// A reconstruction filter: its shape and its radius.
struct Filter {
    FilterType type;
    float radius;
};

/// Gradient accumulator for Filter; holds the derivative w.r.t. the radius.
struct DFilter {
    float radius;
};
21
+
22
+ DEVICE
23
+ inline
24
+ float compute_filter_weight(const Filter &filter,
25
+ float dx,
26
+ float dy) {
27
+ if (fabs(dx) > filter.radius || fabs(dy) > filter.radius) {
28
+ return 0;
29
+ }
30
+ if (filter.type == FilterType::Box) {
31
+ return 1.f / square(2 * filter.radius);
32
+ } else if (filter.type == FilterType::Tent) {
33
+ return (filter.radius - fabs(dx)) * (filter.radius - fabs(dy)) /
34
+ square(square(filter.radius));
35
+ } else if (filter.type == FilterType::RadialParabolic) {
36
+ return (4.f / 3.f) * (1 - square(dx / filter.radius)) *
37
+ (4.f / 3.f) * (1 - square(dy / filter.radius));
38
+ } else {
39
+ assert(filter.type == FilterType::Hann);
40
+ // normalize dx, dy to [0, 1]
41
+ auto ndx = (dx / (2*filter.radius)) + 0.5f;
42
+ auto ndy = (dy / (2*filter.radius)) + 0.5f;
43
+ // the normalization factor is R^2
44
+ return 0.5f * (1.f - cos(float(2 * M_PI) * ndx)) *
45
+ 0.5f * (1.f - cos(float(2 * M_PI) * ndy)) /
46
+ square(filter.radius);
47
+ }
48
+ }
49
+
50
+ DEVICE
51
+ inline
52
+ void d_compute_filter_weight(const Filter &filter,
53
+ float dx,
54
+ float dy,
55
+ float d_return,
56
+ DFilter *d_filter) {
57
+ if (filter.type == FilterType::Box) {
58
+ // return 1.f / square(2 * filter.radius);
59
+ atomic_add(d_filter->radius,
60
+ d_return * (-2) * 2 * filter.radius / cubic(2 * filter.radius));
61
+ } else if (filter.type == FilterType::Tent) {
62
+ // return (filer.radius - fabs(dx)) * (filer.radius - fabs(dy)) /
63
+ // square(square(filter.radius));
64
+ auto fx = filter.radius - fabs(dx);
65
+ auto fy = filter.radius - fabs(dy);
66
+ auto norm = 1 / square(filter.radius);
67
+ auto d_fx = d_return * fy * norm;
68
+ auto d_fy = d_return * fx * norm;
69
+ auto d_norm = d_return * fx * fy;
70
+ atomic_add(d_filter->radius,
71
+ d_fx + d_fy + (-4) * d_norm / pow(filter.radius, 5));
72
+ } else if (filter.type == FilterType::RadialParabolic) {
73
+ // return (4.f / 3.f) * (1 - square(dx / filter.radius)) *
74
+ // (4.f / 3.f) * (1 - square(dy / filter.radius));
75
+ // auto d_square_x = d_return * (-4.f / 3.f);
76
+ // auto d_square_y = d_return * (-4.f / 3.f);
77
+ auto r3 = filter.radius * filter.radius * filter.radius;
78
+ auto d_radius = -(2 * square(dx) + 2 * square(dy)) / r3;
79
+ atomic_add(d_filter->radius, d_radius);
80
+ } else {
81
+ assert(filter.type == FilterType::Hann);
82
+ // // normalize dx, dy to [0, 1]
83
+ // auto ndx = (dx / (2*filter.radius)) + 0.5f;
84
+ // auto ndy = (dy / (2*filter.radius)) + 0.5f;
85
+ // // the normalization factor is R^2
86
+ // return 0.5f * (1.f - cos(float(2 * M_PI) * ndx)) *
87
+ // 0.5f * (1.f - cos(float(2 * M_PI) * ndy)) /
88
+ // square(filter.radius);
89
+
90
+ // normalize dx, dy to [0, 1]
91
+ auto ndx = (dx / (2*filter.radius)) + 0.5f;
92
+ auto ndy = (dy / (2*filter.radius)) + 0.5f;
93
+ auto fx = 0.5f * (1.f - cos(float(2*M_PI) * ndx));
94
+ auto fy = 0.5f * (1.f - cos(float(2*M_PI) * ndy));
95
+ auto norm = 1 / square(filter.radius);
96
+ auto d_fx = d_return * fy * norm;
97
+ auto d_fy = d_return * fx * norm;
98
+ auto d_norm = d_return * fx * fy;
99
+ auto d_ndx = d_fx * 0.5f * sin(float(2*M_PI) * ndx) * float(2*M_PI);
100
+ auto d_ndy = d_fy * 0.5f * sin(float(2*M_PI) * ndy) * float(2*M_PI);
101
+ atomic_add(d_filter->radius,
102
+ d_ndx * (-2*dx / square(2*filter.radius)) +
103
+ d_ndy * (-2*dy / square(2*filter.radius)) +
104
+ (-2) * d_norm / cubic(filter.radius));
105
+ }
106
+ }
DiffVG/matrix.h ADDED
@@ -0,0 +1,544 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "diffvg.h"
4
+ #include "vector.h"
5
+ #include <iostream>
6
+
7
+ template <typename T>
8
+ struct TMatrix3x3 {
9
+ DEVICE
10
+ TMatrix3x3() {
11
+ for (int i = 0; i < 3; i++) {
12
+ for (int j = 0; j < 3; j++) {
13
+ data[i][j] = T(0);
14
+ }
15
+ }
16
+ }
17
+
18
+ template <typename T2>
19
+ DEVICE
20
+ TMatrix3x3(T2 *arr) {
21
+ data[0][0] = arr[0];
22
+ data[0][1] = arr[1];
23
+ data[0][2] = arr[2];
24
+ data[1][0] = arr[3];
25
+ data[1][1] = arr[4];
26
+ data[1][2] = arr[5];
27
+ data[2][0] = arr[6];
28
+ data[2][1] = arr[7];
29
+ data[2][2] = arr[8];
30
+ }
31
+ DEVICE
32
+ TMatrix3x3(T v00, T v01, T v02,
33
+ T v10, T v11, T v12,
34
+ T v20, T v21, T v22) {
35
+ data[0][0] = v00;
36
+ data[0][1] = v01;
37
+ data[0][2] = v02;
38
+ data[1][0] = v10;
39
+ data[1][1] = v11;
40
+ data[1][2] = v12;
41
+ data[2][0] = v20;
42
+ data[2][1] = v21;
43
+ data[2][2] = v22;
44
+ }
45
+
46
+ DEVICE
47
+ const T& operator()(int i, int j) const {
48
+ return data[i][j];
49
+ }
50
+ DEVICE
51
+ T& operator()(int i, int j) {
52
+ return data[i][j];
53
+ }
54
+ DEVICE
55
+ static TMatrix3x3<T> identity() {
56
+ TMatrix3x3<T> m(1, 0, 0,
57
+ 0, 1, 0,
58
+ 0, 0, 1);
59
+ return m;
60
+ }
61
+
62
+ T data[3][3];
63
+ };
64
+
65
+ using Matrix3x3 = TMatrix3x3<Real>;
66
+ using Matrix3x3f = TMatrix3x3<float>;
67
+
68
+ template <typename T>
69
+ struct TMatrix4x4 {
70
+ DEVICE TMatrix4x4() {
71
+ for (int i = 0; i < 4; i++) {
72
+ for (int j = 0; j < 4; j++) {
73
+ data[i][j] = T(0);
74
+ }
75
+ }
76
+ }
77
+
78
+ template <typename T2>
79
+ DEVICE TMatrix4x4(const T2 *arr) {
80
+ for (int i = 0; i < 4; i++) {
81
+ for (int j = 0; j < 4; j++) {
82
+ data[i][j] = (T)arr[i * 4 + j];
83
+ }
84
+ }
85
+ }
86
+
87
+ template <typename T2>
88
+ DEVICE TMatrix4x4(const TMatrix4x4<T2> &m) {
89
+ for (int i = 0; i < 4; i++) {
90
+ for (int j = 0; j < 4; j++) {
91
+ data[i][j] = T(m.data[i][j]);
92
+ }
93
+ }
94
+ }
95
+
96
+ template <typename T2>
97
+ DEVICE TMatrix4x4(T2 v00, T2 v01, T2 v02, T2 v03,
98
+ T2 v10, T2 v11, T2 v12, T2 v13,
99
+ T2 v20, T2 v21, T2 v22, T2 v23,
100
+ T2 v30, T2 v31, T2 v32, T2 v33) {
101
+ data[0][0] = (T)v00;
102
+ data[0][1] = (T)v01;
103
+ data[0][2] = (T)v02;
104
+ data[0][3] = (T)v03;
105
+ data[1][0] = (T)v10;
106
+ data[1][1] = (T)v11;
107
+ data[1][2] = (T)v12;
108
+ data[1][3] = (T)v13;
109
+ data[2][0] = (T)v20;
110
+ data[2][1] = (T)v21;
111
+ data[2][2] = (T)v22;
112
+ data[2][3] = (T)v23;
113
+ data[3][0] = (T)v30;
114
+ data[3][1] = (T)v31;
115
+ data[3][2] = (T)v32;
116
+ data[3][3] = (T)v33;
117
+ }
118
+
119
+ DEVICE
120
+ const T& operator()(int i, int j) const {
121
+ return data[i][j];
122
+ }
123
+
124
+ DEVICE
125
+ T& operator()(int i, int j) {
126
+ return data[i][j];
127
+ }
128
+
129
+ DEVICE
130
+ static TMatrix4x4<T> identity() {
131
+ TMatrix4x4<T> m(1, 0, 0, 0,
132
+ 0, 1, 0, 0,
133
+ 0, 0, 1, 0,
134
+ 0, 0, 0, 1);
135
+ return m;
136
+ }
137
+
138
+ T data[4][4];
139
+ };
140
+
141
+ using Matrix4x4 = TMatrix4x4<Real>;
142
+ using Matrix4x4f = TMatrix4x4<float>;
143
+
144
+ template <typename T0, typename T1>
145
+ DEVICE
146
+ inline auto operator+(const TMatrix3x3<T0> &m0, const TMatrix3x3<T1> &m1) -> TMatrix3x3<decltype(m0(0, 0) + m1(0, 0))> {
147
+ TMatrix3x3<decltype(m0(0, 0) + m1(0, 0))> m;
148
+ for (int i = 0; i < 3; i++) {
149
+ for (int j = 0; j < 3; j++) {
150
+ m(i, j) = m0(i, j) + m1(i, j);
151
+ }
152
+ }
153
+ return m;
154
+ }
155
+
156
+ template <typename T0, typename T1>
157
+ DEVICE
158
+ inline auto operator-(const TMatrix3x3<T0> &m0, const TMatrix3x3<T1> &m1) -> TMatrix3x3<decltype(m0(0, 0) - m1(0, 0))> {
159
+ TMatrix3x3<decltype(m0(0, 0) - m1(0, 0))> m;
160
+ for (int i = 0; i < 3; i++) {
161
+ for (int j = 0; j < 3; j++) {
162
+ m(i, j) = m0(i, j) - m1(i, j);
163
+ }
164
+ }
165
+ return m;
166
+ }
167
+
168
+ template <typename T>
169
+ DEVICE
170
+ inline auto operator*(const TMatrix3x3<T> &m0, const TMatrix3x3<T> &m1) -> TMatrix3x3<T> {
171
+ TMatrix3x3<T> ret;
172
+ for (int i = 0; i < 3; i++) {
173
+ for (int j = 0; j < 3; j++) {
174
+ ret(i, j) = T(0);
175
+ for (int k = 0; k < 3; k++) {
176
+ ret(i, j) += m0(i, k) * m1(k, j);
177
+ }
178
+ }
179
+ }
180
+ return ret;
181
+ }
182
+
183
+ template <typename T>
184
+ DEVICE
185
+ inline auto operator*(const TVector3<T> &v, const TMatrix3x3<T> &m) -> TVector3<T> {
186
+ TVector3<T> ret;
187
+ for (int i = 0; i < 3; i++) {
188
+ ret[i] = T(0);
189
+ for (int j = 0; j < 3; j++) {
190
+ ret[i] += v[j] * m(j, i);
191
+ }
192
+ }
193
+ return ret;
194
+ }
195
+
196
+ template <typename T>
197
+ DEVICE
198
+ inline auto operator*(const TMatrix3x3<T> &m, const TVector3<T> &v) -> TVector3<T> {
199
+ TVector3<T> ret;
200
+ for (int i = 0; i < 3; i++) {
201
+ ret[i] = 0.f;
202
+ for (int j = 0; j < 3; j++) {
203
+ ret[i] += m(i, j) * v[j];
204
+ }
205
+ }
206
+ return ret;
207
+ }
208
+
209
+ template <typename T>
210
+ DEVICE
211
+ inline auto inverse(const TMatrix3x3<T> &m) -> TMatrix3x3<T> {
212
+ // computes the inverse of a matrix m
213
+ auto det = m(0, 0) * (m(1, 1) * m(2, 2) - m(2, 1) * m(1, 2)) -
214
+ m(0, 1) * (m(1, 0) * m(2, 2) - m(1, 2) * m(2, 0)) +
215
+ m(0, 2) * (m(1, 0) * m(2, 1) - m(1, 1) * m(2, 0));
216
+
217
+ auto invdet = 1 / det;
218
+
219
+ auto m_inv = TMatrix3x3<T>{};
220
+ m_inv(0, 0) = (m(1, 1) * m(2, 2) - m(2, 1) * m(1, 2)) * invdet;
221
+ m_inv(0, 1) = (m(0, 2) * m(2, 1) - m(0, 1) * m(2, 2)) * invdet;
222
+ m_inv(0, 2) = (m(0, 1) * m(1, 2) - m(0, 2) * m(1, 1)) * invdet;
223
+ m_inv(1, 0) = (m(1, 2) * m(2, 0) - m(1, 0) * m(2, 2)) * invdet;
224
+ m_inv(1, 1) = (m(0, 0) * m(2, 2) - m(0, 2) * m(2, 0)) * invdet;
225
+ m_inv(1, 2) = (m(1, 0) * m(0, 2) - m(0, 0) * m(1, 2)) * invdet;
226
+ m_inv(2, 0) = (m(1, 0) * m(2, 1) - m(2, 0) * m(1, 1)) * invdet;
227
+ m_inv(2, 1) = (m(2, 0) * m(0, 1) - m(0, 0) * m(2, 1)) * invdet;
228
+ m_inv(2, 2) = (m(0, 0) * m(1, 1) - m(1, 0) * m(0, 1)) * invdet;
229
+ return m_inv;
230
+ }
231
+
232
+ template <typename T0, typename T1>
233
+ DEVICE
234
+ inline auto operator+(const TMatrix4x4<T0> &m0, const TMatrix4x4<T1> &m1) -> TMatrix4x4<decltype(m0(0, 0) + m1(0, 0))> {
235
+ TMatrix4x4<decltype(m0(0, 0) + m1(0, 0))> m;
236
+ for (int i = 0; i < 4; i++) {
237
+ for (int j = 0; j < 4; j++) {
238
+ m(i, j) = m0(i, j) + m1(i, j);
239
+ }
240
+ }
241
+ return m;
242
+ }
243
+
244
+ template <typename T>
245
+ DEVICE
246
+ TMatrix3x3<T> transpose(const TMatrix3x3<T> &m) {
247
+ return TMatrix3x3<T>(m(0, 0), m(1, 0), m(2, 0),
248
+ m(0, 1), m(1, 1), m(2, 1),
249
+ m(0, 2), m(1, 2), m(2, 2));
250
+ }
251
+
252
+ template <typename T>
253
+ DEVICE
254
+ TMatrix4x4<T> transpose(const TMatrix4x4<T> &m) {
255
+ return TMatrix4x4<T>(m(0, 0), m(1, 0), m(2, 0), m(3, 0),
256
+ m(0, 1), m(1, 1), m(2, 1), m(3, 1),
257
+ m(0, 2), m(1, 2), m(2, 2), m(3, 2),
258
+ m(0, 3), m(1, 3), m(2, 3), m(3, 3));
259
+ }
260
+
261
+ template <typename T>
262
+ DEVICE
263
+ inline TMatrix3x3<T> operator-(const TMatrix3x3<T> &m0) {
264
+ TMatrix3x3<T> m;
265
+ for (int i = 0; i < 3; i++) {
266
+ for (int j = 0; j < 3; j++) {
267
+ m(i, j) = -m0(i, j);
268
+ }
269
+ }
270
+ return m;
271
+ }
272
+
273
+ template <typename T>
274
+ DEVICE
275
+ inline TMatrix4x4<T> operator-(const TMatrix4x4<T> &m0) {
276
+ TMatrix4x4<T> m;
277
+ for (int i = 0; i < 4; i++) {
278
+ for (int j = 0; j < 4; j++) {
279
+ m(i, j) = -m0(i, j);
280
+ }
281
+ }
282
+ return m;
283
+ }
284
+
285
+ template <typename T>
286
+ DEVICE
287
+ inline TMatrix4x4<T> operator-(const TMatrix4x4<T> &m0, const TMatrix4x4<T> &m1) {
288
+ TMatrix4x4<T> m;
289
+ for (int i = 0; i < 4; i++) {
290
+ for (int j = 0; j < 4; j++) {
291
+ m(i, j) = m0(i, j) - m1(i, j);
292
+ }
293
+ }
294
+ return m;
295
+ }
296
+
297
+ template <typename T>
298
+ DEVICE
299
+ inline TMatrix3x3<T>& operator+=(TMatrix3x3<T> &m0, const TMatrix3x3<T> &m1) {
300
+ for (int i = 0; i < 3; i++) {
301
+ for (int j = 0; j < 3; j++) {
302
+ m0(i, j) += m1(i, j);
303
+ }
304
+ }
305
+ return m0;
306
+ }
307
+
308
+ template <typename T>
309
+ DEVICE
310
+ inline TMatrix4x4<T>& operator+=(TMatrix4x4<T> &m0, const TMatrix4x4<T> &m1) {
311
+ for (int i = 0; i < 4; i++) {
312
+ for (int j = 0; j < 4; j++) {
313
+ m0(i, j) += m1(i, j);
314
+ }
315
+ }
316
+ return m0;
317
+ }
318
+
319
+ template <typename T>
320
+ DEVICE
321
+ inline TMatrix4x4<T>& operator-=(TMatrix4x4<T> &m0, const TMatrix4x4<T> &m1) {
322
+ for (int i = 0; i < 4; i++) {
323
+ for (int j = 0; j < 4; j++) {
324
+ m0(i, j) -= m1(i, j);
325
+ }
326
+ }
327
+ return m0;
328
+ }
329
+
330
+ template <typename T>
331
+ DEVICE
332
+ inline TMatrix4x4<T> operator*(const TMatrix4x4<T> &m0, const TMatrix4x4<T> &m1) {
333
+ TMatrix4x4<T> m;
334
+ for (int i = 0; i < 4; i++) {
335
+ for (int j = 0; j < 4; j++) {
336
+ for (int k = 0; k < 4; k++) {
337
+ m(i, j) += m0(i, k) * m1(k, j);
338
+ }
339
+ }
340
+ }
341
+ return m;
342
+ }
343
+
344
+ template <typename T>
345
+ DEVICE
346
+ TMatrix4x4<T> inverse(const TMatrix4x4<T> &m) {
347
+ // https://stackoverflow.com/questions/1148309/inverting-a-4x4-matrix
348
+ TMatrix4x4<T> inv;
349
+
350
+ inv(0, 0) = m(1, 1) * m(2, 2) * m(3, 3) -
351
+ m(1, 1) * m(2, 3) * m(3, 2) -
352
+ m(2, 1) * m(1, 2) * m(3, 3) +
353
+ m(2, 1) * m(1, 3) * m(3, 2) +
354
+ m(3, 1) * m(1, 2) * m(2, 3) -
355
+ m(3, 1) * m(1, 3) * m(2, 2);
356
+
357
+ inv(1, 0) = -m(1, 0) * m(2, 2) * m(3, 3) +
358
+ m(1, 0) * m(2, 3) * m(3, 2) +
359
+ m(2, 0) * m(1, 2) * m(3, 3) -
360
+ m(2, 0) * m(1, 3) * m(3, 2) -
361
+ m(3, 0) * m(1, 2) * m(2, 3) +
362
+ m(3, 0) * m(1, 3) * m(2, 2);
363
+
364
+ inv(2, 0) = m(1, 0) * m(2, 1) * m(3, 3) -
365
+ m(1, 0) * m(2, 3) * m(3, 1) -
366
+ m(2, 0) * m(1, 1) * m(3, 3) +
367
+ m(2, 0) * m(1, 3) * m(3, 1) +
368
+ m(3, 0) * m(1, 1) * m(2, 3) -
369
+ m(3, 0) * m(1, 3) * m(2, 1);
370
+
371
+ inv(3, 0) = -m(1, 0) * m(2, 1) * m(3, 2) +
372
+ m(1, 0) * m(2, 2) * m(3, 1) +
373
+ m(2, 0) * m(1, 1) * m(3, 2) -
374
+ m(2, 0) * m(1, 2) * m(3, 1) -
375
+ m(3, 0) * m(1, 1) * m(2, 2) +
376
+ m(3, 0) * m(1, 2) * m(2, 1);
377
+
378
+ inv(0, 1) = -m(0, 1) * m(2, 2) * m(3, 3) +
379
+ m(0, 1) * m(2, 3) * m(3, 2) +
380
+ m(2, 1) * m(0, 2) * m(3, 3) -
381
+ m(2, 1) * m(0, 3) * m(3, 2) -
382
+ m(3, 1) * m(0, 2) * m(2, 3) +
383
+ m(3, 1) * m(0, 3) * m(2, 2);
384
+
385
+ inv(1, 1) = m(0, 0) * m(2, 2) * m(3, 3) -
386
+ m(0, 0) * m(2, 3) * m(3, 2) -
387
+ m(2, 0) * m(0, 2) * m(3, 3) +
388
+ m(2, 0) * m(0, 3) * m(3, 2) +
389
+ m(3, 0) * m(0, 2) * m(2, 3) -
390
+ m(3, 0) * m(0, 3) * m(2, 2);
391
+
392
+ inv(2, 1) = -m(0, 0) * m(2, 1) * m(3, 3) +
393
+ m(0, 0) * m(2, 3) * m(3, 1) +
394
+ m(2, 0) * m(0, 1) * m(3, 3) -
395
+ m(2, 0) * m(0, 3) * m(3, 1) -
396
+ m(3, 0) * m(0, 1) * m(2, 3) +
397
+ m(3, 0) * m(0, 3) * m(2, 1);
398
+
399
+ inv(3, 1) = m(0, 0) * m(2, 1) * m(3, 2) -
400
+ m(0, 0) * m(2, 2) * m(3, 1) -
401
+ m(2, 0) * m(0, 1) * m(3, 2) +
402
+ m(2, 0) * m(0, 2) * m(3, 1) +
403
+ m(3, 0) * m(0, 1) * m(2, 2) -
404
+ m(3, 0) * m(0, 2) * m(2, 1);
405
+
406
+ inv(0, 2) = m(0, 1) * m(1, 2) * m(3, 3) -
407
+ m(0, 1) * m(1, 3) * m(3, 2) -
408
+ m(1, 1) * m(0, 2) * m(3, 3) +
409
+ m(1, 1) * m(0, 3) * m(3, 2) +
410
+ m(3, 1) * m(0, 2) * m(1, 3) -
411
+ m(3, 1) * m(0, 3) * m(1, 2);
412
+
413
+ inv(1, 2) = -m(0, 0) * m(1, 2) * m(3, 3) +
414
+ m(0, 0) * m(1, 3) * m(3, 2) +
415
+ m(1, 0) * m(0, 2) * m(3, 3) -
416
+ m(1, 0) * m(0, 3) * m(3, 2) -
417
+ m(3, 0) * m(0, 2) * m(1, 3) +
418
+ m(3, 0) * m(0, 3) * m(1, 2);
419
+
420
+ inv(2, 2) = m(0, 0) * m(1, 1) * m(3, 3) -
421
+ m(0, 0) * m(1, 3) * m(3, 1) -
422
+ m(1, 0) * m(0, 1) * m(3, 3) +
423
+ m(1, 0) * m(0, 3) * m(3, 1) +
424
+ m(3, 0) * m(0, 1) * m(1, 3) -
425
+ m(3, 0) * m(0, 3) * m(1, 1);
426
+
427
+ inv(3, 2) = -m(0, 0) * m(1, 1) * m(3, 2) +
428
+ m(0, 0) * m(1, 2) * m(3, 1) +
429
+ m(1, 0) * m(0, 1) * m(3, 2) -
430
+ m(1, 0) * m(0, 2) * m(3, 1) -
431
+ m(3, 0) * m(0, 1) * m(1, 2) +
432
+ m(3, 0) * m(0, 2) * m(1, 1);
433
+
434
+ inv(0, 3) = -m(0, 1) * m(1, 2) * m(2, 3) +
435
+ m(0, 1) * m(1, 3) * m(2, 2) +
436
+ m(1, 1) * m(0, 2) * m(2, 3) -
437
+ m(1, 1) * m(0, 3) * m(2, 2) -
438
+ m(2, 1) * m(0, 2) * m(1, 3) +
439
+ m(2, 1) * m(0, 3) * m(1, 2);
440
+
441
+ inv(1, 3) = m(0, 0) * m(1, 2) * m(2, 3) -
442
+ m(0, 0) * m(1, 3) * m(2, 2) -
443
+ m(1, 0) * m(0, 2) * m(2, 3) +
444
+ m(1, 0) * m(0, 3) * m(2, 2) +
445
+ m(2, 0) * m(0, 2) * m(1, 3) -
446
+ m(2, 0) * m(0, 3) * m(1, 2);
447
+
448
+ inv(2, 3) = -m(0, 0) * m(1, 1) * m(2, 3) +
449
+ m(0, 0) * m(1, 3) * m(2, 1) +
450
+ m(1, 0) * m(0, 1) * m(2, 3) -
451
+ m(1, 0) * m(0, 3) * m(2, 1) -
452
+ m(2, 0) * m(0, 1) * m(1, 3) +
453
+ m(2, 0) * m(0, 3) * m(1, 1);
454
+
455
+ inv(3, 3) = m(0, 0) * m(1, 1) * m(2, 2) -
456
+ m(0, 0) * m(1, 2) * m(2, 1) -
457
+ m(1, 0) * m(0, 1) * m(2, 2) +
458
+ m(1, 0) * m(0, 2) * m(2, 1) +
459
+ m(2, 0) * m(0, 1) * m(1, 2) -
460
+ m(2, 0) * m(0, 2) * m(1, 1);
461
+
462
+ auto det = m(0, 0) * inv(0, 0) +
463
+ m(0, 1) * inv(1, 0) +
464
+ m(0, 2) * inv(2, 0) +
465
+ m(0, 3) * inv(3, 0);
466
+
467
+ if (det == 0) {
468
+ return TMatrix4x4<T>{};
469
+ }
470
+
471
+ auto inv_det = 1.0 / det;
472
+
473
+ for (int i = 0; i < 4; i++) {
474
+ for (int j = 0; j < 4; j++) {
475
+ inv(i, j) *= inv_det;
476
+ }
477
+ }
478
+
479
+ return inv;
480
+ }
481
+
482
+ template <typename T>
483
+ inline std::ostream& operator<<(std::ostream &os, const TMatrix3x3<T> &m) {
484
+ for (int i = 0; i < 3; i++) {
485
+ for (int j = 0; j < 3; j++) {
486
+ os << m(i, j) << " ";
487
+ }
488
+ os << std::endl;
489
+ }
490
+ return os;
491
+ }
492
+
493
+ template <typename T>
494
+ inline std::ostream& operator<<(std::ostream &os, const TMatrix4x4<T> &m) {
495
+ for (int i = 0; i < 4; i++) {
496
+ for (int j = 0; j < 4; j++) {
497
+ os << m(i, j) << " ";
498
+ }
499
+ os << std::endl;
500
+ }
501
+ return os;
502
+ }
503
+
504
+ template <typename T>
505
+ DEVICE
506
+ TVector2<T> xform_pt(const TMatrix3x3<T> &m, const TVector2<T> &pt) {
507
+ TVector3<T> t{m(0, 0) * pt[0] + m(0, 1) * pt[1] + m(0, 2),
508
+ m(1, 0) * pt[0] + m(1, 1) * pt[1] + m(1, 2),
509
+ m(2, 0) * pt[0] + m(2, 1) * pt[1] + m(2, 2)};
510
+ return TVector2<T>{t[0] / t[2], t[1] / t[2]};
511
+ }
512
+
513
+ template <typename T>
514
+ DEVICE
515
+ void d_xform_pt(const TMatrix3x3<T> &m, const TVector2<T> &pt,
516
+ const TVector2<T> &d_out,
517
+ TMatrix3x3<T> &d_m,
518
+ TVector2<T> &d_pt) {
519
+ TVector3<T> t{m(0, 0) * pt[0] + m(0, 1) * pt[1] + m(0, 2),
520
+ m(1, 0) * pt[0] + m(1, 1) * pt[1] + m(1, 2),
521
+ m(2, 0) * pt[0] + m(2, 1) * pt[1] + m(2, 2)};
522
+ auto out = TVector2<T>{t[0] / t[2], t[1] / t[2]};
523
+ TVector3<T> d_t{d_out[0] / t[2],
524
+ d_out[1] / t[2],
525
+ -(d_out[0] * out[0] + d_out[1] * out[1]) / t[2]};
526
+ d_m(0, 0) += d_t[0] * pt[0];
527
+ d_m(0, 1) += d_t[0] * pt[1];
528
+ d_m(0, 2) += d_t[0];
529
+ d_m(1, 0) += d_t[1] * pt[0];
530
+ d_m(1, 1) += d_t[1] * pt[1];
531
+ d_m(1, 2) += d_t[1];
532
+ d_m(2, 0) += d_t[2] * pt[0];
533
+ d_m(2, 1) += d_t[2] * pt[1];
534
+ d_m(2, 2) += d_t[2];
535
+ d_pt[0] += d_t[0] * m(0, 0) + d_t[1] * m(1, 0) + d_t[2] * m(2, 0);
536
+ d_pt[1] += d_t[0] * m(0, 1) + d_t[1] * m(1, 1) + d_t[2] * m(2, 1);
537
+ }
538
+
539
+ template <typename T>
540
+ DEVICE
541
+ TVector2<T> xform_normal(const TMatrix3x3<T> &m_inv, const TVector2<T> &n) {
542
+ return normalize(TVector2<T>{m_inv(0, 0) * n[0] + m_inv(1, 0) * n[1],
543
+ m_inv(0, 1) * n[0] + m_inv(1, 1) * n[1]});
544
+ }
DiffVG/painterly_rendering.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Scream: python painterly_rendering.py imgs/scream.jpg --num_paths 2048 --max_width 4.0
3
+ Fallingwater: python painterly_rendering.py imgs/fallingwater.jpg --num_paths 2048 --max_width 4.0
4
+ Fallingwater: python painterly_rendering.py imgs/fallingwater.jpg --num_paths 2048 --max_width 4.0 --use_lpips_loss
5
+ Baboon: python painterly_rendering.py imgs/baboon.png --num_paths 1024 --max_width 4.0 --num_iter 250
6
+ Baboon Lpips: python painterly_rendering.py imgs/baboon.png --num_paths 1024 --max_width 4.0 --num_iter 500 --use_lpips_loss
7
+ smile: python painterly_rendering.py ../LIVE/figures/smile.png --num_paths 5 --use_blob --num_iter 500
8
+ """
9
+ import pydiffvg
10
+ import torch
11
+ import skimage
12
+ import skimage.io
13
+ import random
14
+ import ttools.modules
15
+ import argparse
16
+ import math
17
+
18
+ pydiffvg.set_print_timing(True)
19
+
20
+ gamma = 1.0
21
+
22
def main(args):
    """Fit randomly initialized vector paths to a target raster image.

    Loads ``args.target``, creates ``args.num_paths`` random paths (closed,
    filled blobs when ``args.use_blob`` is set, open strokes otherwise) and
    optimizes their points, colors and, for strokes, widths with Adam for
    ``args.num_iter`` iterations. The loss is LPIPS plus a squared mean
    difference when ``args.use_lpips_loss`` is set, and MSE otherwise.

    Intermediate renders, periodic SVGs, the final render and an ffmpeg
    video are written under ``results/painterly_rendering/``.
    """
    # Use GPU if available
    pydiffvg.set_use_gpu(torch.cuda.is_available())

    # The perceptual loss network is only built when it is actually used.
    perception_loss = None
    if args.use_lpips_loss:
        perception_loss = ttools.modules.LPIPS().to(pydiffvg.get_device())

    #target = torch.from_numpy(skimage.io.imread('imgs/lena.png')).to(torch.float32) / 255.0
    target = torch.from_numpy(skimage.io.imread(args.target)).to(torch.float32) / 255.0
    target = target.pow(gamma)
    target = target.to(pydiffvg.get_device())
    target = target.unsqueeze(0)
    target = target.permute(0, 3, 1, 2) # NHWC -> NCHW
    #target = torch.nn.functional.interpolate(target, size = [256, 256], mode = 'area')
    canvas_width, canvas_height = target.shape[3], target.shape[2]
    num_paths = args.num_paths
    max_width = args.max_width

    random.seed(1234)
    torch.manual_seed(1234)

    shapes = []
    shape_groups = []
    if args.use_blob:
        for i in range(num_paths):
            num_segments = random.randint(3, 5)
            num_control_points = torch.zeros(num_segments, dtype = torch.int32) + 2
            points = []
            p0 = (random.random(), random.random())
            points.append(p0)
            for j in range(num_segments):
                radius = 0.05
                p1 = (p0[0] + radius * (random.random() - 0.5), p0[1] + radius * (random.random() - 0.5))
                p2 = (p1[0] + radius * (random.random() - 0.5), p1[1] + radius * (random.random() - 0.5))
                p3 = (p2[0] + radius * (random.random() - 0.5), p2[1] + radius * (random.random() - 0.5))
                points.append(p1)
                points.append(p2)
                # The path is closed, so the last segment gets no end point
                # of its own.
                if j < num_segments - 1:
                    points.append(p3)
                    p0 = p3
            points = torch.tensor(points)
            points[:, 0] *= canvas_width
            points[:, 1] *= canvas_height
            path = pydiffvg.Path(num_control_points = num_control_points,
                                 points = points,
                                 stroke_width = torch.tensor(1.0),
                                 is_closed = True)
            shapes.append(path)
            path_group = pydiffvg.ShapeGroup(shape_ids = torch.tensor([len(shapes) - 1]),
                                             fill_color = torch.tensor([random.random(),
                                                                        random.random(),
                                                                        random.random(),
                                                                        random.random()]))
            shape_groups.append(path_group)
    else:
        for i in range(num_paths):
            num_segments = random.randint(1, 3)
            num_control_points = torch.zeros(num_segments, dtype = torch.int32) + 2
            points = []
            p0 = (random.random(), random.random())
            points.append(p0)
            for j in range(num_segments):
                radius = 0.05
                p1 = (p0[0] + radius * (random.random() - 0.5), p0[1] + radius * (random.random() - 0.5))
                p2 = (p1[0] + radius * (random.random() - 0.5), p1[1] + radius * (random.random() - 0.5))
                p3 = (p2[0] + radius * (random.random() - 0.5), p2[1] + radius * (random.random() - 0.5))
                points.append(p1)
                points.append(p2)
                points.append(p3)
                p0 = p3
            points = torch.tensor(points)
            points[:, 0] *= canvas_width
            points[:, 1] *= canvas_height
            #points = torch.rand(3 * num_segments + 1, 2) * min(canvas_width, canvas_height)
            path = pydiffvg.Path(num_control_points = num_control_points,
                                 points = points,
                                 stroke_width = torch.tensor(1.0),
                                 is_closed = False)
            shapes.append(path)
            path_group = pydiffvg.ShapeGroup(shape_ids = torch.tensor([len(shapes) - 1]),
                                             fill_color = None,
                                             stroke_color = torch.tensor([random.random(),
                                                                          random.random(),
                                                                          random.random(),
                                                                          random.random()]))
            shape_groups.append(path_group)

    scene_args = pydiffvg.RenderFunction.serialize_scene(
        canvas_width, canvas_height, shapes, shape_groups)

    render = pydiffvg.RenderFunction.apply
    img = render(canvas_width, # width
                 canvas_height, # height
                 2,   # num_samples_x
                 2,   # num_samples_y
                 0,   # seed
                 None,
                 *scene_args)
    pydiffvg.imwrite(img.cpu(), 'results/painterly_rendering/init.png', gamma=gamma)

    # Collect the tensors to optimize and enable gradients on them.
    points_vars = []
    stroke_width_vars = []
    color_vars = []
    for path in shapes:
        path.points.requires_grad = True
        points_vars.append(path.points)
    if not args.use_blob:
        for path in shapes:
            path.stroke_width.requires_grad = True
            stroke_width_vars.append(path.stroke_width)
    if args.use_blob:
        for group in shape_groups:
            group.fill_color.requires_grad = True
            color_vars.append(group.fill_color)
    else:
        for group in shape_groups:
            group.stroke_color.requires_grad = True
            color_vars.append(group.stroke_color)

    # Optimize
    points_optim = torch.optim.Adam(points_vars, lr=1.0)
    if len(stroke_width_vars) > 0:
        width_optim = torch.optim.Adam(stroke_width_vars, lr=0.1)
    color_optim = torch.optim.Adam(color_vars, lr=0.01)
    # Adam iterations.
    for t in range(args.num_iter):
        print('iteration:', t)
        points_optim.zero_grad()
        if len(stroke_width_vars) > 0:
            width_optim.zero_grad()
        color_optim.zero_grad()
        # Forward pass: render the image.
        scene_args = pydiffvg.RenderFunction.serialize_scene(
            canvas_width, canvas_height, shapes, shape_groups)
        img = render(canvas_width, # width
                     canvas_height, # height
                     2,   # num_samples_x
                     2,   # num_samples_y
                     t,   # seed
                     None,
                     *scene_args)
        # Compose img with white background
        img = img[:, :, 3:4] * img[:, :, :3] + torch.ones(img.shape[0], img.shape[1], 3, device = pydiffvg.get_device()) * (1 - img[:, :, 3:4])
        # Save the intermediate render.
        pydiffvg.imwrite(img.cpu(), 'results/painterly_rendering/iter_{}.png'.format(t), gamma=gamma)
        img = img[:, :, :3]
        # Convert img from HWC to NCHW
        img = img.unsqueeze(0)
        img = img.permute(0, 3, 1, 2) # NHWC -> NCHW
        if args.use_lpips_loss:
            loss = perception_loss(img, target) + (img.mean() - target.mean()).pow(2)
        else:
            loss = (img - target).pow(2).mean()
        print('render loss:', loss.item())

        # Backpropagate the gradients.
        loss.backward()

        # Take a gradient descent step.
        points_optim.step()
        if len(stroke_width_vars) > 0:
            width_optim.step()
        color_optim.step()
        # Keep the optimized parameters inside their valid ranges.
        if len(stroke_width_vars) > 0:
            for path in shapes:
                path.stroke_width.data.clamp_(1.0, max_width)
        if args.use_blob:
            for group in shape_groups:
                group.fill_color.data.clamp_(0.0, 1.0)
        else:
            for group in shape_groups:
                group.stroke_color.data.clamp_(0.0, 1.0)

        if t % 10 == 0 or t == args.num_iter - 1:
            pydiffvg.save_svg('results/painterly_rendering/iter_{}.svg'.format(t),
                              canvas_width, canvas_height, shapes, shape_groups)

    # Render the final result. The scene is serialized again so that it
    # reflects the last optimizer step, and it is rendered at the canvas
    # size: `target` is NCHW at this point, so target.shape[1] and
    # target.shape[0] are the channel and batch counts, not width and height.
    scene_args = pydiffvg.RenderFunction.serialize_scene(
        canvas_width, canvas_height, shapes, shape_groups)
    img = render(canvas_width, # width
                 canvas_height, # height
                 2,   # num_samples_x
                 2,   # num_samples_y
                 0,   # seed
                 None,
                 *scene_args)
    # Save the final render.
    pydiffvg.imwrite(img.cpu(), 'results/painterly_rendering/final.png', gamma=gamma)
    # Convert the intermediate renderings to a video.
    from subprocess import call
    call(["ffmpeg", "-framerate", "24", "-i",
        "results/painterly_rendering/iter_%d.png", "-vb", "20M",
        "results/painterly_rendering/out.mp4"])
213
+
214
if __name__ == "__main__":
    # Command-line entry point: one positional target image plus optional
    # tuning flags, handed to main() as the parsed namespace.
    cli = argparse.ArgumentParser()
    cli.add_argument("target", help="target image path")
    cli.add_argument("--num_paths", type=int, default=512)
    cli.add_argument("--max_width", type=float, default=2.0)
    cli.add_argument("--use_lpips_loss", action='store_true')
    cli.add_argument("--num_iter", type=int, default=500)
    cli.add_argument("--use_blob", action='store_true')
    main(cli.parse_args())
DiffVG/parallel.cpp ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "parallel.h"
2
+ #include <list>
3
+ #include <thread>
4
+ #include <condition_variable>
5
+ #include <vector>
6
+ #include <cassert>
7
+
8
+ // From https://github.com/mmp/pbrt-v3/blob/master/src/core/parallel.cpp
9
+
10
+ static std::vector<std::thread> threads;
11
+ static bool shutdownThreads = false;
12
+ struct ParallelForLoop;
13
+ static ParallelForLoop *workList = nullptr;
14
+ static std::mutex workListMutex;
15
+
16
+ struct ParallelForLoop {
17
+ ParallelForLoop(std::function<void(int64_t)> func1D, int64_t maxIndex, int chunkSize)
18
+ : func1D(std::move(func1D)), maxIndex(maxIndex), chunkSize(chunkSize) {
19
+ }
20
+ ParallelForLoop(const std::function<void(Vector2i)> &f, const Vector2i count)
21
+ : func2D(f), maxIndex(count[0] * count[1]), chunkSize(1) {
22
+ nX = count[0];
23
+ }
24
+
25
+ std::function<void(int64_t)> func1D;
26
+ std::function<void(Vector2i)> func2D;
27
+ const int64_t maxIndex;
28
+ const int chunkSize;
29
+ int64_t nextIndex = 0;
30
+ int activeWorkers = 0;
31
+ ParallelForLoop *next = nullptr;
32
+ int nX = -1;
33
+
34
+ bool Finished() const {
35
+ return nextIndex >= maxIndex && activeWorkers == 0;
36
+ }
37
+ };
38
+
39
+ void Barrier::Wait() {
40
+ std::unique_lock<std::mutex> lock(mutex);
41
+ assert(count > 0);
42
+ if (--count == 0) {
43
+ // This is the last thread to reach the barrier; wake up all of the
44
+ // other ones before exiting.
45
+ cv.notify_all();
46
+ } else {
47
+ // Otherwise there are still threads that haven't reached it. Give
48
+ // up the lock and wait to be notified.
49
+ cv.wait(lock, [this] { return count == 0; });
50
+ }
51
+ }
52
+
53
+ static std::condition_variable workListCondition;
54
+
55
+ static void worker_thread_func(const int tIndex, std::shared_ptr<Barrier> barrier) {
56
+ ThreadIndex = tIndex;
57
+
58
+ // The main thread sets up a barrier so that it can be sure that all
59
+ // workers have started and recorded their ThreadIndex before
60
+ // parallel_init() returns.
61
+ barrier->Wait();
62
+
63
+ // Release our reference to the Barrier so that it's freed once all of
64
+ // the threads have cleared it.
65
+ barrier.reset();
66
+
67
+ std::unique_lock<std::mutex> lock(workListMutex);
68
+ while (!shutdownThreads) {
69
+ if (!workList) {
70
+ // Sleep until there are more tasks to run
71
+ workListCondition.wait(lock);
72
+ } else {
73
+ // Get work from _workList_ and run loop iterations
74
+ ParallelForLoop &loop = *workList;
75
+
76
+ // Run a chunk of loop iterations for _loop_
77
+
78
+ // Find the set of loop iterations to run next
79
+ int64_t indexStart = loop.nextIndex;
80
+ int64_t indexEnd = std::min(indexStart + loop.chunkSize, loop.maxIndex);
81
+
82
+ // Update _loop_ to reflect iterations this thread will run
83
+ loop.nextIndex = indexEnd;
84
+ if (loop.nextIndex == loop.maxIndex)
85
+ workList = loop.next;
86
+ loop.activeWorkers++;
87
+
88
+ // Run loop indices in _[indexStart, indexEnd)_
89
+ lock.unlock();
90
+ for (int64_t index = indexStart; index < indexEnd; ++index) {
91
+ if (loop.func1D) {
92
+ loop.func1D(index);
93
+ }
94
+ // Handle other types of loops
95
+ else {
96
+ assert(loop.func2D != nullptr);
97
+ loop.func2D(Vector2i{int(index % loop.nX),
98
+ int(index / loop.nX)});
99
+ }
100
+ }
101
+ lock.lock();
102
+
103
+ // Update _loop_ to reflect completion of iterations
104
+ loop.activeWorkers--;
105
+ if (loop.Finished()) {
106
+ workListCondition.notify_all();
107
+ }
108
+ }
109
+ }
110
+ }
111
+
112
+ void parallel_for_host(const std::function<void(int64_t)> &func,
113
+ int64_t count,
114
+ int chunkSize) {
115
+ // Run iterations immediately if not using threads or if _count_ is small
116
+ if (threads.empty() || count < chunkSize) {
117
+ for (int64_t i = 0; i < count; ++i) {
118
+ func(i);
119
+ }
120
+ return;
121
+ }
122
+
123
+ // Create and enqueue _ParallelForLoop_ for this loop
124
+ ParallelForLoop loop(func, count, chunkSize);
125
+ workListMutex.lock();
126
+ loop.next = workList;
127
+ workList = &loop;
128
+ workListMutex.unlock();
129
+
130
+ // Notify worker threads of work to be done
131
+ std::unique_lock<std::mutex> lock(workListMutex);
132
+ workListCondition.notify_all();
133
+
134
+ // Help out with parallel loop iterations in the current thread
135
+ while (!loop.Finished()) {
136
+ // Run a chunk of loop iterations for _loop_
137
+
138
+ // Find the set of loop iterations to run next
139
+ int64_t indexStart = loop.nextIndex;
140
+ int64_t indexEnd = std::min(indexStart + loop.chunkSize, loop.maxIndex);
141
+
142
+ // Update _loop_ to reflect iterations this thread will run
143
+ loop.nextIndex = indexEnd;
144
+ if (loop.nextIndex == loop.maxIndex) {
145
+ workList = loop.next;
146
+ }
147
+ loop.activeWorkers++;
148
+
149
+ // Run loop indices in _[indexStart, indexEnd)_
150
+ lock.unlock();
151
+ for (int64_t index = indexStart; index < indexEnd; ++index) {
152
+ if (loop.func1D) {
153
+ loop.func1D(index);
154
+ }
155
+ // Handle other types of loops
156
+ else {
157
+ assert(loop.func2D != nullptr);
158
+ loop.func2D(Vector2i{int(index % loop.nX),
159
+ int(index / loop.nX)});
160
+ }
161
+ }
162
+ lock.lock();
163
+
164
+ // Update _loop_ to reflect completion of iterations
165
+ loop.activeWorkers--;
166
+ }
167
+ }
168
+
169
+ thread_local int ThreadIndex;
170
+
171
+ void parallel_for_host(
172
+ std::function<void(Vector2i)> func, const Vector2i count) {
173
+ // Run iterations immediately if not using threads or if there is at most one iteration
174
+ if (threads.empty() || count.x * count.y <= 1) {
175
+ for (int y = 0; y < count.y; ++y) {
176
+ for (int x = 0; x < count.x; ++x) {
177
+ func(Vector2i{x, y});
178
+ }
179
+ }
180
+ return;
181
+ }
182
+
183
+ ParallelForLoop loop(std::move(func), count);
184
+ {
185
+ std::lock_guard<std::mutex> lock(workListMutex);
186
+ loop.next = workList;
187
+ workList = &loop;
188
+ }
189
+
190
+ std::unique_lock<std::mutex> lock(workListMutex);
191
+ workListCondition.notify_all();
192
+
193
+ // Help out with parallel loop iterations in the current thread
194
+ while (!loop.Finished()) {
195
+ // Run a chunk of loop iterations for _loop_
196
+
197
+ // Find the set of loop iterations to run next
198
+ int64_t indexStart = loop.nextIndex;
199
+ int64_t indexEnd = std::min(indexStart + loop.chunkSize, loop.maxIndex);
200
+
201
+ // Update _loop_ to reflect iterations this thread will run
202
+ loop.nextIndex = indexEnd;
203
+ if (loop.nextIndex == loop.maxIndex) {
204
+ workList = loop.next;
205
+ }
206
+ loop.activeWorkers++;
207
+
208
+ // Run loop indices in _[indexStart, indexEnd)_
209
+ lock.unlock();
210
+ for (int64_t index = indexStart; index < indexEnd; ++index) {
211
+ if (loop.func1D) {
212
+ loop.func1D(index);
213
+ }
214
+ // Handle other types of loops
215
+ else {
216
+ assert(loop.func2D != nullptr);
217
+ loop.func2D(Vector2i{int(index % loop.nX),
218
+ int(index / loop.nX)});
219
+ }
220
+ }
221
+ lock.lock();
222
+
223
+ // Update _loop_ to reflect completion of iterations
224
+ loop.activeWorkers--;
225
+ }
226
+ }
227
+
228
+ int num_system_cores() {
229
+ // return 1;
230
+ int ret = std::thread::hardware_concurrency();
231
+ if (ret == 0) {
232
+ return 16;
233
+ }
234
+ return ret;
235
+ }
236
+
237
+ void parallel_init() {
238
+ assert(threads.size() == 0);
239
+ int nThreads = num_system_cores();
240
+ ThreadIndex = 0;
241
+
242
+ // Create a barrier so that we can be sure all worker threads have
243
+ // started and set their ThreadIndex before we return from this
244
+ // function. The barrier count is nThreads: the nThreads - 1 workers
245
+ // launched below plus this (main) thread.
246
+ std::shared_ptr<Barrier> barrier = std::make_shared<Barrier>(nThreads);
247
+
248
+ // Launch one fewer worker thread than the total number we want doing
249
+ // work, since the main thread helps out, too.
250
+ for (int i = 0; i < nThreads - 1; ++i) {
251
+ threads.push_back(std::thread(worker_thread_func, i + 1, barrier));
252
+ }
253
+
254
+ barrier->Wait();
255
+ }
256
+
257
+ void parallel_cleanup() {
258
+ if (threads.empty()) {
259
+ return;
260
+ }
261
+
262
+ {
263
+ std::lock_guard<std::mutex> lock(workListMutex);
264
+ shutdownThreads = true;
265
+ workListCondition.notify_all();
266
+ }
267
+
268
+ for (std::thread &thread : threads) {
269
+ thread.join();
270
+ }
271
+ threads.erase(threads.begin(), threads.end());
272
+ shutdownThreads = false;
273
+ }
DiffVG/parallel.h ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "vector.h"
4
+
5
+ #include <mutex>
6
+ #include <condition_variable>
7
+ #include <functional>
8
+ #include <atomic>
9
+ #include <cstdint>
10
+ #include <cassert>
11
+ #include <algorithm>
12
+ // From https://github.com/mmp/pbrt-v3/blob/master/src/core/parallel.h
13
+
14
+ class Barrier {
15
+ public:
16
+ Barrier(int count) : count(count) { assert(count > 0); }
17
+ ~Barrier() { assert(count == 0); }
18
+ void Wait();
19
+
20
+ private:
21
+ std::mutex mutex;
22
+ std::condition_variable cv;
23
+ int count;
24
+ };
25
+
26
+ void parallel_for_host(const std::function<void(int64_t)> &func,
27
+ int64_t count,
28
+ int chunkSize = 1);
29
+ extern thread_local int ThreadIndex;
30
+ void parallel_for_host(
31
+ std::function<void(Vector2i)> func, const Vector2i count);
32
+ int num_system_cores();
33
+
34
+ void parallel_init();
35
+ void parallel_cleanup();
36
+
37
+ #ifdef __CUDACC__
38
+ template <typename T>
39
+ __global__ void parallel_for_device_kernel(T functor, int count) {
40
+ auto idx = threadIdx.x + blockIdx.x * blockDim.x;
41
+ if (idx >= count) {
42
+ return;
43
+ }
44
+ functor(idx);
45
+ }
46
+ template <typename T>
47
+ inline void parallel_for_device(T functor,
48
+ int count,
49
+ int work_per_thread = 256) {
50
+ if (count <= 0) {
51
+ return;
52
+ }
53
+ auto block_size = work_per_thread;
54
+ auto block_count = idiv_ceil(count, block_size);
55
+ parallel_for_device_kernel<T><<<block_count, block_size>>>(functor, count);
56
+ }
57
+ #endif
58
+
59
+ template <typename T>
60
+ inline void parallel_for(T functor,
61
+ int count,
62
+ bool use_gpu,
63
+ int work_per_thread = -1) {
64
+ if (work_per_thread == -1) {
65
+ work_per_thread = use_gpu ? 64 : 256;
66
+ }
67
+ if (count <= 0) {
68
+ return;
69
+ }
70
+ if (use_gpu) {
71
+ #ifdef __CUDACC__
72
+ auto block_size = work_per_thread;
73
+ auto block_count = idiv_ceil(count, block_size);
74
+ parallel_for_device_kernel<T><<<block_count, block_size>>>(functor, count);
75
+ #else
76
+ throw std::runtime_error("diffvg not compiled with GPU");
77
+ assert(false);
78
+ #endif
79
+ } else {
80
+ auto num_threads = idiv_ceil(count, work_per_thread);
81
+ parallel_for_host([&](int thread_index) {
82
+ auto id_offset = work_per_thread * thread_index;
83
+ auto work_end = std::min(id_offset + work_per_thread, count);
84
+ for (int work_id = id_offset; work_id < work_end; work_id++) {
85
+ auto idx = work_id;
86
+ assert(idx < count);
87
+ functor(idx);
88
+ }
89
+ }, num_threads);
90
+ }
91
+ }
DiffVG/pcg.h ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "diffvg.h"
4
+
5
+ // http://www.pcg-random.org/download.html
6
+ struct pcg32_state {
7
+ uint64_t state;
8
+ uint64_t inc;
9
+ };
10
+
11
+ DEVICE inline uint32_t next_pcg32(pcg32_state *rng) { // returns the next 32-bit output and advances *rng
12
+ uint64_t oldstate = rng->state;
13
+ // Advance internal state (multiply-add step; `inc|1` forces the increment to be odd)
14
+ rng->state = oldstate * 6364136223846793005ULL + (rng->inc|1);
15
+ // Calculate output function (XSH RR), uses old state for max ILP
16
+ uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u;
17
+ uint32_t rot = oldstate >> 59u; // top 5 bits of the old state pick the rotation amount (0..31)
18
+ return (xorshifted >> rot) | (xorshifted << ((-rot) & 31)); // rotate xorshifted right by rot bits
19
+ }
20
+
21
+ // https://github.com/wjakob/pcg32/blob/master/pcg32.h
22
+ DEVICE inline float next_pcg32_float(pcg32_state *rng) { // returns a float in [0, 1) and advances *rng
23
+ union { // reinterprets the same 32 bits as either an integer or a float
24
+ uint32_t u;
25
+ float f;
26
+ } x;
27
+ x.u = (next_pcg32(rng) >> 9) | 0x3f800000u; // 23 random bits as mantissa; 0x3f800000 is the bit pattern of 1.0f, so x.f is in [1, 2)
28
+ return x.f - 1.0f; // shift [1, 2) down to [0, 1)
29
+ }
30
+
31
+ // Initialize each pixel with a PCG rng with a different stream
32
+ DEVICE inline pcg32_state init_pcg32(int idx, uint64_t seed) { // builds a generator whose stream depends on idx and whose state depends on seed
33
+ pcg32_state state;
34
+ state.state = 0U;
35
+ state.inc = (((uint64_t)idx + 1) << 1u) | 1u; // odd increment derived from idx, so each idx gets a distinct increment
36
+ next_pcg32(&state); // advance once from the zero state before mixing in the seed
37
+ state.state += (0x853c49e6748fea9bULL + seed); // fold the fixed constant plus the caller's seed into the state
38
+ next_pcg32(&state); // advance once more; the output value is discarded
39
+ return state;
40
+ }
DiffVG/poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
DiffVG/ptr.h ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <cstddef>
4
+
5
+ /**
6
+ * Python doesn't have a pointer type, therefore we create a pointer wrapper
7
+ * see https://stackoverflow.com/questions/48982143/returning-and-passing-around-raw-pod-pointers-arrays-with-python-c-and-pyb?rq=1
8
+ */
9
+ template <typename T>
10
+ class ptr { // thin, non-owning wrapper around a raw T* (no destructor; nothing is freed automatically)
11
+ public:
12
+ ptr() : p(nullptr) {} // default: null pointer
13
+ ptr(T* p) : p(p) {} // wrap an existing raw pointer
14
+ ptr(std::size_t p) : p((T*)p) {} // reinterpret an integer address as a T*
15
+ ptr(const ptr& other) : ptr(other.p) {} // copies the address only; both wrappers refer to the same object
16
+ T* operator->() const { return p; }
17
+ T* get() const { return p; }
18
+ void destroy() { delete p; } // deletes the pointee; p is not reset, so this wrapper and any copies dangle afterwards
19
+ bool is_null() const { return p == nullptr; }
20
+ size_t as_size_t() const {return (size_t)p;} // the address as an integer (inverse of the std::size_t constructor)
21
+ private:
22
+ T* p;
23
+ };
DiffVG/pybind11/.appveyor.yml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: 1.0.{build}
2
+ image:
3
+ - Visual Studio 2015
4
+ test: off
5
+ skip_branch_with_pr: true
6
+ build:
7
+ parallel: true
8
+ platform:
9
+ - x86
10
+ environment:
11
+ matrix:
12
+ - PYTHON: 36
13
+ CONFIG: Debug
14
+ - PYTHON: 27
15
+ CONFIG: Debug
16
+ install:
17
+ - ps: |
18
+ $env:CMAKE_GENERATOR = "Visual Studio 14 2015"
19
+ if ($env:PLATFORM -eq "x64") { $env:PYTHON = "$env:PYTHON-x64" }
20
+ $env:PATH = "C:\Python$env:PYTHON\;C:\Python$env:PYTHON\Scripts\;$env:PATH"
21
+ python -W ignore -m pip install --upgrade pip wheel
22
+ python -W ignore -m pip install pytest numpy --no-warn-script-location
23
+ - ps: |
24
+ Start-FileDownload 'https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.zip'
25
+ 7z x eigen-3.3.7.zip -y > $null
26
+ $env:CMAKE_INCLUDE_PATH = "eigen-3.3.7;$env:CMAKE_INCLUDE_PATH"
27
+ build_script:
28
+ - cmake -G "%CMAKE_GENERATOR%" -A "%CMAKE_ARCH%"
29
+ -DCMAKE_CXX_STANDARD=14
30
+ -DPYBIND11_WERROR=ON
31
+ -DDOWNLOAD_CATCH=ON
32
+ -DCMAKE_SUPPRESS_REGENERATION=1
33
+ .
34
+ - set MSBuildLogger="C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"
35
+ - cmake --build . --config %CONFIG% --target pytest -- /m /v:m /logger:%MSBuildLogger%
36
+ - cmake --build . --config %CONFIG% --target cpptest -- /m /v:m /logger:%MSBuildLogger%
37
+ on_failure: if exist "tests\test_cmake_build" type tests\test_cmake_build\*.log*
DiffVG/pybind11/.cmake-format.yaml ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ parse:
2
+ additional_commands:
3
+ pybind11_add_module:
4
+ flags:
5
+ - THIN_LTO
6
+ - MODULE
7
+ - SHARED
8
+ - NO_EXTRAS
9
+ - EXCLUDE_FROM_ALL
10
+ - SYSTEM
11
+
12
+ format:
13
+ line_width: 99
14
+ tab_size: 2
15
+
16
+ # If an argument group contains more than this many sub-groups
17
+ # (parg or kwarg groups) then force it to a vertical layout.
18
+ max_subgroups_hwrap: 2
19
+
20
+ # If a positional argument group contains more than this many
21
+ # arguments, then force it to a vertical layout.
22
+ max_pargs_hwrap: 6
23
+
24
+ # If a cmdline positional group consumes more than this many
25
+ # lines without nesting, then invalidate the layout (and nest)
26
+ max_rows_cmdline: 2
27
+ separate_ctrl_name_with_space: false
28
+ separate_fn_name_with_space: false
29
+ dangle_parens: false
30
+
31
+ # If the trailing parenthesis must be 'dangled' on its own
32
+ # line, then align it to this reference: `prefix`: the start
33
+ # of the statement, `prefix-indent`: the start of the
34
+ # statement, plus one indentation level, `child`: align to
35
+ # the column of the arguments
36
+ dangle_align: prefix
37
+ # If the statement spelling length (including space and
38
+ # parenthesis) is smaller than this amount, then force reject
39
+ # nested layouts.
40
+ min_prefix_chars: 4
41
+
42
+ # If the statement spelling length (including space and
43
+ # parenthesis) is larger than the tab width by more than this
44
+ # amount, then force reject un-nested layouts.
45
+ max_prefix_chars: 10
46
+
47
+ # If a candidate layout is wrapped horizontally but it exceeds
48
+ # this many lines, then reject the layout.
49
+ max_lines_hwrap: 2
50
+
51
+ line_ending: unix
52
+
53
+ # Format command names consistently as 'lower' or 'upper' case
54
+ command_case: canonical
55
+
56
+ # Format keywords consistently as 'lower' or 'upper' case
57
+ # unchanged is valid too
58
+ keyword_case: 'upper'
59
+
60
+ # A list of command names which should always be wrapped
61
+ always_wrap: []
62
+
63
+ # If true, the argument lists which are known to be sortable
64
+ # will be sorted lexicographically
65
+ enable_sort: true
66
+
67
+ # If true, the parsers may infer whether or not an argument
68
+ # list is sortable (without annotation).
69
+ autosort: false
70
+
71
+ # Causes a few issues - can be solved later, possibly.
72
+ markup:
73
+ enable_markup: false
DiffVG/pybind11/.github/CONTRIBUTING.md ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Thank you for your interest in this project! Please refer to the following
2
+ sections on how to contribute code and bug reports.
3
+
4
+ ### Reporting bugs
5
+
6
+ Before submitting a question or bug report, please take a moment of your time
7
+ and ensure that your issue isn't already discussed in the project documentation
8
+ provided at [pybind11.readthedocs.org][] or in the [issue tracker][]. You can
9
+ also check [gitter][] to see if it came up before.
10
+
11
+ Assuming that you have identified a previously unknown problem or an important
12
+ question, it's essential that you submit a self-contained and minimal piece of
13
+ code that reproduces the problem. In other words: no external dependencies,
14
+ isolate the function(s) that cause breakage, submit matched and complete C++
15
+ and Python snippets that can be easily compiled and run in isolation; or
16
+ ideally make a small PR with a failing test case that can be used as a starting
17
+ point.
18
+
19
+ ## Pull requests
20
+
21
+ Contributions are submitted, reviewed, and accepted using GitHub pull requests.
22
+ Please refer to [this article][using pull requests] for details and adhere to
23
+ the following rules to make the process as smooth as possible:
24
+
25
+ * Make a new branch for every feature you're working on.
26
+ * Make small and clean pull requests that are easy to review but make sure they
27
+ do add value by themselves.
28
+ * Add tests for any new functionality and run the test suite (`cmake --build
29
+ build --target pytest`) to ensure that no existing features break.
30
+ * Please run [`pre-commit`][pre-commit] to check your code matches the
31
+ project style. (Note that `gawk` is required.) Use `pre-commit run
32
+ --all-files` before committing (or use installed-mode, check pre-commit docs)
33
+ to verify your code passes before pushing to save time.
34
+ * This project has a strong focus on providing general solutions using a
35
+ minimal amount of code, thus small pull requests are greatly preferred.
36
+
37
+ ### Licensing of contributions
38
+
39
+ pybind11 is provided under a BSD-style license that can be found in the
40
+ ``LICENSE`` file. By using, distributing, or contributing to this project, you
41
+ agree to the terms and conditions of this license.
42
+
43
+ You are under no obligation whatsoever to provide any bug fixes, patches, or
44
+ upgrades to the features, functionality or performance of the source code
45
+ ("Enhancements") to anyone; however, if you choose to make your Enhancements
46
+ available either publicly, or directly to the author of this software, without
47
+ imposing a separate written license agreement for such Enhancements, then you
48
+ hereby grant the following license: a non-exclusive, royalty-free perpetual
49
+ license to install, use, modify, prepare derivative works, incorporate into
50
+ other computer software, distribute, and sublicense such enhancements or
51
+ derivative works thereof, in binary and source code form.
52
+
53
+
54
+ ## Development of pybind11
55
+
56
+ To set up an ideal development environment, run the following commands on a
57
+ system with CMake 3.14+:
58
+
59
+ ```bash
60
+ python3 -m venv venv
61
+ source venv/bin/activate
62
+ pip install -r tests/requirements.txt
63
+ cmake -S . -B build -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON
64
+ cmake --build build -j4
65
+ ```
66
+
67
+ Tips:
68
+
69
+ * You can use `virtualenv` (from PyPI) instead of `venv` (which is Python 3
70
+ only).
71
+ * You can select any name for your environment folder; if it contains "env" it
72
+ will be ignored by git.
73
+ * If you don’t have CMake 3.14+, just add “cmake” to the pip install command.
74
+ * You can use `-DPYBIND11_FINDPYTHON=ON` to use FindPython on CMake 3.12+
75
+ * In classic mode, you may need to set `-DPYTHON_EXECUTABLE=/path/to/python`.
76
+ FindPython uses `-DPython_ROOT_DIR=/path/to` or
77
+ `-DPython_EXECUTABLE=/path/to/python`.
78
+
79
+ ### Configuration options
80
+
81
+ In CMake, configuration options are given with “-D”. Options are stored in the
82
+ build directory, in the `CMakeCache.txt` file, so they are remembered for each
83
+ build directory. Two selections are special - the generator, given with `-G`,
84
+ and the compiler, which is selected based on environment variables `CXX` and
85
+ similar, or `-DCMAKE_CXX_COMPILER=`. Unlike the others, these cannot be changed
86
+ after the initial run.
87
+
88
+ The valid options are:
89
+
90
+ * `-DCMAKE_BUILD_TYPE`: Release, Debug, MinSizeRel, RelWithDebInfo
91
+ * `-DPYBIND11_FINDPYTHON=ON`: Use CMake 3.12+’s FindPython instead of the
92
+ classic, deprecated, custom FindPythonLibs
93
+ * `-DPYBIND11_NOPYTHON=ON`: Disable all Python searching (disables tests)
94
+ * `-DBUILD_TESTING=ON`: Enable the tests
95
+ * `-DDOWNLOAD_CATCH=ON`: Download catch to build the C++ tests
96
+ * `-DDOWNLOAD_EIGEN=ON`: Download Eigen for the NumPy tests
97
+ * `-DPYBIND11_INSTALL=ON/OFF`: Enable the install target (on by default for the
98
+ master project)
99
+ * `-DUSE_PYTHON_INSTALL_DIR=ON`: Try to install into the python dir
100
+
101
+
102
+ <details><summary>A few standard CMake tricks: (click to expand)</summary><p>
103
+
104
+ * Use `cmake --build build -v` to see the commands used to build the files.
105
+ * Use `cmake build -LH` to list the CMake options with help.
106
+ * Use `ccmake` if available to see a curses (terminal) gui, or `cmake-gui` for
107
+ a completely graphical interface (not present in the PyPI package).
108
+ * Use `cmake --build build -j12` to build with 12 cores (for example).
109
+ * Use `-G` and the name of a generator to use something different. `cmake
110
+ --help` lists the generators available.
111
+ - On Unix, setting `CMAKE_GENERATOR=Ninja` in your environment will give
112
+ you automatic multithreading on all your CMake projects!
113
+ * Open the `CMakeLists.txt` with QtCreator to generate for that IDE.
114
+ * You can use `-DCMAKE_EXPORT_COMPILE_COMMANDS=ON` to generate the `.json` file
115
+ that some tools expect.
116
+
117
+ </p></details>
118
+
119
+
120
+ To run the tests, you can "build" the check target:
121
+
122
+ ```bash
123
+ cmake --build build --target check
124
+ ```
125
+
126
+ `--target` can be spelled `-t` in CMake 3.15+. You can also run individual
127
+ tests with these targets:
128
+
129
+ * `pytest`: Python tests only
130
+ * `cpptest`: C++ tests only
131
+ * `test_cmake_build`: Install / subdirectory tests
132
+
133
+ If you want to build just a subset of tests, use
134
+ `-DPYBIND11_TEST_OVERRIDE="test_callbacks.cpp;test_pickling.cpp"`. If this is
135
+ empty, all tests will be built.
136
+
137
+ ### Formatting
138
+
139
+ All formatting is handled by pre-commit.
140
+
141
+ Install with brew (macOS) or pip (any OS):
142
+
143
+ ```bash
144
+ # Any OS
145
+ python3 -m pip install pre-commit
146
+
147
+ # OR macOS with homebrew:
148
+ brew install pre-commit
149
+ ```
150
+
151
+ Then, you can run it on the items you've added to your staging area, or all
152
+ files:
153
+
154
+ ```bash
155
+ pre-commit run
156
+ # OR
157
+ pre-commit run --all-files
158
+ ```
159
+
160
+ And, if you want to always use it, you can install it as a git hook (hence the
161
+ name, pre-commit):
162
+
163
+ ```bash
164
+ pre-commit install
165
+ ```
166
+
167
+ [pre-commit]: https://pre-commit.com
168
+ [pybind11.readthedocs.org]: http://pybind11.readthedocs.org/en/latest
169
+ [issue tracker]: https://github.com/pybind/pybind11/issues
170
+ [gitter]: https://gitter.im/pybind/Lobby
171
+ [using pull requests]: https://help.github.com/articles/using-pull-requests
DiffVG/pybind11/.github/ISSUE_TEMPLATE/bug-report.md ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Bug Report
3
+ about: File an issue about a bug
4
+ title: "[BUG] "
5
+ ---
6
+
7
+
8
+ Make sure you've completed the following steps before submitting your issue -- thank you!
9
+
10
+ 1. Make sure you've read the [documentation][]. Your issue may be addressed there.
11
+ 2. Search the [issue tracker][] to verify that this hasn't already been reported. +1 or comment there if it has.
12
+ 3. Consider asking first in the [Gitter chat room][].
13
+ 4. Include a self-contained and minimal piece of code that reproduces the problem. If that's not possible, try to make the description as clear as possible.
14
+ a. If possible, make a PR with a new, failing test to give us a starting point to work on!
15
+
16
+ [documentation]: https://pybind11.readthedocs.io
17
+ [issue tracker]: https://github.com/pybind/pybind11/issues
18
+ [Gitter chat room]: https://gitter.im/pybind/Lobby
19
+
20
+ *After reading, remove this checklist and the template text in parentheses below.*
21
+
22
+ ## Issue description
23
+
24
+ (Provide a short description, state the expected behavior and what actually happens.)
25
+
26
+ ## Reproducible example code
27
+
28
+ (The code should be minimal, have no external dependencies, isolate the function(s) that cause breakage. Submit matched and complete C++ and Python snippets that can be easily compiled and run to diagnose the issue.)
DiffVG/pybind11/.github/ISSUE_TEMPLATE/config.yml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ blank_issues_enabled: false
2
+ contact_links:
3
+ - name: Gitter room
4
+ url: https://gitter.im/pybind/Lobby
5
+ about: A room for discussing pybind11 with an active community
DiffVG/pybind11/.github/ISSUE_TEMPLATE/feature-request.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Feature Request
3
+ about: File an issue about adding a feature
4
+ title: "[FEAT] "
5
+ ---
6
+
7
+
8
+ Make sure you've completed the following steps before submitting your issue -- thank you!
9
+
10
+ 1. Check if your feature has already been mentioned / rejected / planned in other issues.
11
+ 2. If those resources didn't help, consider asking in the [Gitter chat room][] to see if this is interesting / useful to a larger audience and possible to implement reasonably.
12
+ 3. If you have a useful feature that passes the previous items (or not suitable for chat), please fill in the details below.
13
+
14
+ [Gitter chat room]: https://gitter.im/pybind/Lobby
15
+
16
+ *After reading, remove this checklist.*
DiffVG/pybind11/.github/ISSUE_TEMPLATE/question.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Question
3
+ about: File an issue about unexplained behavior
4
+ title: "[QUESTION] "
5
+ ---
6
+
7
+ If you have a question, please check the following first:
8
+
9
+ 1. Check if your question has already been answered in the [FAQ][] section.
10
+ 2. Make sure you've read the [documentation][]. Your issue may be addressed there.
11
+ 3. If those resources didn't help and you only have a short question (not a bug report), consider asking in the [Gitter chat room][].
12
+ 4. Search the [issue tracker][], including the closed issues, to see if your question has already been asked/answered. +1 or comment if it has been asked but has no answer.
13
+ 5. If you have a more complex question which is not answered in the previous items (or not suitable for chat), please fill in the details below.
14
+ 6. Include a self-contained and minimal piece of code that illustrates your question. If that's not possible, try to make the description as clear as possible.
15
+
16
+ [FAQ]: http://pybind11.readthedocs.io/en/latest/faq.html
17
+ [documentation]: https://pybind11.readthedocs.io
18
+ [issue tracker]: https://github.com/pybind/pybind11/issues
19
+ [Gitter chat room]: https://gitter.im/pybind/Lobby
20
+
21
+ *After reading, remove this checklist.*
DiffVG/pybind11/.github/workflows/ci.yml ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: CI
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ pull_request:
6
+ push:
7
+ branches:
8
+ - master
9
+ - stable
10
+ - v*
11
+
12
+ jobs:
13
+ standard:
14
+ strategy:
15
+ matrix:
16
+ runs-on: [ubuntu-latest, windows-latest, macos-latest]
17
+ arch: [x64]
18
+ python:
19
+ - 2.7
20
+ - 3.5
21
+ - 3.8
22
+ - pypy2
23
+ - pypy3
24
+
25
+ include:
26
+ - runs-on: ubuntu-latest
27
+ python: 3.6
28
+ arch: x64
29
+ args: >
30
+ -DPYBIND11_FINDPYTHON=ON
31
+ - runs-on: windows-2016
32
+ python: 3.7
33
+ arch: x86
34
+ args2: >
35
+ -DCMAKE_CXX_FLAGS="/permissive- /EHsc /GR"
36
+ - runs-on: windows-latest
37
+ python: 3.6
38
+ arch: x64
39
+ args: >
40
+ -DPYBIND11_FINDPYTHON=ON
41
+ - runs-on: windows-latest
42
+ python: 3.7
43
+ arch: x64
44
+
45
+ - runs-on: ubuntu-latest
46
+ python: 3.9-dev
47
+ arch: x64
48
+ - runs-on: macos-latest
49
+ python: 3.9-dev
50
+ arch: x64
51
+ args: >
52
+ -DPYBIND11_FINDPYTHON=ON
53
+
54
+ exclude:
55
+ # Currently 32bit only, and we build 64bit
56
+ - runs-on: windows-latest
57
+ python: pypy2
58
+ arch: x64
59
+ - runs-on: windows-latest
60
+ python: pypy3
61
+ arch: x64
62
+
63
+ # Currently broken on embed_test
64
+ - runs-on: windows-latest
65
+ python: 3.8
66
+ arch: x64
67
+ - runs-on: windows-latest
68
+ python: 3.9-dev
69
+ arch: x64
70
+
71
+
72
+ name: "🐍 ${{ matrix.python }} • ${{ matrix.runs-on }} • ${{ matrix.arch }} ${{ matrix.args }}"
73
+ runs-on: ${{ matrix.runs-on }}
74
+ continue-on-error: ${{ endsWith(matrix.python, 'dev') }}
75
+
76
+ steps:
77
+ - uses: actions/checkout@v2
78
+
79
+ - name: Setup Python ${{ matrix.python }}
80
+ uses: actions/setup-python@v2
81
+ with:
82
+ python-version: ${{ matrix.python }}
83
+ architecture: ${{ matrix.arch }}
84
+
85
+ - name: Setup Boost (Windows / Linux latest)
+ shell: bash
86
+ run: echo "BOOST_ROOT=$BOOST_ROOT_1_72_0" >> $GITHUB_ENV
87
+
88
+ - name: Update CMake
89
+ uses: jwlawson/actions-setup-cmake@v1.3
90
+
91
+ - name: Cache wheels
92
+ if: runner.os == 'macOS'
93
+ uses: actions/cache@v2
94
+ with:
95
+ # This path is specific to macOS - we really only need it for PyPy NumPy wheels
96
+ # See https://github.com/actions/cache/blob/master/examples.md#python---pip
97
+ # for ways to do this more generally
98
+ path: ~/Library/Caches/pip
99
+ # Look to see if there is a cache hit for the corresponding requirements file
100
+ key: ${{ runner.os }}-pip-${{ matrix.python }}-${{ matrix.arch }}-${{ hashFiles('tests/requirements.txt') }}
101
+
102
+ - name: Prepare env
103
+ run: python -m pip install -r tests/requirements.txt --prefer-binary
104
+
105
+ - name: Setup annotations
106
+ run: python -m pip install pytest-github-actions-annotate-failures
107
+
108
+ - name: Configure C++11 ${{ matrix.args }}
109
+ run: >
110
+ cmake -S . -B .
111
+ -DPYBIND11_WERROR=ON
112
+ -DDOWNLOAD_CATCH=ON
113
+ -DDOWNLOAD_EIGEN=ON
114
+ -DCMAKE_CXX_STANDARD=11
115
+ ${{ matrix.args }}
116
+
117
+ - name: Build C++11
118
+ run: cmake --build . -j 2
119
+
120
+ - name: Python tests C++11
121
+ run: cmake --build . --target pytest -j 2
122
+
123
+ - name: C++11 tests
124
+ run: cmake --build . --target cpptest -j 2
125
+
126
+ - name: Interface test C++11
127
+ run: cmake --build . --target test_cmake_build
128
+
129
+ - name: Clean directory
130
+ run: git clean -fdx
131
+
132
+ - name: Configure ${{ matrix.args2 }}
133
+ run: >
134
+ cmake -S . -B build2
135
+ -DPYBIND11_WERROR=ON
136
+ -DDOWNLOAD_CATCH=ON
137
+ -DDOWNLOAD_EIGEN=ON
138
+ -DCMAKE_CXX_STANDARD=17
139
+ ${{ matrix.args }}
140
+ ${{ matrix.args2 }}
141
+
142
+ - name: Build
143
+ run: cmake --build build2 -j 2
144
+
145
+ - name: Python tests
146
+ run: cmake --build build2 --target pytest
147
+
148
+ - name: C++ tests
149
+ run: cmake --build build2 --target cpptest
150
+
151
+ - name: Interface test
152
+ run: cmake --build build2 --target test_cmake_build
153
+
154
+ clang:
155
+ runs-on: ubuntu-latest
156
+ strategy:
157
+ fail-fast: false
158
+ matrix:
159
+ clang:
160
+ - 3.6
161
+ - 3.7
162
+ - 3.9
163
+ - 5
164
+ - 7
165
+ - 9
166
+ - dev
167
+
168
+ name: "🐍 3 • Clang ${{ matrix.clang }} • x64"
169
+ container: "silkeh/clang:${{ matrix.clang }}"
170
+
171
+ steps:
172
+ - uses: actions/checkout@v2
173
+
174
+ - name: Add wget and python3
175
+ run: apt-get update && apt-get install -y python3-dev python3-numpy python3-pytest libeigen3-dev
176
+
177
+ - name: Configure
178
+ shell: bash
179
+ run: >
180
+ cmake -S . -B build
181
+ -DPYBIND11_WERROR=ON
182
+ -DDOWNLOAD_CATCH=ON
183
+ -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")
184
+
185
+ - name: Build
186
+ run: cmake --build build -j 2
187
+
188
+ - name: Python tests
189
+ run: cmake --build build --target pytest
190
+
191
+ - name: C++ tests
192
+ run: cmake --build build --target cpptest
193
+
194
+ - name: Interface test
195
+ run: cmake --build build --target test_cmake_build
196
+
197
+ gcc:
198
+ runs-on: ubuntu-latest
199
+ strategy:
200
+ fail-fast: false
201
+ matrix:
202
+ gcc:
203
+ - 7
204
+ - latest
205
+
206
+ name: "🐍 3 • GCC ${{ matrix.gcc }} • x64"
207
+ container: "gcc:${{ matrix.gcc }}"
208
+
209
+ steps:
210
+ - uses: actions/checkout@v1
211
+
212
+ - name: Add Python 3
213
+ run: apt-get update; apt-get install -y python3-dev python3-numpy python3-pytest python3-pip libeigen3-dev
214
+
215
+ - name: Update pip
216
+ run: python3 -m pip install --upgrade pip
217
+
218
+ - name: Setup CMake 3.18
219
+ uses: jwlawson/actions-setup-cmake@v1.3
220
+ with:
221
+ cmake-version: 3.18
222
+
223
+ - name: Configure
224
+ shell: bash
225
+ run: >
226
+ cmake -S . -B build
227
+ -DPYBIND11_WERROR=ON
228
+ -DDOWNLOAD_CATCH=ON
229
+ -DCMAKE_CXX_STANDARD=11
230
+ -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")
231
+
232
+ - name: Build
233
+ run: cmake --build build -j 2
234
+
235
+ - name: Python tests
236
+ run: cmake --build build --target pytest
237
+
238
+ - name: C++ tests
239
+ run: cmake --build build --target cpptest
240
+
241
+ - name: Interface test
242
+ run: cmake --build build --target test_cmake_build
243
+
244
+ centos:
245
+ runs-on: ubuntu-latest
246
+ strategy:
247
+ fail-fast: false
248
+ matrix:
249
+ centos:
250
+ - 7 # GCC 4.8
251
+ - 8
252
+
253
+ name: "🐍 3 • CentOS ${{ matrix.centos }} • x64"
254
+ container: "centos:${{ matrix.centos }}"
255
+
256
+ steps:
257
+ - uses: actions/checkout@v2
258
+
259
+ - name: Add Python 3
260
+ run: yum update -y && yum install -y python3-devel gcc-c++ make git
261
+
262
+ - name: Update pip
263
+ run: python3 -m pip install --upgrade pip
264
+
265
+ - name: Install dependencies
266
+ run: python3 -m pip install cmake -r tests/requirements.txt --prefer-binary
267
+
268
+ - name: Configure
269
+ shell: bash
270
+ run: >
271
+ cmake -S . -B build
272
+ -DPYBIND11_WERROR=ON
273
+ -DDOWNLOAD_CATCH=ON
274
+ -DDOWNLOAD_EIGEN=ON
275
+ -DCMAKE_CXX_STANDARD=11
276
+ -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")
277
+
278
+ - name: Build
279
+ run: cmake --build build -j 2
280
+
281
+ - name: Python tests
282
+ run: cmake --build build --target pytest
283
+
284
+ - name: C++ tests
285
+ run: cmake --build build --target cpptest
286
+
287
+ - name: Interface test
288
+ run: cmake --build build --target test_cmake_build
289
+
290
+ install-classic:
291
+ name: "🐍 3.5 • Debian • x86 • Install"
292
+ runs-on: ubuntu-latest
293
+ container: i386/debian:stretch
294
+
295
+ steps:
296
+ - uses: actions/checkout@v1
297
+
298
+ - name: Install requirements
299
+ run: |
300
+ apt-get update
301
+ apt-get install -y git make cmake g++ libeigen3-dev python3-dev python3-pip
302
+ pip3 install "pytest==3.1.*"
303
+
304
+ - name: Configure for install
305
+ run: >
306
+ cmake .
307
+ -DPYBIND11_INSTALL=1 -DPYBIND11_TEST=0
308
+ -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")
309
+
310
+ - name: Make and install
311
+ run: make install
312
+
313
+ - name: Copy tests to new directory
314
+ run: cp -a tests /pybind11-tests
315
+
316
+ - name: Make a new test directory
317
+ run: mkdir /build-tests
318
+
319
+ - name: Configure tests
320
+ run: >
321
+ cmake ../pybind11-tests
322
+ -DDOWNLOAD_CATCH=ON
323
+ -DPYBIND11_WERROR=ON
324
+ -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")
325
+ working-directory: /build-tests
326
+
327
+ - name: Run tests
328
+ run: make pytest -j 2
329
+ working-directory: /build-tests
330
+
331
+
332
+ doxygen:
333
+ name: "Documentation build test"
334
+ runs-on: ubuntu-latest
335
+ container: alpine:3.12
336
+
337
+ steps:
338
+ - uses: actions/checkout@v2
339
+
340
+ - name: Install system requirements
341
+ run: apk add doxygen python3-dev
342
+
343
+ - name: Ensure pip
344
+ run: python3 -m ensurepip
345
+
346
+ - name: Install docs & setup requirements
347
+ run: python3 -m pip install -r docs/requirements.txt pytest setuptools
348
+
349
+ - name: Build docs
350
+ run: python3 -m sphinx -W -b html docs docs/.build
351
+
352
+ - name: Make SDist
353
+ run: python3 setup.py sdist
354
+
355
+ - name: Compare Dists (headers only)
356
+ run: |
357
+ python3 -m pip install --user -U ./dist/*
358
+ installed=$(python3 -c "import pybind11; print(pybind11.get_include(True) + '/pybind11')")
359
+ diff -rq $installed ./include/pybind11
DiffVG/pybind11/.github/workflows/configure.yml ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Config
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ pull_request:
6
+ push:
7
+ branches:
8
+ - master
9
+ - stable
10
+ - v*
11
+
12
+ jobs:
13
+ cmake:
14
+ strategy:
15
+ fail-fast: false
16
+ matrix:
17
+ runs-on: [ubuntu-latest, macos-latest, windows-latest]
18
+ arch: [x64]
19
+ cmake: [3.18]
20
+
21
+ include:
22
+ - runs-on: ubuntu-latest
23
+ arch: x64
24
+ cmake: 3.4
25
+
26
+ - runs-on: macos-latest
27
+ arch: x64
28
+ cmake: 3.7
29
+
30
+ - runs-on: windows-2016
31
+ arch: x86
32
+ cmake: 3.8
33
+
34
+ - runs-on: windows-2016
35
+ arch: x86
36
+ cmake: 3.18
37
+
38
+ name: 🐍 3.7 • CMake ${{ matrix.cmake }} • ${{ matrix.runs-on }}
39
+ runs-on: ${{ matrix.runs-on }}
40
+
41
+ steps:
42
+ - uses: actions/checkout@v2
43
+
44
+ - name: Setup Python 3.7
45
+ uses: actions/setup-python@v2
46
+ with:
47
+ python-version: 3.7
48
+ architecture: ${{ matrix.arch }}
49
+
50
+ - name: Prepare env
51
+ run: python -m pip install -r tests/requirements.txt
52
+
53
+ - name: Setup CMake ${{ matrix.cmake }}
54
+ uses: jwlawson/actions-setup-cmake@v1.3
55
+ with:
56
+ cmake-version: ${{ matrix.cmake }}
57
+
58
+ - name: Make build directories
59
+ run: mkdir "build dir"
60
+
61
+ - name: Configure
62
+ working-directory: build dir
63
+ shell: bash
64
+ run: >
65
+ cmake ..
66
+ -DPYBIND11_WERROR=ON
67
+ -DDOWNLOAD_CATCH=ON
68
+ -DPYTHON_EXECUTABLE=$(python -c "import sys; print(sys.executable)")
69
+
70
+ - name: Build
71
+ working-directory: build dir
72
+ if: github.event_name == 'workflow_dispatch'
73
+ run: cmake --build . --config Release
74
+
75
+ - name: Test
76
+ working-directory: build dir
77
+ if: github.event_name == 'workflow_dispatch'
78
+ run: cmake --build . --config Release --target check
DiffVG/pybind11/.github/workflows/format.yml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Format
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ pull_request:
6
+ push:
7
+ branches:
8
+ - master
9
+ - stable
10
+ - "v*"
11
+
12
+ jobs:
13
+ pre-commit:
14
+ name: Format
15
+ runs-on: ubuntu-latest
16
+ steps:
17
+ - uses: actions/checkout@v2
18
+ - uses: actions/setup-python@v2
19
+ - uses: pre-commit/action@v2.0.0
DiffVG/pybind11/.gitignore ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CMakeCache.txt
2
+ CMakeFiles
3
+ Makefile
4
+ cmake_install.cmake
5
+ cmake_uninstall.cmake
6
+ .DS_Store
7
+ *.so
8
+ *.pyd
9
+ *.dll
10
+ *.sln
11
+ *.sdf
12
+ *.opensdf
13
+ *.vcxproj
14
+ *.vcxproj.user
15
+ *.filters
16
+ example.dir
17
+ Win32
18
+ x64
19
+ Release
20
+ Debug
21
+ .vs
22
+ CTestTestfile.cmake
23
+ Testing
24
+ autogen
25
+ MANIFEST
26
+ /.ninja_*
27
+ /*.ninja
28
+ /docs/.build
29
+ *.py[co]
30
+ *.egg-info
31
+ *~
32
+ .*.swp
33
+ .DS_Store
34
+ /dist
35
+ /*build*
36
+ .cache/
37
+ sosize-*.txt
38
+ pybind11Config*.cmake
39
+ pybind11Targets.cmake
40
+ /*env*
41
+ /.vscode
DiffVG/pybind11/.gitmodules ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [submodule "tools/clang"]
2
+ path = tools/clang
3
+ url = ../../wjakob/clang-cindex-python3.git
DiffVG/pybind11/.pre-commit-config.yaml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v3.1.0
4
+ hooks:
5
+ - id: check-added-large-files
6
+ - id: check-case-conflict
7
+ - id: check-merge-conflict
8
+ - id: check-symlinks
9
+ - id: check-yaml
10
+ - id: debug-statements
11
+ - id: end-of-file-fixer
12
+ - id: mixed-line-ending
13
+ - id: requirements-txt-fixer
14
+ - id: trailing-whitespace
15
+ - id: fix-encoding-pragma
16
+
17
+ - repo: https://github.com/Lucas-C/pre-commit-hooks
18
+ rev: v1.1.9
19
+ hooks:
20
+ - id: remove-tabs
21
+
22
+ - repo: https://gitlab.com/pycqa/flake8
23
+ rev: 3.8.3
24
+ hooks:
25
+ - id: flake8
26
+ additional_dependencies: [flake8-bugbear, pep8-naming]
27
+ exclude: ^(docs/.*|tools/.*)$
28
+
29
+ - repo: https://github.com/cheshirekow/cmake-format-precommit
30
+ rev: v0.6.11
31
+ hooks:
32
+ - id: cmake-format
33
+ additional_dependencies: [pyyaml]
34
+ types: [file]
35
+ files: (\.cmake|CMakeLists.txt)(.in)?$
36
+
37
+ - repo: local
38
+ hooks:
39
+ - id: check-style
40
+ name: Classic check-style
41
+ language: system
42
+ types:
43
+ - c++
44
+ entry: ./tools/check-style.sh
DiffVG/pybind11/.readthedocs.yml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ python:
2
+ version: 3
3
+ requirements_file: docs/requirements.txt
DiffVG/pybind11/CMakeLists.txt ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CMakeLists.txt -- Build system for the pybind11 modules
2
+ #
3
+ # Copyright (c) 2015 Wenzel Jakob <wenzel@inf.ethz.ch>
4
+ #
5
+ # All rights reserved. Use of this source code is governed by a
6
+ # BSD-style license that can be found in the LICENSE file.
7
+
8
+ cmake_minimum_required(VERSION 3.4)
9
+
10
+ # The `cmake_minimum_required(VERSION 3.4...3.18)` syntax does not work with
11
+ # some versions of VS that have a patched CMake 3.11. This forces us to emulate
12
+ # the behavior using the following workaround:
13
+ if(${CMAKE_VERSION} VERSION_LESS 3.18)
14
+ cmake_policy(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION})
15
+ else()
16
+ cmake_policy(VERSION 3.18)
17
+ endif()
18
+
19
+ # Extract project version from source
20
+ file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/include/pybind11/detail/common.h"
21
+ pybind11_version_defines REGEX "#define PYBIND11_VERSION_(MAJOR|MINOR|PATCH) ")
22
+
23
+ foreach(ver ${pybind11_version_defines})
24
+ if(ver MATCHES [[#define PYBIND11_VERSION_(MAJOR|MINOR|PATCH) +([^ ]+)$]])
25
+ set(PYBIND11_VERSION_${CMAKE_MATCH_1} "${CMAKE_MATCH_2}")
26
+ endif()
27
+ endforeach()
28
+
29
+ if(PYBIND11_VERSION_PATCH MATCHES [[([a-zA-Z]+)]])
30
+ set(pybind11_VERSION_TYPE "${CMAKE_MATCH_1}")
31
+ endif()
32
+ string(REGEX MATCH "[0-9]+" PYBIND11_VERSION_PATCH "${PYBIND11_VERSION_PATCH}")
33
+
34
+ project(
35
+ pybind11
36
+ LANGUAGES CXX
37
+ VERSION "${PYBIND11_VERSION_MAJOR}.${PYBIND11_VERSION_MINOR}.${PYBIND11_VERSION_PATCH}")
38
+
39
+ # Standard includes
40
+ include(GNUInstallDirs)
41
+ include(CMakePackageConfigHelpers)
42
+ include(CMakeDependentOption)
43
+
44
+ if(NOT pybind11_FIND_QUIETLY)
45
+ message(STATUS "pybind11 v${pybind11_VERSION} ${pybind11_VERSION_TYPE}")
46
+ endif()
47
+
48
+ # Check if pybind11 is being used directly or via add_subdirectory
49
+ if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR)
50
+ ### Warn if this is not an out-of-source build
51
+ if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)
52
+ set(lines
53
+ "You are building in-place. If that is not what you intended to "
54
+ "do, you can clean the source directory with:\n"
55
+ "rm -r CMakeCache.txt CMakeFiles/ cmake_uninstall.cmake pybind11Config.cmake "
56
+ "pybind11ConfigVersion.cmake tests/CMakeFiles/\n")
57
+ message(AUTHOR_WARNING ${lines})
58
+ endif()
59
+
60
+ set(PYBIND11_MASTER_PROJECT ON)
61
+
62
+ if(APPLE AND CMAKE_VERSION VERSION_LESS 3.7)
63
+ # CMake < 3.7 on macOS cannot download Catch (APPLE is CMake's built-in flag; OSX is never set)
64
+ message(WARNING "CMAKE 3.7+ needed on macOS to download catch, and newer HIGHLY recommended")
65
+ elseif(WIN32 AND CMAKE_VERSION VERSION_LESS 3.8)
66
+ # Only tested with 3.8+ in CI (WIN32 is CMake's built-in flag; WINDOWS is never set)
67
+ message(WARNING "CMAKE 3.8+ tested on Windows, previous versions untested")
68
+ endif()
69
+
70
+ message(STATUS "CMake ${CMAKE_VERSION}")
71
+
72
+ if(CMAKE_CXX_STANDARD)
73
+ set(CMAKE_CXX_EXTENSIONS OFF)
74
+ set(CMAKE_CXX_STANDARD_REQUIRED ON)
75
+ endif()
76
+ else()
77
+ set(PYBIND11_MASTER_PROJECT OFF)
78
+ set(pybind11_system SYSTEM)
79
+ endif()
80
+
81
+ # Options
82
+ option(PYBIND11_INSTALL "Install pybind11 header files?" ${PYBIND11_MASTER_PROJECT})
83
+ option(PYBIND11_TEST "Build pybind11 test suite?" ${PYBIND11_MASTER_PROJECT})
84
+ option(PYBIND11_NOPYTHON "Disable search for Python" OFF)
85
+
86
+ cmake_dependent_option(
87
+ USE_PYTHON_INCLUDE_DIR
88
+ "Install pybind11 headers in Python include directory instead of default installation prefix"
89
+ OFF "PYBIND11_INSTALL" OFF)
90
+
91
+ cmake_dependent_option(PYBIND11_FINDPYTHON "Force new FindPython" OFF
92
+ "NOT CMAKE_VERSION VERSION_LESS 3.12" OFF)
93
+
94
+ # NB: when adding a header don't forget to also add it to setup.py
95
+ set(PYBIND11_HEADERS
96
+ include/pybind11/detail/class.h
97
+ include/pybind11/detail/common.h
98
+ include/pybind11/detail/descr.h
99
+ include/pybind11/detail/init.h
100
+ include/pybind11/detail/internals.h
101
+ include/pybind11/detail/typeid.h
102
+ include/pybind11/attr.h
103
+ include/pybind11/buffer_info.h
104
+ include/pybind11/cast.h
105
+ include/pybind11/chrono.h
106
+ include/pybind11/common.h
107
+ include/pybind11/complex.h
108
+ include/pybind11/options.h
109
+ include/pybind11/eigen.h
110
+ include/pybind11/embed.h
111
+ include/pybind11/eval.h
112
+ include/pybind11/iostream.h
113
+ include/pybind11/functional.h
114
+ include/pybind11/numpy.h
115
+ include/pybind11/operators.h
116
+ include/pybind11/pybind11.h
117
+ include/pybind11/pytypes.h
118
+ include/pybind11/stl.h
119
+ include/pybind11/stl_bind.h)
120
+
121
+ # Compare with grep and warn if mismatched
122
+ if(PYBIND11_MASTER_PROJECT AND NOT CMAKE_VERSION VERSION_LESS 3.12)
123
+ file(
124
+ GLOB_RECURSE _pybind11_header_check
125
+ LIST_DIRECTORIES false
126
+ RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
127
+ CONFIGURE_DEPENDS "include/pybind11/*.h")
128
+ set(_pybind11_here_only ${PYBIND11_HEADERS})
129
+ set(_pybind11_disk_only ${_pybind11_header_check})
130
+ list(REMOVE_ITEM _pybind11_here_only ${_pybind11_header_check})
131
+ list(REMOVE_ITEM _pybind11_disk_only ${PYBIND11_HEADERS})
132
+ if(_pybind11_here_only)
133
+ message(AUTHOR_WARNING "PYBIND11_HEADERS has extra files:" ${_pybind11_here_only})
134
+ endif()
135
+ if(_pybind11_disk_only)
136
+ message(AUTHOR_WARNING "PYBIND11_HEADERS is missing files:" ${_pybind11_disk_only})
137
+ endif()
138
+ endif()
139
+
140
+ # CMake 3.12 added list(TRANSFORM <list> PREPEND
141
+ # But we can't use it yet
142
+ string(REPLACE "include/" "${CMAKE_CURRENT_SOURCE_DIR}/include/" PYBIND11_HEADERS
143
+ "${PYBIND11_HEADERS}")
144
+
145
+ # Cache variables so pybind11_add_module can be used in parent projects
146
+ set(PYBIND11_INCLUDE_DIR
147
+ "${CMAKE_CURRENT_LIST_DIR}/include"
148
+ CACHE INTERNAL "")
149
+
150
+ # Note: when creating targets, you cannot use if statements at configure time -
151
+ # you need generator expressions, because those will be placed in the target file.
152
+ # You can also place ifs *in* the Config.in, but not here.
153
+
154
+ # This section builds targets, but does *not* touch Python
155
+
156
+ # Build the headers-only target (no Python included):
157
+ # (long name used here to keep this from clashing in subdirectory mode)
158
+ add_library(pybind11_headers INTERFACE)
159
+ add_library(pybind11::pybind11_headers ALIAS pybind11_headers) # to match exported target
160
+ add_library(pybind11::headers ALIAS pybind11_headers) # easier to use/remember
161
+
162
+ include("${CMAKE_CURRENT_SOURCE_DIR}/tools/pybind11Common.cmake")
163
+
164
+ if(NOT PYBIND11_MASTER_PROJECT AND NOT pybind11_FIND_QUIETLY)
165
+ message(STATUS "Using pybind11: (version \"${pybind11_VERSION}\" ${pybind11_VERSION_TYPE})")
166
+ endif()
167
+
168
+ # Install headers relative to the Python include dir (new FindPython first, then classic PYTHON_INCLUDE_DIR)
169
+ if(USE_PYTHON_INCLUDE_DIR AND DEFINED Python_INCLUDE_DIRS)
170
+ file(RELATIVE_PATH CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX} ${Python_INCLUDE_DIRS})
171
+ elseif(USE_PYTHON_INCLUDE_DIR AND DEFINED PYTHON_INCLUDE_DIR)
172
+ file(RELATIVE_PATH CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX} ${PYTHON_INCLUDE_DIR})
173
+ endif()
174
+
175
+ # Fill in headers target
176
+ target_include_directories(
177
+ pybind11_headers ${pybind11_system} INTERFACE $<BUILD_INTERFACE:${PYBIND11_INCLUDE_DIR}>
178
+ $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
179
+
180
+ target_compile_features(pybind11_headers INTERFACE cxx_inheriting_constructors cxx_user_literals
181
+ cxx_right_angle_brackets)
182
+
183
+ if(PYBIND11_INSTALL)
184
+ install(DIRECTORY ${PYBIND11_INCLUDE_DIR}/pybind11 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
185
+ # GNUInstallDirs "DATADIR" wrong here; CMake search path wants "share".
186
+ set(PYBIND11_CMAKECONFIG_INSTALL_DIR
187
+ "share/cmake/${PROJECT_NAME}"
188
+ CACHE STRING "install path for pybind11Config.cmake")
189
+
190
+ configure_package_config_file(
191
+ tools/${PROJECT_NAME}Config.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
192
+ INSTALL_DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR})
193
+
194
+ if(CMAKE_VERSION VERSION_LESS 3.14)
195
+ # Remove CMAKE_SIZEOF_VOID_P from ConfigVersion.cmake since the library does
196
+ # not depend on architecture specific settings or libraries.
197
+ set(_PYBIND11_CMAKE_SIZEOF_VOID_P ${CMAKE_SIZEOF_VOID_P})
198
+ unset(CMAKE_SIZEOF_VOID_P)
199
+
200
+ write_basic_package_version_file(
201
+ ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
202
+ VERSION ${PROJECT_VERSION}
203
+ COMPATIBILITY AnyNewerVersion)
204
+
205
+ set(CMAKE_SIZEOF_VOID_P ${_PYBIND11_CMAKE_SIZEOF_VOID_P})
206
+ else()
207
+ # CMake 3.14+ natively supports header-only libraries
208
+ write_basic_package_version_file(
209
+ ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
210
+ VERSION ${PROJECT_VERSION}
211
+ COMPATIBILITY AnyNewerVersion ARCH_INDEPENDENT)
212
+ endif()
213
+
214
+ install(
215
+ FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake
216
+ ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
217
+ tools/FindPythonLibsNew.cmake
218
+ tools/pybind11Common.cmake
219
+ tools/pybind11Tools.cmake
220
+ tools/pybind11NewTools.cmake
221
+ DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR})
222
+
223
+ if(NOT PYBIND11_EXPORT_NAME)
224
+ set(PYBIND11_EXPORT_NAME "${PROJECT_NAME}Targets")
225
+ endif()
226
+
227
+ install(TARGETS pybind11_headers EXPORT "${PYBIND11_EXPORT_NAME}")
228
+
229
+ install(
230
+ EXPORT "${PYBIND11_EXPORT_NAME}"
231
+ NAMESPACE "pybind11::"
232
+ DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR})
233
+
234
+ # Uninstall target
235
+ if(PYBIND11_MASTER_PROJECT)
236
+ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/tools/cmake_uninstall.cmake.in"
237
+ "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" IMMEDIATE @ONLY)
238
+
239
+ add_custom_target(uninstall COMMAND ${CMAKE_COMMAND} -P
240
+ ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)
241
+ endif()
242
+ endif()
243
+
244
+ # BUILD_TESTING takes priority, but only if this is the master project
245
+ if(PYBIND11_MASTER_PROJECT AND DEFINED BUILD_TESTING)
246
+ if(BUILD_TESTING)
247
+ if(_pybind11_nopython)
248
+ message(FATAL_ERROR "Cannot activate tests in NOPYTHON mode")
249
+ else()
250
+ add_subdirectory(tests)
251
+ endif()
252
+ endif()
253
+ else()
254
+ if(PYBIND11_TEST)
255
+ if(_pybind11_nopython)
256
+ message(FATAL_ERROR "Cannot activate tests in NOPYTHON mode")
257
+ else()
258
+ add_subdirectory(tests)
259
+ endif()
260
+ endif()
261
+ endif()
262
+
263
+ # Better symmetry with find_package(pybind11 CONFIG) mode.
264
+ if(NOT PYBIND11_MASTER_PROJECT)
265
+ set(pybind11_FOUND
266
+ TRUE
267
+ CACHE INTERNAL "true if pybind11 and all required components found on the system")
268
+ set(pybind11_INCLUDE_DIR
269
+ "${PYBIND11_INCLUDE_DIR}"
270
+ CACHE INTERNAL "Directory where pybind11 headers are located")
271
+ endif()
DiffVG/pybind11/LICENSE ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
2
+
3
+ Redistribution and use in source and binary forms, with or without
4
+ modification, are permitted provided that the following conditions are met:
5
+
6
+ 1. Redistributions of source code must retain the above copyright notice, this
7
+ list of conditions and the following disclaimer.
8
+
9
+ 2. Redistributions in binary form must reproduce the above copyright notice,
10
+ this list of conditions and the following disclaimer in the documentation
11
+ and/or other materials provided with the distribution.
12
+
13
+ 3. Neither the name of the copyright holder nor the names of its contributors
14
+ may be used to endorse or promote products derived from this software
15
+ without specific prior written permission.
16
+
17
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
+
28
+ Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing of
29
+ external contributions to this project including patches, pull requests, etc.
DiffVG/pybind11/MANIFEST.in ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ recursive-include include/pybind11 *.h
2
+ include LICENSE README.md .github/CONTRIBUTING.md
DiffVG/pybind11/README.md ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ![pybind11 logo](https://github.com/pybind/pybind11/raw/master/docs/pybind11-logo.png)
2
+
3
+ # pybind11 — Seamless operability between C++11 and Python
4
+
5
+ [![Documentation Status](https://readthedocs.org/projects/pybind11/badge/?version=master)](http://pybind11.readthedocs.org/en/master/?badge=master)
6
+ [![Documentation Status](https://readthedocs.org/projects/pybind11/badge/?version=stable)](http://pybind11.readthedocs.org/en/stable/?badge=stable)
7
+ [![Gitter chat](https://img.shields.io/gitter/room/gitterHQ/gitter.svg)](https://gitter.im/pybind/Lobby)
8
+ [![CI](https://github.com/pybind/pybind11/workflows/CI/badge.svg)](https://github.com/pybind/pybind11/actions)
9
+ [![Build status](https://ci.appveyor.com/api/projects/status/riaj54pn4h08xy40?svg=true)](https://ci.appveyor.com/project/wjakob/pybind11)
10
+
11
+ **pybind11** is a lightweight header-only library that exposes C++ types in
12
+ Python and vice versa, mainly to create Python bindings of existing C++ code.
13
+ Its goals and syntax are similar to the excellent [Boost.Python][] library by
14
+ David Abrahams: to minimize boilerplate code in traditional extension modules
15
+ by inferring type information using compile-time introspection.
16
+
17
+ The main issue with Boost.Python—and the reason for creating such a similar
18
+ project—is Boost. Boost is an enormously large and complex suite of utility
19
+ libraries that works with almost every C++ compiler in existence. This
20
+ compatibility has its cost: arcane template tricks and workarounds are
21
+ necessary to support the oldest and buggiest of compiler specimens. Now that
22
+ C++11-compatible compilers are widely available, this heavy machinery has
23
+ become an excessively large and unnecessary dependency.
24
+
25
+ Think of this library as a tiny self-contained version of Boost.Python with
26
+ everything stripped away that isn't relevant for binding generation. Without
27
+ comments, the core header files only require ~4K lines of code and depend on
28
+ Python (2.7 or 3.5+, or PyPy) and the C++ standard library. This compact
29
+ implementation was possible thanks to some of the new C++11 language features
30
+ (specifically: tuples, lambda functions and variadic templates). Since its
31
+ creation, this library has grown beyond Boost.Python in many ways, leading to
32
+ dramatically simpler binding code in many common situations.
33
+
34
+ Tutorial and reference documentation is provided at
35
+ [pybind11.readthedocs.org][]. A PDF version of the manual is available
36
+ [here][docs-pdf].
37
+
38
+ ## Core features
39
+ pybind11 can map the following core C++ features to Python:
40
+
41
+ - Functions accepting and returning custom data structures per value, reference, or pointer
42
+ - Instance methods and static methods
43
+ - Overloaded functions
44
+ - Instance attributes and static attributes
45
+ - Arbitrary exception types
46
+ - Enumerations
47
+ - Callbacks
48
+ - Iterators and ranges
49
+ - Custom operators
50
+ - Single and multiple inheritance
51
+ - STL data structures
52
+ - Smart pointers with reference counting like `std::shared_ptr`
53
+ - Internal references with correct reference counting
54
+ - C++ classes with virtual (and pure virtual) methods can be extended in Python
55
+
56
+ ## Goodies
57
+ In addition to the core functionality, pybind11 provides some extra goodies:
58
+
59
+ - Python 2.7, 3.5+, and PyPy (tested on 7.3) are supported with an implementation-agnostic
60
+ interface.
61
+
62
+ - It is possible to bind C++11 lambda functions with captured variables. The
63
+ lambda capture data is stored inside the resulting Python function object.
64
+
65
+ - pybind11 uses C++11 move constructors and move assignment operators whenever
66
+ possible to efficiently transfer custom data types.
67
+
68
+ - It's easy to expose the internal storage of custom data types through
69
+ Python's buffer protocols. This is handy e.g. for fast conversion between
70
+ C++ matrix classes like Eigen and NumPy without expensive copy operations.
71
+
72
+ - pybind11 can automatically vectorize functions so that they are transparently
73
+ applied to all entries of one or more NumPy array arguments.
74
+
75
+ - Python's slice-based access and assignment operations can be supported with
76
+ just a few lines of code.
77
+
78
+ - Everything is contained in just a few header files; there is no need to link
79
+ against any additional libraries.
80
+
81
+ - Binaries are generally smaller by a factor of at least 2 compared to
82
+ equivalent bindings generated by Boost.Python. A recent pybind11 conversion
83
+ of PyRosetta, an enormous Boost.Python binding project,
84
+ [reported][pyrosetta-report] a binary size reduction of **5.4x** and compile
85
+ time reduction by **5.8x**.
86
+
87
+ - Function signatures are precomputed at compile time (using `constexpr`),
88
+ leading to smaller binaries.
89
+
90
+ - With little extra effort, C++ types can be pickled and unpickled similar to
91
+ regular Python objects.
92
+
93
+ ## Supported compilers
94
+
95
+ 1. Clang/LLVM 3.3 or newer (for Apple Xcode's clang, this is 5.0.0 or newer)
96
+ 2. GCC 4.8 or newer
97
+ 3. Microsoft Visual Studio 2015 Update 3 or newer
98
+ 4. Intel C++ compiler 17 or newer (16 with pybind11 v2.0 and 15 with pybind11
99
+ v2.0 and a [workaround][intel-15-workaround])
100
+ 5. Cygwin/GCC (tested on 2.5.1)
101
+
102
+ ## About
103
+
104
+ This project was created by [Wenzel Jakob](http://rgl.epfl.ch/people/wjakob).
105
+ Significant features and/or improvements to the code were contributed by
106
+ Jonas Adler,
107
+ Lori A. Burns,
108
+ Sylvain Corlay,
109
+ Trent Houliston,
110
+ Axel Huebl,
111
+ @hulucc,
112
+ Sergey Lyskov,
113
+ Johan Mabille,
114
+ Tomasz Miąsko,
115
+ Dean Moldovan,
116
+ Ben Pritchard,
117
+ Jason Rhinelander,
118
+ Boris Schäling,
119
+ Pim Schellart,
120
+ Henry Schreiner,
121
+ Ivan Smirnov, and
122
+ Patrick Stewart.
123
+
124
+ ### Contributing
125
+
126
+ See the [contributing guide][] for information on building and contributing to
127
+ pybind11.
128
+
129
+
130
+ ### License
131
+
132
+ pybind11 is provided under a BSD-style license that can be found in the
133
+ [`LICENSE`][] file. By using, distributing, or contributing to this project,
134
+ you agree to the terms and conditions of this license.
135
+
136
+
137
+ [pybind11.readthedocs.org]: http://pybind11.readthedocs.org/en/master
138
+ [docs-pdf]: https://media.readthedocs.org/pdf/pybind11/master/pybind11.pdf
139
+ [Boost.Python]: http://www.boost.org/doc/libs/1_58_0/libs/python/doc/
140
+ [pyrosetta-report]: http://graylab.jhu.edu/RosettaCon2016/PyRosetta-4.pdf
141
+ [contributing guide]: https://github.com/pybind/pybind11/blob/master/.github/CONTRIBUTING.md
142
+ [`LICENSE`]: https://github.com/pybind/pybind11/blob/master/LICENSE
143
+ [intel-15-workaround]: https://github.com/pybind/pybind11/issues/276
DiffVG/pybind11/docs/Doxyfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PROJECT_NAME = pybind11
2
+ INPUT = ../include/pybind11/
3
+ RECURSIVE = YES
4
+
5
+ GENERATE_HTML = NO
6
+ GENERATE_LATEX = NO
7
+ GENERATE_XML = YES
8
+ XML_OUTPUT = .build/doxygenxml
9
+ XML_PROGRAMLISTING = YES
10
+
11
+ MACRO_EXPANSION = YES
12
+ EXPAND_ONLY_PREDEF = YES
13
+ EXPAND_AS_DEFINED = PYBIND11_RUNTIME_EXCEPTION
14
+
15
+ ALIASES = "rst=\verbatim embed:rst"
16
+ ALIASES += "endrst=\endverbatim"
17
+
18
+ QUIET = YES
19
+ WARNINGS = YES
20
+ WARN_IF_UNDOCUMENTED = NO
21
+ PREDEFINED = DOXYGEN_SHOULD_SKIP_THIS \
22
+ PY_MAJOR_VERSION=3
DiffVG/pybind11/docs/_static/theme_overrides.css ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .wy-table-responsive table td,
2
+ .wy-table-responsive table th {
3
+ white-space: initial !important;
4
+ }
5
+ .rst-content table.docutils td {
6
+ vertical-align: top !important;
7
+ }
8
+ div[class^='highlight'] pre {
9
+ white-space: pre;
10
+ white-space: pre-wrap;
11
+ }
DiffVG/pybind11/docs/advanced/cast/chrono.rst ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Chrono
2
+ ======
3
+
4
+ When including the additional header file :file:`pybind11/chrono.h` conversions
5
+ from C++11 chrono datatypes to python datetime objects are automatically enabled.
6
+ This header also enables conversions of python floats (often from sources such
7
+ as ``time.monotonic()``, ``time.perf_counter()`` and ``time.process_time()``)
8
+ into durations.
9
+
10
+ An overview of clocks in C++11
11
+ ------------------------------
12
+
13
+ A point of confusion when using these conversions is the differences between
14
+ clocks provided in C++11. There are three clock types defined by the C++11
15
+ standard and users can define their own if needed. Each of these clocks has
16
+ different properties and when converting to and from python will give different
17
+ results.
18
+
19
+ The first clock defined by the standard is ``std::chrono::system_clock``. This
20
+ clock measures the current date and time. However, this clock changes with
21
+ updates to the operating system time. For example, if your time is synchronised
22
+ with a time server this clock will change. This makes this clock a poor choice
23
+ for timing purposes but good for measuring the wall time.
24
+
25
+ The second clock defined in the standard is ``std::chrono::steady_clock``.
26
+ This clock ticks at a steady rate and is never adjusted. This makes it excellent
27
+ for timing purposes, however the value in this clock does not correspond to the
28
+ current date and time. Often this clock will be the amount of time your system
29
+ has been on, although it does not have to be. This clock will never be the same
30
+ clock as the system clock as the system clock can change but steady clocks
31
+ cannot.
32
+
33
+ The third clock defined in the standard is ``std::chrono::high_resolution_clock``.
34
+ This clock is the clock that has the highest resolution out of the clocks in the
35
+ system. It is normally a typedef to either the system clock or the steady clock
36
+ but can be its own independent clock. This is important when using these
37
+ conversions as the types you get in python for this clock might be different
38
+ depending on the system.
39
+ If it is a typedef of the system clock, python will get datetime objects, but if
40
+ it is a different clock they will be timedelta objects.
41
+
42
+ Provided conversions
43
+ --------------------
44
+
45
+ .. rubric:: C++ to Python
46
+
47
+ - ``std::chrono::system_clock::time_point`` → ``datetime.datetime``
48
+ System clock times are converted to python datetime instances. They are
49
+ in the local timezone, but do not have any timezone information attached
50
+ to them (they are naive datetime objects).
51
+
52
+ - ``std::chrono::duration`` → ``datetime.timedelta``
53
+ Durations are converted to timedeltas, any precision in the duration
54
+ greater than microseconds is lost by rounding towards zero.
55
+
56
+ - ``std::chrono::[other_clocks]::time_point`` → ``datetime.timedelta``
57
+ Any clock time that is not the system clock is converted to a time delta.
58
+ This timedelta measures the time from the clock's epoch to now.
59
+
60
+ .. rubric:: Python to C++
61
+
62
+ - ``datetime.datetime`` or ``datetime.date`` or ``datetime.time`` → ``std::chrono::system_clock::time_point``
63
+ Date/time objects are converted into system clock timepoints. Any
64
+ timezone information is ignored and the type is treated as a naive
65
+ object.
66
+
67
+ - ``datetime.timedelta`` → ``std::chrono::duration``
68
+ Time deltas are converted into durations with microsecond precision.
69
+
70
+ - ``datetime.timedelta`` → ``std::chrono::[other_clocks]::time_point``
71
+ Time deltas that are converted into clock timepoints are treated as
72
+ the amount of time from the start of the clock's epoch.
73
+
74
+ - ``float`` → ``std::chrono::duration``
75
+ Floats that are passed to C++ as durations will be interpreted as a number of
76
+ seconds. These will be converted to the duration using ``duration_cast``
77
+ from the float.
78
+
79
+ - ``float`` → ``std::chrono::[other_clocks]::time_point``
80
+ Floats that are passed to C++ as time points will be interpreted as the
81
+ number of seconds from the start of the clock's epoch.
DiffVG/pybind11/docs/advanced/cast/custom.rst ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Custom type casters
2
+ ===================
3
+
4
+ In very rare cases, applications may require custom type casters that cannot be
5
+ expressed using the abstractions provided by pybind11, thus requiring raw
6
+ Python C API calls. This is fairly advanced usage and should only be pursued by
7
+ experts who are familiar with the intricacies of Python reference counting.
8
+
9
+ The following snippets demonstrate how this works for a very simple ``inty``
10
+ type that should be convertible from Python types that provide a
11
+ ``__int__(self)`` method.
12
+
13
+ .. code-block:: cpp
14
+
15
+ struct inty { long long_value; };
16
+
17
+ void print(inty s) {
18
+ std::cout << s.long_value << std::endl;
19
+ }
20
+
21
+ The following Python snippet demonstrates the intended usage from the Python side:
22
+
23
+ .. code-block:: python
24
+
25
+ class A:
26
+ def __int__(self):
27
+ return 123
28
+
29
+ from example import print
30
+ print(A())
31
+
32
+ To register the necessary conversion routines, it is necessary to add
33
+ a partial overload to the ``pybind11::detail::type_caster<T>`` template.
34
+ Although this is an implementation detail, adding partial overloads to this
35
+ type is explicitly allowed.
36
+
37
+ .. code-block:: cpp
38
+
39
+ namespace pybind11 { namespace detail {
40
+ template <> struct type_caster<inty> {
41
+ public:
42
+ /**
43
+ * This macro establishes the name 'inty' in
44
+ * function signatures and declares a local variable
45
+ * 'value' of type inty
46
+ */
47
+ PYBIND11_TYPE_CASTER(inty, _("inty"));
48
+
49
+ /**
50
+ * Conversion part 1 (Python->C++): convert a PyObject into an inty
51
+ * instance or return false upon failure. The second argument
52
+ * indicates whether implicit conversions should be applied.
53
+ */
54
+ bool load(handle src, bool) {
55
+ /* Extract PyObject from handle */
56
+ PyObject *source = src.ptr();
57
+ /* Try converting into a Python integer value */
58
+ PyObject *tmp = PyNumber_Long(source);
59
+ if (!tmp)
60
+ return false;
61
+ /* Now try to convert into a C++ int */
62
+ value.long_value = PyLong_AsLong(tmp);
63
+ Py_DECREF(tmp);
64
+ /* Ensure return code was OK (to avoid out-of-range errors etc) */
65
+ return !(value.long_value == -1 && PyErr_Occurred());
66
+ }
67
+
68
+ /**
69
+ * Conversion part 2 (C++ -> Python): convert an inty instance into
70
+ * a Python object. The second and third arguments are used to
71
+ * indicate the return value policy and parent object (for
72
+ * ``return_value_policy::reference_internal``) and are generally
73
+ * ignored by implicit casters.
74
+ */
75
+ static handle cast(inty src, return_value_policy /* policy */, handle /* parent */) {
76
+ return PyLong_FromLong(src.long_value);
77
+ }
78
+ };
79
+ }} // namespace pybind11::detail
80
+
81
+ .. note::
82
+
83
+ A ``type_caster<T>`` defined with ``PYBIND11_TYPE_CASTER(T, ...)`` requires
84
+ that ``T`` is default-constructible (``value`` is first default constructed
85
+ and then ``load()`` assigns to it).
86
+
87
+ .. warning::
88
+
89
+ When using custom type casters, it's important to declare them consistently
90
+ in every compilation unit of the Python extension module. Otherwise,
91
+ undefined behavior can ensue.
DiffVG/pybind11/docs/advanced/cast/eigen.rst ADDED
@@ -0,0 +1,310 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Eigen
2
+ #####
3
+
4
+ `Eigen <http://eigen.tuxfamily.org>`_ is a C++ header-based library for dense and
5
+ sparse linear algebra. Due to its popularity and widespread adoption, pybind11
6
+ provides transparent conversion and limited mapping support between Eigen and
7
+ Scientific Python linear algebra data types.
8
+
9
+ To enable the built-in Eigen support you must include the optional header file
10
+ :file:`pybind11/eigen.h`.
11
+
12
+ Pass-by-value
13
+ =============
14
+
15
+ When binding a function with ordinary Eigen dense object arguments (for
16
+ example, ``Eigen::MatrixXd``), pybind11 will accept any input value that is
17
+ already (or convertible to) a ``numpy.ndarray`` with dimensions compatible with
18
+ the Eigen type, copy its values into a temporary Eigen variable of the
19
+ appropriate type, then call the function with this temporary variable.
20
+
21
+ Sparse matrices are similarly copied to or from
22
+ ``scipy.sparse.csr_matrix``/``scipy.sparse.csc_matrix`` objects.
23
+
24
+ Pass-by-reference
25
+ =================
26
+
27
+ One major limitation of the above is that every data conversion implicitly
28
+ involves a copy, which can be both expensive (for large matrices) and disallows
29
+ binding functions that change their (Matrix) arguments. Pybind11 allows you to
30
+ work around this by using Eigen's ``Eigen::Ref<MatrixType>`` class much as you
31
+ would when writing a function taking a generic type in Eigen itself (subject to
32
+ some limitations discussed below).
33
+
34
+ When calling a bound function accepting a ``Eigen::Ref<const MatrixType>``
35
+ type, pybind11 will attempt to avoid copying by using an ``Eigen::Map`` object
36
+ that maps into the source ``numpy.ndarray`` data: this requires both that the
37
+ data types are the same (e.g. ``dtype='float64'`` and ``MatrixType::Scalar`` is
38
+ ``double``); and that the storage is layout compatible. The latter limitation
39
+ is discussed in detail in the section below, and requires careful
40
+ consideration: by default, numpy matrices and Eigen matrices are *not* storage
41
+ compatible.
42
+
43
+ If the numpy matrix cannot be used as is (either because its types differ, e.g.
44
+ passing an array of integers to an Eigen parameter requiring doubles, or
45
+ because the storage is incompatible), pybind11 makes a temporary copy and
46
+ passes the copy instead.
47
+
48
+ When a bound function parameter is instead ``Eigen::Ref<MatrixType>`` (note the
49
+ lack of ``const``), pybind11 will only allow the function to be called if it
50
+ can be mapped *and* if the numpy array is writeable (that is
51
+ ``a.flags.writeable`` is true). Any access (including modification) made to
52
+ the passed variable will be transparently carried out directly on the
53
+ ``numpy.ndarray``.
54
+
55
+ This means you can write code such as the following and have it work as
56
+ expected:
57
+
58
+ .. code-block:: cpp
59
+
60
+ void scale_by_2(Eigen::Ref<Eigen::VectorXd> v) {
61
+ v *= 2;
62
+ }
63
+
64
+ Note, however, that you will likely run into limitations due to numpy and
65
+ Eigen's different default storage order for data; see the below section on
66
+ :ref:`storage_orders` for details on how to bind code that won't run into such
67
+ limitations.
68
+
69
+ .. note::
70
+
71
+ Passing by reference is not supported for sparse types.
72
+
73
+ Returning values to Python
74
+ ==========================
75
+
76
+ When returning an ordinary dense Eigen matrix type to numpy (e.g.
77
+ ``Eigen::MatrixXd`` or ``Eigen::RowVectorXf``) pybind11 keeps the matrix and
78
+ returns a numpy array that directly references the Eigen matrix: no copy of the
79
+ data is performed. The numpy array will have ``array.flags.owndata`` set to
80
+ ``False`` to indicate that it does not own the data, and the lifetime of the
81
+ stored Eigen matrix will be tied to the returned ``array``.
82
+
83
+ If you bind a function with a non-reference, ``const`` return type (e.g.
84
+ ``const Eigen::MatrixXd``), the same thing happens except that pybind11 also
85
+ sets the numpy array's ``writeable`` flag to false.
86
+
87
+ If you return an lvalue reference or pointer, the usual pybind11 rules apply,
88
+ as dictated by the binding function's return value policy (see the
89
+ documentation on :ref:`return_value_policies` for full details). That means,
90
+ without an explicit return value policy, lvalue references will be copied and
91
+ pointers will be managed by pybind11. In order to avoid copying, you should
92
+ explicitly specify an appropriate return value policy, as in the following
93
+ example:
94
+
95
+ .. code-block:: cpp
96
+
97
+ class MyClass {
98
+ Eigen::MatrixXd big_mat = Eigen::MatrixXd::Zero(10000, 10000);
99
+ public:
100
+ Eigen::MatrixXd &getMatrix() { return big_mat; }
101
+ const Eigen::MatrixXd &viewMatrix() { return big_mat; }
102
+ };
103
+
104
+ // Later, in binding code:
105
+ py::class_<MyClass>(m, "MyClass")
106
+ .def(py::init<>())
107
+ .def("copy_matrix", &MyClass::getMatrix) // Makes a copy!
108
+ .def("get_matrix", &MyClass::getMatrix, py::return_value_policy::reference_internal)
109
+ .def("view_matrix", &MyClass::viewMatrix, py::return_value_policy::reference_internal)
110
+ ;
111
+
112
+ .. code-block:: python
113
+
114
+ a = MyClass()
115
+ m = a.get_matrix() # flags.writeable = True, flags.owndata = False
116
+ v = a.view_matrix() # flags.writeable = False, flags.owndata = False
117
+ c = a.copy_matrix() # flags.writeable = True, flags.owndata = True
118
+ # m[5,6] and v[5,6] refer to the same element, c[5,6] does not.
119
+
120
+ Note in this example that ``py::return_value_policy::reference_internal`` is
121
+ used to tie the life of the MyClass object to the life of the returned arrays.
122
+
123
+ You may also return an ``Eigen::Ref``, ``Eigen::Map`` or other map-like Eigen
124
+ object (for example, the return value of ``matrix.block()`` and related
125
+ methods) that map into a dense Eigen type. When doing so, the default
126
+ behaviour of pybind11 is to simply reference the returned data: you must take
127
+ care to ensure that this data remains valid! You may ask pybind11 to
128
+ explicitly *copy* such a return value by using the
129
+ ``py::return_value_policy::copy`` policy when binding the function. You may
130
+ also use ``py::return_value_policy::reference_internal`` or a
131
+ ``py::keep_alive`` to ensure the data stays valid as long as the returned numpy
132
+ array does.
133
+
134
+ When returning such a reference or map, pybind11 additionally respects the
135
+ readonly-status of the returned value, marking the numpy array as non-writeable
136
+ if the reference or map was itself read-only.
137
+
138
+ .. note::
139
+
140
+ Sparse types are always copied when returned.
141
+
142
+ .. _storage_orders:
143
+
144
+ Storage orders
145
+ ==============
146
+
147
+ Passing arguments via ``Eigen::Ref`` has some limitations that you must be
148
+ aware of in order to effectively pass matrices by reference. First and
149
+ foremost is that the default ``Eigen::Ref<MatrixType>`` class requires
150
+ contiguous storage along columns (for column-major types, the default in Eigen)
151
+ or rows if ``MatrixType`` is specifically an ``Eigen::RowMajor`` storage type.
152
+ The former, Eigen's default, is incompatible with ``numpy``'s default row-major
153
+ storage, and so you will not be able to pass numpy arrays to Eigen by reference
154
+ without making one of two changes.
155
+
156
+ (Note that this does not apply to vectors (or column or row matrices): for such
157
+ types the "row-major" and "column-major" distinction is meaningless).
158
+
159
+ The first approach is to change the use of ``Eigen::Ref<MatrixType>`` to the
160
+ more general ``Eigen::Ref<MatrixType, 0, Eigen::Stride<Eigen::Dynamic,
161
+ Eigen::Dynamic>>`` (or similar type with a fully dynamic stride type in the
162
+ third template argument). Since this is a rather cumbersome type, pybind11
163
+ provides a ``py::EigenDRef<MatrixType>`` type alias for your convenience (along
164
+ with EigenDMap for the equivalent Map, and EigenDStride for just the stride
165
+ type).
166
+
167
+ This type allows Eigen to map into any arbitrary storage order. This is not
168
+ the default in Eigen for performance reasons: contiguous storage allows
169
+ vectorization that cannot be done when storage is not known to be contiguous at
170
+ compile time. The default ``Eigen::Ref`` stride type allows non-contiguous
171
+ storage along the outer dimension (that is, the rows of a column-major matrix
172
+ or columns of a row-major matrix), but not along the inner dimension.
173
+
174
+ This type, however, has the added benefit of also being able to map numpy array
175
+ slices. For example, the following (contrived) example uses Eigen with a numpy
176
+ slice to multiply by 2 all coefficients that are both on even rows (0, 2, 4,
177
+ ...) and in columns 2, 5, or 8:
178
+
179
+ .. code-block:: cpp
180
+
181
+ m.def("scale", [](py::EigenDRef<Eigen::MatrixXd> m, double c) { m *= c; });
182
+
183
+ .. code-block:: python
184
+
185
+ # myarray = np.array(...)
186
+ scale(myarray[0::2, 2:9:3], 2)
187
+
188
+ The second approach to avoid copying is more intrusive: rearranging the
189
+ underlying data types to not run into the non-contiguous storage problem in the
190
+ first place. In particular, that means using matrices with ``Eigen::RowMajor``
191
+ storage, where appropriate, such as:
192
+
193
+ .. code-block:: cpp
194
+
195
+ using RowMatrixXd = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
196
+ // Use RowMatrixXd instead of MatrixXd
197
+
198
+ Now bound functions accepting ``Eigen::Ref<RowMatrixXd>`` arguments will be
199
+ callable with numpy's (default) arrays without involving a copy.
200
+
201
+ You can, alternatively, change the storage order that numpy arrays use by
202
+ adding the ``order='F'`` option when creating an array:
203
+
204
+ .. code-block:: python
205
+
206
+ myarray = np.array(source, order='F')
207
+
208
+ Such an object will be passable to a bound function accepting an
209
+ ``Eigen::Ref<MatrixXd>`` (or similar column-major Eigen type).
210
+
211
+ One major caveat with this approach, however, is that it is not entirely as
212
+ easy as simply flipping all Eigen or numpy usage from one to the other: some
213
+ operations may alter the storage order of a numpy array. For example, ``a2 =
214
+ array.transpose()`` results in ``a2`` being a view of ``array`` that references
215
+ the same data, but in the opposite storage order!
216
+
217
+ While this approach allows fully optimized vectorized calculations in Eigen, it
218
+ cannot be used with array slices, unlike the first approach.
219
+
220
+ When *returning* a matrix to Python (either a regular matrix, a reference via
221
+ ``Eigen::Ref<>``, or a map/block into a matrix), no special storage
222
+ consideration is required: the created numpy array will have the required
223
+ stride that allows numpy to properly interpret the array, whatever its storage
224
+ order.
225
+
226
+ Failing rather than copying
227
+ ===========================
228
+
229
+ The default behaviour when binding ``Eigen::Ref<const MatrixType>`` Eigen
230
+ references is to copy matrix values when passed a numpy array that does not
231
+ conform to the element type of ``MatrixType`` or does not have a compatible
232
+ stride layout. If you want to explicitly avoid copying in such a case, you
233
+ should bind arguments using the ``py::arg().noconvert()`` annotation (as
234
+ described in the :ref:`nonconverting_arguments` documentation).
235
+
236
+ The following example shows an example of arguments that don't allow data
237
+ copying to take place:
238
+
239
+ .. code-block:: cpp
240
+
241
+ // The method and function to be bound:
242
+ class MyClass {
243
+ // ...
244
+ double some_method(const Eigen::Ref<const MatrixXd> &matrix) { /* ... */ }
245
+ };
246
+ float some_function(const Eigen::Ref<const MatrixXf> &big,
247
+ const Eigen::Ref<const MatrixXf> &small) {
248
+ // ...
249
+ }
250
+
251
+ // The associated binding code:
252
+ using namespace pybind11::literals; // for "arg"_a
253
+ py::class_<MyClass>(m, "MyClass")
254
+ // ... other class definitions
255
+ .def("some_method", &MyClass::some_method, py::arg().noconvert());
256
+
257
+ m.def("some_function", &some_function,
258
+ "big"_a.noconvert(), // <- Don't allow copying for this arg
259
+ "small"_a // <- This one can be copied if needed
260
+ );
261
+
262
+ With the above binding code, attempting to call the ``some_method(m)``
263
+ method on a ``MyClass`` object, or attempting to call ``some_function(m, m2)``
264
+ will raise a ``RuntimeError`` rather than making a temporary copy of the array.
265
+ It will, however, allow the ``m2`` argument to be copied into a temporary if
266
+ necessary.
267
+
268
+ Note that explicitly specifying ``.noconvert()`` is not required for *mutable*
269
+ Eigen references (e.g. ``Eigen::Ref<MatrixXd>`` without ``const`` on the
270
+ ``MatrixXd``): mutable references will never be called with a temporary copy.
271
+
272
+ Vectors versus column/row matrices
273
+ ==================================
274
+
275
+ Eigen and numpy have fundamentally different notions of a vector. In Eigen, a
276
+ vector is simply a matrix with the number of columns or rows set to 1 at
277
+ compile time (for a column vector or row vector, respectively). Numpy, in
278
+ contrast, has comparable 2-dimensional 1xN and Nx1 arrays, but *also* has
279
+ 1-dimensional arrays of size N.
280
+
281
+ When passing a 2-dimensional 1xN or Nx1 array to Eigen, the Eigen type must
282
+ have matching dimensions: That is, you cannot pass a 2-dimensional Nx1 numpy
283
+ array to an Eigen value expecting a row vector, or a 1xN numpy array as a
284
+ column vector argument.
285
+
286
+ On the other hand, pybind11 allows you to pass 1-dimensional arrays of length N
287
+ as Eigen parameters. If the Eigen type can hold a column vector of length N it
288
+ will be passed as such a column vector. If not, but the Eigen type constraints
289
+ will accept a row vector, it will be passed as a row vector. (The column
290
+ vector takes precedence when both are supported, for example, when passing a
291
+ 1D numpy array to a MatrixXd argument). Note that the type need not be
292
+ explicitly a vector: it is permitted to pass a 1D numpy array of size 5 to an
293
+ Eigen ``Matrix<double, Dynamic, 5>``: you would end up with a 1x5 Eigen matrix.
294
+ Passing the same to an ``Eigen::MatrixXd`` would result in a 5x1 Eigen matrix.
295
+
296
+ When returning an Eigen vector to numpy, the conversion is ambiguous: a row
297
+ vector of length 4 could be returned as either a 1D array of length 4, or as a
298
+ 2D array of size 1x4. When encountering such a situation, pybind11 compromises
299
+ by considering the returned Eigen type: if it is a compile-time vector--that
300
+ is, the type has either the number of rows or columns set to 1 at compile
301
+ time--pybind11 converts to a 1D numpy array when returning the value. For
302
+ instances that are a vector only at run-time (e.g. ``MatrixXd``,
303
+ ``Matrix<float, Dynamic, 4>``), pybind11 returns the vector as a 2D array to
304
+ numpy. If this isn't what you want, you can use ``array.reshape(...)`` to get
305
+ a view of the same data in the desired dimensions.
306
+
307
+ .. seealso::
308
+
309
+ The file :file:`tests/test_eigen.cpp` contains a complete example that
310
+ shows how to pass Eigen sparse and dense data types in more detail.
DiffVG/pybind11/docs/advanced/cast/functional.rst ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Functional
2
+ ##########
3
+
4
+ The following features must be enabled by including :file:`pybind11/functional.h`.
5
+
6
+
7
+ Callbacks and passing anonymous functions
8
+ =========================================
9
+
10
+ The C++11 standard brought lambda functions and the generic polymorphic
11
+ function wrapper ``std::function<>`` to the C++ programming language, which
12
+ enable powerful new ways of working with functions. Lambda functions come in
13
+ two flavors: stateless lambda functions resemble classic function pointers that
14
+ link to an anonymous piece of code, while stateful lambda functions
15
+ additionally depend on captured variables that are stored in an anonymous
16
+ *lambda closure object*.
17
+
18
+ Here is a simple example of a C++ function that takes an arbitrary function
19
+ (stateful or stateless) with signature ``int -> int`` as an argument and runs
20
+ it with the value 10.
21
+
22
+ .. code-block:: cpp
23
+
24
+ int func_arg(const std::function<int(int)> &f) {
25
+ return f(10);
26
+ }
27
+
28
+ The example below is more involved: it takes a function of signature ``int -> int``
29
+ and returns another function of the same kind. The return value is a stateful
30
+ lambda function, which stores the value ``f`` in the capture object and adds 1 to
31
+ its return value upon execution.
32
+
33
+ .. code-block:: cpp
34
+
35
+ std::function<int(int)> func_ret(const std::function<int(int)> &f) {
36
+ return [f](int i) {
37
+ return f(i) + 1;
38
+ };
39
+ }
40
+
41
+ This example demonstrates using python named parameters in C++ callbacks which
42
+ requires using ``py::cpp_function`` as a wrapper. Usage is similar to defining
43
+ methods of classes:
44
+
45
+ .. code-block:: cpp
46
+
47
+ py::cpp_function func_cpp() {
48
+ return py::cpp_function([](int i) { return i+1; },
49
+ py::arg("number"));
50
+ }
51
+
52
+ After including the extra header file :file:`pybind11/functional.h`, it is almost
53
+ trivial to generate binding code for all of these functions.
54
+
55
+ .. code-block:: cpp
56
+
57
+ #include <pybind11/functional.h>
58
+
59
+ PYBIND11_MODULE(example, m) {
60
+ m.def("func_arg", &func_arg);
61
+ m.def("func_ret", &func_ret);
62
+ m.def("func_cpp", &func_cpp);
63
+ }
64
+
65
+ The following interactive session shows how to call them from Python.
66
+
67
+ .. code-block:: pycon
68
+
69
+ $ python
70
+ >>> import example
71
+ >>> def square(i):
72
+ ... return i * i
73
+ ...
74
+ >>> example.func_arg(square)
75
+ 100L
76
+ >>> square_plus_1 = example.func_ret(square)
77
+ >>> square_plus_1(4)
78
+ 17L
79
+ >>> plus_1 = example.func_cpp()
80
+ >>> plus_1(number=43)
81
+ 44L
82
+
83
+ .. warning::
84
+
85
+ Keep in mind that passing a function from C++ to Python (or vice versa)
86
+ will instantiate a piece of wrapper code that translates function
87
+ invocations between the two languages. Naturally, this translation
88
+ increases the computational cost of each function call somewhat. A
89
+ problematic situation can arise when a function is copied back and forth
90
+ between Python and C++ many times in a row, in which case the underlying
91
+ wrappers will accumulate correspondingly. The resulting long sequence of
92
+ C++ -> Python -> C++ -> ... roundtrips can significantly decrease
93
+ performance.
94
+
95
+ There is one exception: pybind11 detects the case where a stateless function
96
+ (i.e. a function pointer or a lambda function without captured variables)
97
+ is passed as an argument to another C++ function exposed in Python. In this
98
+ case, there is no overhead. Pybind11 will extract the underlying C++
99
+ function pointer from the wrapped function to sidestep a potential C++ ->
100
+ Python -> C++ roundtrip. This is demonstrated in :file:`tests/test_callbacks.cpp`.
101
+
102
+ .. note::
103
+
104
+ This functionality is very useful when generating bindings for callbacks in
105
+ C++ libraries (e.g. GUI libraries, asynchronous networking libraries, etc.).
106
+
107
+ The file :file:`tests/test_callbacks.cpp` contains a complete example
108
+ that demonstrates how to work with callbacks and anonymous functions in
109
+ more detail.
DiffVG/pybind11/docs/advanced/cast/index.rst ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Type conversions
2
+ ################
3
+
4
+ Apart from enabling cross-language function calls, a fundamental problem
5
+ that a binding tool like pybind11 must address is to provide access to
6
+ native Python types in C++ and vice versa. There are three fundamentally
7
+ different ways to do this—which approach is preferable for a particular type
8
+ depends on the situation at hand.
9
+
10
+ 1. Use a native C++ type everywhere. In this case, the type must be wrapped
11
+ using pybind11-generated bindings so that Python can interact with it.
12
+
13
+ 2. Use a native Python type everywhere. It will need to be wrapped so that
14
+ C++ functions can interact with it.
15
+
16
+ 3. Use a native C++ type on the C++ side and a native Python type on the
17
+ Python side. pybind11 refers to this as a *type conversion*.
18
+
19
+ Type conversions are the most "natural" option in the sense that native
20
+ (non-wrapped) types are used everywhere. The main downside is that a copy
21
+ of the data must be made on every Python ↔ C++ transition: this is
22
+ needed since the C++ and Python versions of the same type generally won't
23
+ have the same memory layout.
24
+
25
+ pybind11 can perform many kinds of conversions automatically. An overview
26
+ is provided in the table ":ref:`conversion_table`".
27
+
28
+ The following subsections discuss the differences between these options in more
29
+ detail. The main focus in this section is on type conversions, which represent
30
+ the last case of the above list.
31
+
32
+ .. toctree::
33
+ :maxdepth: 1
34
+
35
+ overview
36
+ strings
37
+ stl
38
+ functional
39
+ chrono
40
+ eigen
41
+ custom
DiffVG/pybind11/docs/advanced/cast/overview.rst ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Overview
2
+ ########
3
+
4
+ .. rubric:: 1. Native type in C++, wrapper in Python
5
+
6
+ Exposing a custom C++ type using :class:`py::class_` was covered in detail
7
+ in the :doc:`/classes` section. There, the underlying data structure is
8
+ always the original C++ class while the :class:`py::class_` wrapper provides
9
+ a Python interface. Internally, when an object like this is sent from C++ to
10
+ Python, pybind11 will just add the outer wrapper layer over the native C++
11
+ object. Getting it back from Python is just a matter of peeling off the
12
+ wrapper.
13
+
14
+ .. rubric:: 2. Wrapper in C++, native type in Python
15
+
16
+ This is the exact opposite situation. Now, we have a type which is native to
17
+ Python, like a ``tuple`` or a ``list``. One way to get this data into C++ is
18
+ with the :class:`py::object` family of wrappers. These are explained in more
19
+ detail in the :doc:`/advanced/pycpp/object` section. We'll just give a quick
20
+ example here:
21
+
22
+ .. code-block:: cpp
23
+
24
+ void print_list(py::list my_list) {
25
+ for (auto item : my_list)
26
+ std::cout << item << " ";
27
+ }
28
+
29
+ .. code-block:: pycon
30
+
31
+ >>> print_list([1, 2, 3])
32
+ 1 2 3
33
+
34
+ The Python ``list`` is not converted in any way -- it's just wrapped in a C++
35
+ :class:`py::list` class. At its core it's still a Python object. Copying a
36
+ :class:`py::list` will do the usual reference-counting like in Python.
37
+ Returning the object to Python will just remove the thin wrapper.
38
+
39
+ .. rubric:: 3. Converting between native C++ and Python types
40
+
41
+ In the previous two cases we had a native type in one language and a wrapper in
42
+ the other. Now, we have native types on both sides and we convert between them.
43
+
44
+ .. code-block:: cpp
45
+
46
+ void print_vector(const std::vector<int> &v) {
47
+ for (auto item : v)
48
+ std::cout << item << " ";
49
+ }
50
+
51
+ .. code-block:: pycon
52
+
53
+ >>> print_vector([1, 2, 3])
54
+ 1 2 3
55
+
56
+ In this case, pybind11 will construct a new ``std::vector<int>`` and copy each
57
+ element from the Python ``list``. The newly constructed object will be passed
58
+ to ``print_vector``. The same thing happens in the other direction: a new
59
+ ``list`` is made to match the value returned from C++.
60
+
61
+ Lots of these conversions are supported out of the box, as shown in the table
62
+ below. They are very convenient, but keep in mind that these conversions are
63
+ fundamentally based on copying data. This is perfectly fine for small immutable
64
+ types but it may become quite expensive for large data structures. This can be
65
+ avoided by overriding the automatic conversion with a custom wrapper (i.e. the
66
+ above-mentioned approach 1). This requires some manual effort and more details
67
+ are available in the :ref:`opaque` section.
68
+
69
+ .. _conversion_table:
70
+
71
+ List of all builtin conversions
72
+ -------------------------------
73
+
74
+ The following basic data types are supported out of the box (some may require
75
+ an additional extension header to be included). To pass other data structures
76
+ as arguments and return values, refer to the section on binding :ref:`classes`.
77
+
78
+ +------------------------------------+---------------------------+-------------------------------+
79
+ | Data type | Description | Header file |
80
+ +====================================+===========================+===============================+
81
+ | ``int8_t``, ``uint8_t`` | 8-bit integers | :file:`pybind11/pybind11.h` |
82
+ +------------------------------------+---------------------------+-------------------------------+
83
+ | ``int16_t``, ``uint16_t`` | 16-bit integers | :file:`pybind11/pybind11.h` |
84
+ +------------------------------------+---------------------------+-------------------------------+
85
+ | ``int32_t``, ``uint32_t`` | 32-bit integers | :file:`pybind11/pybind11.h` |
86
+ +------------------------------------+---------------------------+-------------------------------+
87
+ | ``int64_t``, ``uint64_t`` | 64-bit integers | :file:`pybind11/pybind11.h` |
88
+ +------------------------------------+---------------------------+-------------------------------+
89
+ | ``ssize_t``, ``size_t`` | Platform-dependent size | :file:`pybind11/pybind11.h` |
90
+ +------------------------------------+---------------------------+-------------------------------+
91
+ | ``float``, ``double`` | Floating point types | :file:`pybind11/pybind11.h` |
92
+ +------------------------------------+---------------------------+-------------------------------+
93
+ | ``bool`` | Two-state Boolean type | :file:`pybind11/pybind11.h` |
94
+ +------------------------------------+---------------------------+-------------------------------+
95
+ | ``char`` | Character literal | :file:`pybind11/pybind11.h` |
96
+ +------------------------------------+---------------------------+-------------------------------+
97
+ | ``char16_t`` | UTF-16 character literal | :file:`pybind11/pybind11.h` |
98
+ +------------------------------------+---------------------------+-------------------------------+
99
+ | ``char32_t`` | UTF-32 character literal | :file:`pybind11/pybind11.h` |
100
+ +------------------------------------+---------------------------+-------------------------------+
101
+ | ``wchar_t`` | Wide character literal | :file:`pybind11/pybind11.h` |
102
+ +------------------------------------+---------------------------+-------------------------------+
103
+ | ``const char *`` | UTF-8 string literal | :file:`pybind11/pybind11.h` |
104
+ +------------------------------------+---------------------------+-------------------------------+
105
+ | ``const char16_t *`` | UTF-16 string literal | :file:`pybind11/pybind11.h` |
106
+ +------------------------------------+---------------------------+-------------------------------+
107
+ | ``const char32_t *`` | UTF-32 string literal | :file:`pybind11/pybind11.h` |
108
+ +------------------------------------+---------------------------+-------------------------------+
109
+ | ``const wchar_t *`` | Wide string literal | :file:`pybind11/pybind11.h` |
110
+ +------------------------------------+---------------------------+-------------------------------+
111
+ | ``std::string`` | STL dynamic UTF-8 string | :file:`pybind11/pybind11.h` |
112
+ +------------------------------------+---------------------------+-------------------------------+
113
+ | ``std::u16string`` | STL dynamic UTF-16 string | :file:`pybind11/pybind11.h` |
114
+ +------------------------------------+---------------------------+-------------------------------+
115
+ | ``std::u32string`` | STL dynamic UTF-32 string | :file:`pybind11/pybind11.h` |
116
+ +------------------------------------+---------------------------+-------------------------------+
117
+ | ``std::wstring`` | STL dynamic wide string | :file:`pybind11/pybind11.h` |
118
+ +------------------------------------+---------------------------+-------------------------------+
119
+ | ``std::string_view``, | STL C++17 string views | :file:`pybind11/pybind11.h` |
120
+ | ``std::u16string_view``, etc. | | |
121
+ +------------------------------------+---------------------------+-------------------------------+
122
+ | ``std::pair<T1, T2>`` | Pair of two custom types | :file:`pybind11/pybind11.h` |
123
+ +------------------------------------+---------------------------+-------------------------------+
124
+ | ``std::tuple<...>`` | Arbitrary tuple of types | :file:`pybind11/pybind11.h` |
125
+ +------------------------------------+---------------------------+-------------------------------+
126
+ | ``std::reference_wrapper<...>`` | Reference type wrapper | :file:`pybind11/pybind11.h` |
127
+ +------------------------------------+---------------------------+-------------------------------+
128
+ | ``std::complex<T>`` | Complex numbers | :file:`pybind11/complex.h` |
129
+ +------------------------------------+---------------------------+-------------------------------+
130
+ | ``std::array<T, Size>`` | STL static array | :file:`pybind11/stl.h` |
131
+ +------------------------------------+---------------------------+-------------------------------+
132
+ | ``std::vector<T>`` | STL dynamic array | :file:`pybind11/stl.h` |
133
+ +------------------------------------+---------------------------+-------------------------------+
134
+ | ``std::deque<T>`` | STL double-ended queue | :file:`pybind11/stl.h` |
135
+ +------------------------------------+---------------------------+-------------------------------+
136
+ | ``std::valarray<T>`` | STL value array | :file:`pybind11/stl.h` |
137
+ +------------------------------------+---------------------------+-------------------------------+
138
+ | ``std::list<T>`` | STL linked list | :file:`pybind11/stl.h` |
139
+ +------------------------------------+---------------------------+-------------------------------+
140
+ | ``std::map<T1, T2>`` | STL ordered map | :file:`pybind11/stl.h` |
141
+ +------------------------------------+---------------------------+-------------------------------+
142
+ | ``std::unordered_map<T1, T2>`` | STL unordered map | :file:`pybind11/stl.h` |
143
+ +------------------------------------+---------------------------+-------------------------------+
144
+ | ``std::set<T>`` | STL ordered set | :file:`pybind11/stl.h` |
145
+ +------------------------------------+---------------------------+-------------------------------+
146
+ | ``std::unordered_set<T>`` | STL unordered set | :file:`pybind11/stl.h` |
147
+ +------------------------------------+---------------------------+-------------------------------+
148
+ | ``std::optional<T>`` | STL optional type (C++17) | :file:`pybind11/stl.h` |
149
+ +------------------------------------+---------------------------+-------------------------------+
150
+ | ``std::experimental::optional<T>`` | STL optional type (exp.) | :file:`pybind11/stl.h` |
151
+ +------------------------------------+---------------------------+-------------------------------+
152
+ | ``std::variant<...>`` | Type-safe union (C++17) | :file:`pybind11/stl.h` |
153
+ +------------------------------------+---------------------------+-------------------------------+
154
+ | ``std::function<...>`` | STL polymorphic function | :file:`pybind11/functional.h` |
155
+ +------------------------------------+---------------------------+-------------------------------+
156
+ | ``std::chrono::duration<...>`` | STL time duration | :file:`pybind11/chrono.h` |
157
+ +------------------------------------+---------------------------+-------------------------------+
158
+ | ``std::chrono::time_point<...>`` | STL date/time | :file:`pybind11/chrono.h` |
159
+ +------------------------------------+---------------------------+-------------------------------+
160
+ | ``Eigen::Matrix<...>`` | Eigen: dense matrix | :file:`pybind11/eigen.h` |
161
+ +------------------------------------+---------------------------+-------------------------------+
162
+ | ``Eigen::Map<...>`` | Eigen: mapped memory | :file:`pybind11/eigen.h` |
163
+ +------------------------------------+---------------------------+-------------------------------+
164
+ | ``Eigen::SparseMatrix<...>`` | Eigen: sparse matrix | :file:`pybind11/eigen.h` |
165
+ +------------------------------------+---------------------------+-------------------------------+
DiffVG/pybind11/docs/advanced/cast/stl.rst ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ STL containers
2
+ ##############
3
+
4
+ Automatic conversion
5
+ ====================
6
+
7
+ When including the additional header file :file:`pybind11/stl.h`, conversions
8
+ between ``std::vector<>``/``std::deque<>``/``std::list<>``/``std::array<>``,
9
+ ``std::set<>``/``std::unordered_set<>``, and
10
+ ``std::map<>``/``std::unordered_map<>`` and the Python ``list``, ``set`` and
11
+ ``dict`` data structures are automatically enabled. The types ``std::pair<>``
12
+ and ``std::tuple<>`` are already supported out of the box with just the core
13
+ :file:`pybind11/pybind11.h` header.
14
+
15
+ The major downside of these implicit conversions is that containers must be
16
+ converted (i.e. copied) on every Python->C++ and C++->Python transition, which
17
+ can have implications on the program semantics and performance. Please read the
18
+ next sections for more details and alternative approaches that avoid this.
19
+
20
+ .. note::
21
+
22
+ Arbitrary nesting of any of these types is possible.
23
+
24
+ .. seealso::
25
+
26
+ The file :file:`tests/test_stl.cpp` contains a complete
27
+ example that demonstrates how to pass STL data types in more detail.
28
+
29
+ .. _cpp17_container_casters:
30
+
31
+ C++17 library containers
32
+ ========================
33
+
34
+ The :file:`pybind11/stl.h` header also includes support for ``std::optional<>``
35
+ and ``std::variant<>``. These require a C++17 compiler and standard library.
36
+ In C++14 mode, ``std::experimental::optional<>`` is supported if available.
37
+
38
+ Various versions of these containers also exist for C++11 (e.g. in Boost).
39
+ pybind11 provides an easy way to specialize the ``type_caster`` for such
40
+ types:
41
+
42
+ .. code-block:: cpp
43
+
44
+ // `boost::optional` as an example -- can be any `std::optional`-like container
45
+ namespace pybind11 { namespace detail {
46
+ template <typename T>
47
+ struct type_caster<boost::optional<T>> : optional_caster<boost::optional<T>> {};
48
+ }}
49
+
50
+ The above should be placed in a header file and included in all translation units
51
+ where automatic conversion is needed. Similarly, a specialization can be provided
52
+ for custom variant types:
53
+
54
+ .. code-block:: cpp
55
+
56
+ // `boost::variant` as an example -- can be any `std::variant`-like container
57
+ namespace pybind11 { namespace detail {
58
+ template <typename... Ts>
59
+ struct type_caster<boost::variant<Ts...>> : variant_caster<boost::variant<Ts...>> {};
60
+
61
+ // Specifies the function used to visit the variant -- `apply_visitor` instead of `visit`
62
+ template <>
63
+ struct visit_helper<boost::variant> {
64
+ template <typename... Args>
65
+ static auto call(Args &&...args) -> decltype(boost::apply_visitor(args...)) {
66
+ return boost::apply_visitor(args...);
67
+ }
68
+ };
69
+ }} // namespace pybind11::detail
70
+
71
+ The ``visit_helper`` specialization is not required if your ``name::variant`` provides
72
+ a ``name::visit()`` function. For any other function name, the specialization must be
73
+ included to tell pybind11 how to visit the variant.
74
+
75
+ .. note::
76
+
77
+ pybind11 only supports the modern implementation of ``boost::variant``
78
+ which makes use of variadic templates. This requires Boost 1.56 or newer.
79
+ Additionally, on Windows, MSVC 2017 is required because ``boost::variant``
80
+ falls back to the old non-variadic implementation on MSVC 2015.
81
+
82
+ .. _opaque:
83
+
84
+ Making opaque types
85
+ ===================
86
+
87
+ pybind11 heavily relies on a template matching mechanism to convert parameters
88
+ and return values that are constructed from STL data types such as vectors,
89
+ linked lists, hash tables, etc. This even works in a recursive manner, for
90
+ instance to deal with lists of hash maps of pairs of elementary and custom
91
+ types, etc.
92
+
93
+ However, a fundamental limitation of this approach is that internal conversions
94
+ between Python and C++ types involve a copy operation that prevents
95
+ pass-by-reference semantics. What does this mean?
96
+
97
+ Suppose we bind the following function
98
+
99
+ .. code-block:: cpp
100
+
101
+ void append_1(std::vector<int> &v) {
102
+ v.push_back(1);
103
+ }
104
+
105
+ and call it from Python, the following happens:
106
+
107
+ .. code-block:: pycon
108
+
109
+ >>> v = [5, 6]
110
+ >>> append_1(v)
111
+ >>> print(v)
112
+ [5, 6]
113
+
114
+ As you can see, when passing STL data structures by reference, modifications
115
+ are not propagated back to the Python side. A similar situation arises when
116
+ exposing STL data structures using the ``def_readwrite`` or ``def_readonly``
117
+ functions:
118
+
119
+ .. code-block:: cpp
120
+
121
+ /* ... definition ... */
122
+
123
+ class MyClass {
124
+ std::vector<int> contents;
125
+ };
126
+
127
+ /* ... binding code ... */
128
+
129
+ py::class_<MyClass>(m, "MyClass")
130
+ .def(py::init<>())
131
+ .def_readwrite("contents", &MyClass::contents);
132
+
133
+ In this case, properties can be read and written in their entirety. However, an
134
+ ``append`` operation involving such a list type has no effect:
135
+
136
+ .. code-block:: pycon
137
+
138
+ >>> m = MyClass()
139
+ >>> m.contents = [5, 6]
140
+ >>> print(m.contents)
141
+ [5, 6]
142
+ >>> m.contents.append(7)
143
+ >>> print(m.contents)
144
+ [5, 6]
145
+
146
+ Finally, the involved copy operations can be costly when dealing with very
147
+ large lists. To deal with all of the above situations, pybind11 provides a
148
+ macro named ``PYBIND11_MAKE_OPAQUE(T)`` that disables the template-based
149
+ conversion machinery of types, thus rendering them *opaque*. The contents of
150
+ opaque objects are never inspected or extracted, hence they *can* be passed by
151
+ reference. For instance, to turn ``std::vector<int>`` into an opaque type, add
152
+ the declaration
153
+
154
+ .. code-block:: cpp
155
+
156
+ PYBIND11_MAKE_OPAQUE(std::vector<int>);
157
+
158
+ before any binding code (e.g. invocations to ``class_::def()``, etc.). This
159
+ macro must be specified at the top level (and outside of any namespaces), since
160
+ it instantiates a partial template overload. If your binding code consists of
161
+ multiple compilation units, it must be present in every file (typically via a
162
+ common header) preceding any usage of ``std::vector<int>``. Opaque types must
163
+ also have a corresponding ``class_`` declaration to associate them with a name
164
+ in Python, and to define a set of available operations, e.g.:
165
+
166
+ .. code-block:: cpp
167
+
168
+ py::class_<std::vector<int>>(m, "IntVector")
169
+ .def(py::init<>())
170
+ .def("clear", &std::vector<int>::clear)
171
+ .def("pop_back", &std::vector<int>::pop_back)
172
+ .def("__len__", [](const std::vector<int> &v) { return v.size(); })
173
+ .def("__iter__", [](std::vector<int> &v) {
174
+ return py::make_iterator(v.begin(), v.end());
175
+ }, py::keep_alive<0, 1>()) /* Keep vector alive while iterator is used */
176
+ // ....
177
+
178
+ .. seealso::
179
+
180
+ The file :file:`tests/test_opaque_types.cpp` contains a complete
181
+ example that demonstrates how to create and expose opaque types using
182
+ pybind11 in more detail.
183
+
184
+ .. _stl_bind:
185
+
186
+ Binding STL containers
187
+ ======================
188
+
189
+ The ability to expose STL containers as native Python objects is a fairly
190
+ common request, hence pybind11 also provides an optional header file named
191
+ :file:`pybind11/stl_bind.h` that does exactly this. The mapped containers try
192
+ to match the behavior of their native Python counterparts as much as possible.
193
+
194
+ The following example showcases usage of :file:`pybind11/stl_bind.h`:
195
+
196
+ .. code-block:: cpp
197
+
198
+ // Don't forget this
199
+ #include <pybind11/stl_bind.h>
200
+
201
+ PYBIND11_MAKE_OPAQUE(std::vector<int>);
202
+ PYBIND11_MAKE_OPAQUE(std::map<std::string, double>);
203
+
204
+ // ...
205
+
206
+ // later in binding code:
207
+ py::bind_vector<std::vector<int>>(m, "VectorInt");
208
+ py::bind_map<std::map<std::string, double>>(m, "MapStringDouble");
209
+
210
+ When binding STL containers pybind11 considers the types of the container's
211
+ elements to decide whether the container should be confined to the local module
212
+ (via the :ref:`module_local` feature). If the container element types are
213
+ anything other than already-bound custom types bound without
214
+ ``py::module_local()`` the container binding will have ``py::module_local()``
215
+ applied. This includes converting types such as numeric types, strings, Eigen
216
+ types; and types that have not yet been bound at the time of the stl container
217
+ binding. This module-local binding is designed to avoid potential conflicts
218
+ between module bindings (for example, from two separate modules each attempting
219
+ to bind ``std::vector<int>`` as a python type).
220
+
221
+ It is possible to override this behavior to force a definition to be either
222
+ module-local or global. To do so, you can pass the attributes
223
+ ``py::module_local()`` (to make the binding module-local) or
224
+ ``py::module_local(false)`` (to make the binding global) into the
225
+ ``py::bind_vector`` or ``py::bind_map`` arguments:
226
+
227
+ .. code-block:: cpp
228
+
229
+ py::bind_vector<std::vector<int>>(m, "VectorInt", py::module_local(false));
230
+
231
+ Note, however, that such a global binding would make it impossible to load this
232
+ module at the same time as any other pybind11 module that also attempts to bind
233
+ the same container type (``std::vector<int>`` in the above example).
234
+
235
+ See :ref:`module_local` for more details on module-local bindings.
236
+
237
+ .. seealso::
238
+
239
+ The file :file:`tests/test_stl_binders.cpp` shows how to use the
240
+ convenience STL container wrappers.
DiffVG/pybind11/docs/advanced/cast/strings.rst ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Strings, bytes and Unicode conversions
2
+ ######################################
3
+
4
+ .. note::
5
+
6
+ This section discusses string handling in terms of Python 3 strings. For
7
+ Python 2.7, replace all occurrences of ``str`` with ``unicode`` and
8
+ ``bytes`` with ``str``. Python 2.7 users may find it best to use ``from
9
+ __future__ import unicode_literals`` to avoid unintentionally using ``str``
10
+ instead of ``unicode``.
11
+
12
+ Passing Python strings to C++
13
+ =============================
14
+
15
+ When a Python ``str`` is passed from Python to a C++ function that accepts
16
+ ``std::string`` or ``char *`` as arguments, pybind11 will encode the Python
17
+ string to UTF-8. All Python ``str`` can be encoded in UTF-8, so this operation
18
+ does not fail.
19
+
20
+ The C++ language is encoding agnostic. It is the responsibility of the
21
+ programmer to track encodings. It's often easiest to simply `use UTF-8
22
+ everywhere <http://utf8everywhere.org/>`_.
23
+
24
+ .. code-block:: c++
25
+
26
+ m.def("utf8_test",
27
+ [](const std::string &s) {
28
+ cout << "utf-8 is icing on the cake.\n";
29
+ cout << s;
30
+ }
31
+ );
32
+ m.def("utf8_charptr",
33
+ [](const char *s) {
34
+ cout << "My favorite food is\n";
35
+ cout << s;
36
+ }
37
+ );
38
+
39
+ .. code-block:: python
40
+
41
+ >>> utf8_test('🎂')
42
+ utf-8 is icing on the cake.
43
+ 🎂
44
+
45
+ >>> utf8_charptr('🍕')
46
+ My favorite food is
47
+ 🍕
48
+
49
+ .. note::
50
+
51
+ Some terminal emulators do not support UTF-8 or emoji fonts and may not
52
+ display the example above correctly.
53
+
54
+ The results are the same whether the C++ function accepts arguments by value or
55
+ reference, and whether or not ``const`` is used.
56
+
57
+ Passing bytes to C++
58
+ --------------------
59
+
60
+ A Python ``bytes`` object will be passed to C++ functions that accept
61
+ ``std::string`` or ``char*`` *without* conversion. On Python 3, in order to
62
+ make a function *only* accept ``bytes`` (and not ``str``), declare it as taking
63
+ a ``py::bytes`` argument.
64
+
65
+
66
+ Returning C++ strings to Python
67
+ ===============================
68
+
69
+ When a C++ function returns a ``std::string`` or ``char*`` to a Python caller,
70
+ **pybind11 will assume that the string is valid UTF-8** and will decode it to a
71
+ native Python ``str``, using the same API as Python uses to perform
72
+ ``bytes.decode('utf-8')``. If this implicit conversion fails, pybind11 will
73
+ raise a ``UnicodeDecodeError``.
74
+
75
+ .. code-block:: c++
76
+
77
+ m.def("std_string_return",
78
+ []() {
79
+ return std::string("This string needs to be UTF-8 encoded");
80
+ }
81
+ );
82
+
83
+ .. code-block:: python
84
+
85
+ >>> isinstance(example.std_string_return(), str)
86
+ True
87
+
88
+
89
+ Because UTF-8 is inclusive of pure ASCII, there is never any issue with
90
+ returning a pure ASCII string to Python. If there is any possibility that the
91
+ string is not pure ASCII, it is necessary to ensure the encoding is valid
92
+ UTF-8.
93
+
94
+ .. warning::
95
+
96
+ Implicit conversion assumes that a returned ``char *`` is null-terminated.
97
+ If there is no null terminator a buffer overrun will occur.
98
+
99
+ Explicit conversions
100
+ --------------------
101
+
102
+ If some C++ code constructs a ``std::string`` that is not a UTF-8 string, one
103
+ can perform an explicit conversion and return a ``py::str`` object. Explicit
104
+ conversion has the same overhead as implicit conversion.
105
+
106
+ .. code-block:: c++
107
+
108
+ // This uses the Python C API to convert Latin-1 to Unicode
109
+ m.def("str_output",
110
+ []() {
111
+ std::string s = "Send your r\xe9sum\xe9 to Alice in HR"; // Latin-1
112
+ py::str py_s = PyUnicode_DecodeLatin1(s.data(), s.length());
113
+ return py_s;
114
+ }
115
+ );
116
+
117
+ .. code-block:: python
118
+
119
+ >>> str_output()
120
+ 'Send your résumé to Alice in HR'
121
+
122
+ The `Python C API
123
+ <https://docs.python.org/3/c-api/unicode.html#built-in-codecs>`_ provides
124
+ several built-in codecs.
125
+
126
+
127
+ One could also use a third party encoding library such as libiconv to transcode
128
+ to UTF-8.
129
+
130
+ Return C++ strings without conversion
131
+ -------------------------------------
132
+
133
+ If the data in a C++ ``std::string`` does not represent text and should be
134
+ returned to Python as ``bytes``, then one can return the data as a
135
+ ``py::bytes`` object.
136
+
137
+ .. code-block:: c++
138
+
139
+ m.def("return_bytes",
140
+ []() {
141
+ std::string s("\xba\xd0\xba\xd0"); // Not valid UTF-8
142
+ return py::bytes(s); // Return the data without transcoding
143
+ }
144
+ );
145
+
146
+ .. code-block:: python
147
+
148
+ >>> example.return_bytes()
149
+ b'\xba\xd0\xba\xd0'
150
+
151
+
152
+ Note the asymmetry: pybind11 will convert ``bytes`` to ``std::string`` without
153
+ encoding, but cannot convert ``std::string`` back to ``bytes`` implicitly.
154
+
155
+ .. code-block:: c++
156
+
157
+ m.def("asymmetry",
158
+ [](std::string s) { // Accepts str or bytes from Python
159
+ return s; // Looks harmless, but implicitly converts to str
160
+ }
161
+ );
162
+
163
+ .. code-block:: python
164
+
165
+ >>> isinstance(example.asymmetry(b"have some bytes"), str)
166
+ True
167
+
168
+ >>> example.asymmetry(b"\xba\xd0\xba\xd0") # invalid utf-8 as bytes
169
+ UnicodeDecodeError: 'utf-8' codec can't decode byte 0xba in position 0: invalid start byte
170
+
171
+
172
+ Wide character strings
173
+ ======================
174
+
175
+ When a Python ``str`` is passed to a C++ function expecting ``std::wstring``,
176
+ ``wchar_t*``, ``std::u16string`` or ``std::u32string``, the ``str`` will be
177
+ encoded to UTF-16 or UTF-32 depending on how the C++ compiler implements each
178
+ type, in the platform's native endianness. When strings of these types are
179
+ returned, they are assumed to contain valid UTF-16 or UTF-32, and will be
180
+ decoded to Python ``str``.
181
+
182
+ .. code-block:: c++
183
+
184
+ #define UNICODE
185
+ #include <windows.h>
186
+
187
+ m.def("set_window_text",
188
+ [](HWND hwnd, std::wstring s) {
189
+ // Call SetWindowText with null-terminated UTF-16 string
190
+ ::SetWindowText(hwnd, s.c_str());
191
+ }
192
+ );
193
+ m.def("get_window_text",
194
+ [](HWND hwnd) {
195
+ const int buffer_size = ::GetWindowTextLength(hwnd) + 1;
196
+ auto buffer = std::make_unique< wchar_t[] >(buffer_size);
197
+
198
+ ::GetWindowText(hwnd, buffer.get(), buffer_size);
199
+
200
+ std::wstring text(buffer.get());
201
+
202
+ // wstring will be converted to Python str
203
+ return text;
204
+ }
205
+ );
206
+
207
+ .. warning::
208
+
209
+ Wide character strings may not work as described on Python 2.7 or Python
210
+ 3.3 compiled with ``--enable-unicode=ucs2``.
211
+
212
+ Strings in multibyte encodings such as Shift-JIS must be transcoded to
213
+ UTF-8/16/32 before being returned to Python.
214
+
215
+
216
+ Character literals
217
+ ==================
218
+
219
+ C++ functions that accept character literals as input will receive the first
220
+ character of a Python ``str`` as their input. If the string is longer than one
221
+ Unicode character, trailing characters will be ignored.
222
+
223
+ When a character literal is returned from C++ (such as a ``char`` or a
224
+ ``wchar_t``), it will be converted to a ``str`` that represents the single
225
+ character.
226
+
227
+ .. code-block:: c++
228
+
229
+ m.def("pass_char", [](char c) { return c; });
230
+ m.def("pass_wchar", [](wchar_t w) { return w; });
231
+
232
+ .. code-block:: python
233
+
234
+ >>> example.pass_char('A')
235
+ 'A'
236
+
237
+ While C++ will cast integers to character types (``char c = 0x65;``), pybind11
238
+ does not convert Python integers to characters implicitly. The Python function
239
+ ``chr()`` can be used to convert integers to characters.
240
+
241
+ .. code-block:: python
242
+
243
+ >>> example.pass_char(0x65)
244
+ TypeError
245
+
246
+ >>> example.pass_char(chr(0x65))
247
+ 'e'
248
+
249
+ If the desire is to work with an 8-bit integer, use ``int8_t`` or ``uint8_t``
250
+ as the argument type.
251
+
252
+ Grapheme clusters
253
+ -----------------
254
+
255
+ A single grapheme may be represented by two or more Unicode characters. For
256
+ example 'é' is usually represented as U+00E9 but can also be expressed as the
257
+ combining character sequence U+0065 U+0301 (that is, the letter 'e' followed by
258
+ a combining acute accent). The combining character will be lost if the
259
+ two-character sequence is passed as an argument, even though it renders as a
260
+ single grapheme.
261
+
262
+ .. code-block:: python
263
+
264
+ >>> example.pass_wchar('é')
265
+ 'é'
266
+
267
+ >>> combining_e_acute = 'e' + '\u0301'
268
+
269
+ >>> combining_e_acute
270
+ 'é'
271
+
272
+ >>> combining_e_acute == 'é'
273
+ False
274
+
275
+ >>> example.pass_wchar(combining_e_acute)
276
+ 'e'
277
+
278
+ Normalizing combining characters before passing the character literal to C++
279
+ may resolve *some* of these issues:
280
+
281
+ .. code-block:: python
282
+
283
+ >>> example.pass_wchar(unicodedata.normalize('NFC', combining_e_acute))
284
+ 'é'
285
+
286
+ In some languages (Thai for example), there are `graphemes that cannot be
287
+ expressed as a single Unicode code point
288
+ <http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries>`_, so there is
289
+ no way to capture them in a C++ character type.
290
+
291
+
292
+ C++17 string views
293
+ ==================
294
+
295
+ C++17 string views are automatically supported when compiling in C++17 mode.
296
+ They follow the same rules for encoding and decoding as the corresponding STL
297
+ string type (for example, a ``std::u16string_view`` argument will be passed
298
+ UTF-16-encoded data, and a returned ``std::string_view`` will be decoded as
299
+ UTF-8).
300
+
301
+ References
302
+ ==========
303
+
304
+ * `The Absolute Minimum Every Software Developer Absolutely, Positively Must Know About Unicode and Character Sets (No Excuses!) <https://www.joelonsoftware.com/2003/10/08/the-absolute-minimum-every-software-developer-absolutely-positively-must-know-about-unicode-and-character-sets-no-excuses/>`_
305
+ * `C++ - Using STL Strings at Win32 API Boundaries <https://msdn.microsoft.com/en-ca/magazine/mt238407.aspx>`_