Spaces:

CVPR
/

LIVE

Runtime error

App Files Files Community

Xu Ma committed on Jun 9, 2022

Commit

28958dc

1 Parent(s): b60c0af

upload all files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.DS_Store +0 -0
CMakeLists.txt +140 -0
LIVE/LICENSE +661 -0
LIVE/README.md +44 -0
LIVE/colab.py +687 -0
LIVE/env.yml +164 -0
LIVE/example.png +0 -0
LIVE/system_info.txt +1 -0
LIVE/user_study_state.csv +148 -0
README.md +7 -6
__init__.py +2 -0
aabb.h +67 -0
app.py +375 -0
atomic.cpp +27 -0
atomic.h +139 -0
cdf.h +29 -0
cls_name/cls_name.csv +80 -0
cls_name/cls_name.yaml +7 -0
cmake/FindTensorFlow.cmake +34 -0
cmake/FindThrust.cmake +40 -0
color.cpp +25 -0
color.h +63 -0
compute_distance.h +949 -0
config/base.yaml +91 -0
cuda_utils.h +53 -0
data/demo1.png +0 -0
data/demo2.jpg +0 -0
data/demo3.png +0 -0
diffvg.cpp +1792 -0
diffvg.h +156 -0
edge_query.h +7 -0
examples/1.png +0 -0
examples/2.png +0 -0
examples/3.jpg +0 -0
examples/4.png +0 -0
examples/5.png +0 -0
figures/smile.png +0 -0
filter.h +106 -0
icon/logo.ico +0 -0
img_example/Millenial-at-work.jpg +0 -0
img_example/bus.jpg +0 -0
img_example/zidane.jpg +0 -0
main.py +1040 -0
matrix.h +544 -0
model_config/model_name_p5_all.csv +5 -0
model_config/model_name_p5_all.yaml +1 -0
model_config/model_name_p5_n.csv +1 -0
model_config/model_name_p5_n.yaml +1 -0
model_config/model_name_p6_all.csv +5 -0
model_config/model_name_p6_all.yaml +1 -0

.DS_Store ADDED Viewed

Binary file (8.2 kB). View file

CMakeLists.txt ADDED Viewed

	@@ -0,0 +1,140 @@

+cmake_minimum_required(VERSION 3.12)
+project(diffvg VERSION 0.0.1 DESCRIPTION "Differentiable Vector Graphics")
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/")  # lets find_package() locate the Find*.cmake modules kept under cmake/
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)  # request compile_commands.json from generators that support it
+if(WIN32)  # minimum accepted Python version differs by platform: 3.6 on Windows, 3.7 elsewhere
+    find_package(Python 3.6 COMPONENTS Development REQUIRED)
+else()
+    find_package(Python 3.7 COMPONENTS Development REQUIRED)
+endif()
+add_subdirectory(pybind11)  # expects a pybind11 source tree in ./pybind11 (not visible in this view)
+option(DIFFVG_CUDA "Build diffvg with GPU code path?" ON)  # GPU build is the default; configure with -DDIFFVG_CUDA=OFF for the Thrust-only path
+if(DIFFVG_CUDA)
+    message(STATUS "Build with CUDA support")
+    find_package(CUDA 10 REQUIRED)  # legacy FindCUDA module; cuda_add_library() used further down comes from it
+    set(CMAKE_CUDA_STANDARD 11)
+    if(NOT WIN32)
+        # Hack: CMAKE_CUDA_STANDARD above does not take effect on some Linux systems, so -std=c++11 is passed to nvcc explicitly.
+        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -std=c++11")
+        #set(CUDA_NVCC_FLAGS_DEBUG "-g -G")
+    endif()
+else()
+    message(STATUS "Build without CUDA support")
+    find_package(Thrust REQUIRED)  # presumably resolved through cmake/FindThrust.cmake via CMAKE_MODULE_PATH
+endif()
+# include_directories(${CMAKE_SOURCE_DIR}/pybind11/include)
+include_directories(${PYTHON_INCLUDE_PATH})  # NOTE(review): runs before find_package(PythonLibs) and is repeated right after it; looks redundant
+find_package(PythonLibs REQUIRED)
+include_directories(${PYTHON_INCLUDE_PATH})
+include_directories(${PYTHON_INCLUDE_DIRS})
+include_directories(pybind11/include)
+if(DIFFVG_CUDA)
+    link_directories(${CUDA_LIBRARIES})  # NOTE(review): CUDA_LIBRARIES normally lists library files, not directories; confirm this line is needed
+else()
+    include_directories(${THRUST_INCLUDE_DIR})
+endif()
+if(NOT MSVC)  # flags for non-MSVC toolchains; applied directory-wide, so they reach every target defined below
+  # These compile options are not meaningful for MSVC
+  add_compile_options(-Wall -g -O3 -fvisibility=hidden -Wno-unknown-pragmas)
+else()
+  add_compile_options(/Wall /Zi)  # MSVC: all warnings plus debug information
+  add_link_options(/DEBUG)
+endif()
+if(NOT DIFFVG_CUDA)
+    add_compile_options("-DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CPP")  # without CUDA, select Thrust's CPP device system
+endif()
+set(SRCS atomic.h  # SRCS feeds the diffvg module target below; headers are listed alongside the .cpp files
+         color.h
+         cdf.h
+         cuda_utils.h
+         diffvg.h
+         edge_query.h
+         filter.h
+         matrix.h
+         parallel.h
+         pcg.h
+         ptr.h
+         sample_boundary.h
+         scene.h
+         shape.h
+         solve.h
+         vector.h
+         within_distance.h
+         winding_number.h
+         atomic.cpp
+         color.cpp
+         diffvg.cpp
+         parallel.cpp
+         scene.cpp
+         shape.cpp)
+if(DIFFVG_CUDA)
+    add_compile_definitions(COMPILE_WITH_CUDA)  # defines the COMPILE_WITH_CUDA preprocessor macro for the sources
+    set_source_files_properties(  # per FindCUDA, setting this property makes non-.cu files get treated like .cu sources
+        diffvg.cpp
+        scene.cpp
+        PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
+    cuda_add_library(diffvg MODULE ${SRCS})
+else()
+    add_library(diffvg MODULE ${SRCS})  # MODULE: a plugin loaded at run time rather than linked against
+endif()
+if(APPLE)
+    # "-undefined dynamic_lookup" is a workaround for systems with several
+    # Python installations. If the module were linked against one particular
+    # Python library here and later imported by a different interpreter,
+    # the likely result would be a segmentation fault.
+    # The solution on Linux and macOS, as described in
+    # https://github.com/pybind/pybind11/blob/master/tools/pybind11Tools.cmake
+    # is to not link against the Python library at all and let the missing
+    # symbols be resolved when the module is loaded.
+    set(DYNAMIC_LOOKUP "-undefined dynamic_lookup")
+endif()
+target_link_libraries(diffvg ${DYNAMIC_LOOKUP})  # DYNAMIC_LOOKUP is only set in the APPLE branch above
+if(WIN32)
+    # See: https://pybind11.readthedocs.io/en/master/compiling.html#advanced-interface-library-target
+    target_link_libraries(diffvg pybind11::module)
+    set_target_properties(diffvg PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}"
+                                            SUFFIX "${PYTHON_MODULE_EXTENSION}")
+endif()
+set_target_properties(diffvg PROPERTIES SKIP_BUILD_RPATH FALSE)
+set_target_properties(diffvg PROPERTIES BUILD_WITH_INSTALL_RPATH TRUE)  # the build-tree binary already carries INSTALL_RPATH
+if(UNIX AND NOT APPLE)
+    set_target_properties(diffvg PROPERTIES INSTALL_RPATH "$ORIGIN")  # search for shared libraries next to the module itself
+elseif(APPLE)
+    set_target_properties(diffvg PROPERTIES INSTALL_RPATH "@loader_path")  # macOS counterpart of $ORIGIN
+endif()
+set_property(TARGET diffvg PROPERTY CXX_STANDARD 11)
+set_target_properties(diffvg PROPERTIES PREFIX "")  # no "lib" prefix on the module; NOTE(review): this also overwrites the PREFIX set in the WIN32 branch above
+# Keep assertions enabled in optimised builds: remove the NDEBUG definition
+# (both the MSVC "/D" and the GCC/Clang "-D" spelling) from the default
+# Release and RelWithDebInfo flags of the C and C++ compilers.
+# Each variable is filtered from its own value. The previous version derived
+# the CMAKE_C_FLAGS_* variables from the CMAKE_CXX_FLAGS_* ones, which
+# replaced the C flags with the C++ flags.
+foreach(diffvg_flags_var
+        CMAKE_CXX_FLAGS_RELEASE
+        CMAKE_CXX_FLAGS_RELWITHDEBINFO
+        CMAKE_C_FLAGS_RELEASE
+        CMAKE_C_FLAGS_RELWITHDEBINFO)
+    string(REPLACE "/DNDEBUG" "" ${diffvg_flags_var} "${${diffvg_flags_var}}")
+    string(REPLACE "-DNDEBUG" "" ${diffvg_flags_var} "${${diffvg_flags_var}}")
+endforeach()
+# Optional TensorFlow custom ops. They are never attempted on Windows and are
+# skipped elsewhere when find_package(TensorFlow) does not find TensorFlow.
+if(NOT WIN32)
+    find_package(TensorFlow)
+    if(TensorFlow_FOUND)
+        add_subdirectory(pydiffvg_tensorflow/custom_ops)
+    else()
+        # "INFO" is not a message() mode keyword; it was printed as part of
+        # the text at the default notice level. STATUS is the mode intended
+        # for informational configure output.
+        message(STATUS "Building without TensorFlow support (not found)")
+    endif()
+endif()

LIVE/LICENSE ADDED Viewed

	@@ -0,0 +1,661 @@

+                    GNU AFFERO GENERAL PUBLIC LICENSE
+                       Version 3, 19 November 2007
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+                            Preamble
+  The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+  Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+  A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate.  Many developers of free software are heartened and
+encouraged by the resulting cooperation.  However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
+  The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community.  It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server.  Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.
+  An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals.  This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.
+  The precise terms and conditions for copying, distribution and
+modification follow.
+                       TERMS AND CONDITIONS
+  0. Definitions.
+  "This License" refers to version 3 of the GNU Affero General Public License.
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+  1. Source Code.
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+  The Corresponding Source for a work in source code form is that
+same work.
+  2. Basic Permissions.
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+  4. Conveying Verbatim Copies.
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+  5. Conveying Modified Source Versions.
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+  6. Conveying Non-Source Forms.
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+  7. Additional Terms.
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+  8. Termination.
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+  9. Acceptance Not Required for Having Copies.
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+  10. Automatic Licensing of Downstream Recipients.
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+  11. Patents.
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+  12. No Surrender of Others' Freedom.
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+  13. Remote Network Interaction; Use with the GNU General Public License.
+  Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software.  This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.
+  14. Revised Versions of this License.
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU Affero General Public License from time to time.  Such new versions
+will be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU Affero General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU Affero General Public License, you may choose any version ever published
+by the Free Software Foundation.
+  If the Program specifies that a proxy can decide which future
+versions of the GNU Affero General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+  15. Disclaimer of Warranty.
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+  16. Limitation of Liability.
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+  17. Interpretation of Sections 15 and 16.
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+                     END OF TERMS AND CONDITIONS
+            How to Apply These Terms to Your New Programs
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published
+    by the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+Also add information on how to contact you by electronic and paper mail.
+  If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source.  For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code.  There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+<https://www.gnu.org/licenses/>.

LIVE/README.md ADDED Viewed

	@@ -0,0 +1,44 @@

+# LIVE-pytorch
+Towards Layer-wise Image Vectorization
+### Updated for rebuttal (Jan/28/2022):
+#### User study
+We created a [user study](https://wj.qq.com/s2/9665341/19ed) as suggested. A more complex user study will be added in the revised version.
+The results are collected here: [user study details](user_study_state.csv)
+#### Code installation
+We added a detailed [conda env file](env.yml) and collected detailed [system information](system_info.txt) to help with the installation.
+A more detailed docker and Google Colab demo will be provided.
+<div align="center">
+  <img src="example.png" width="650px" height="300px">
+</div>
+LIVE is able to explicitly present a layer-wise representation for simple images.
+## Installation
+```bash
+pip3 install torch torchvision
+pip install svgwrite
+pip install svgpathtools
+pip install cssutils
+pip install numba
+pip install torch-tools
+pip install visdom
+pip install scikit-fmm
+pip install opencv-python==4.5.4.60
+pip install easydict
+pip install scikit-fmm
+```
+Next, please refer to DiffVG to install [pydiffvg](https://github.com/BachiLi/diffvg).
+## Run
+```bash
+python main.py --config config/all.yaml --experiment experiment_8x1 --signature demo1 --target data/demo1.png
+```
+Please modify the config files to change configurations.

LIVE/colab.py ADDED Viewed

	@@ -0,0 +1,687 @@

+"""
+Here are some use cases:
+python main.py --config config/all.yaml --experiment experiment_8x1 --signature demo1 --target data/demo1.png
+"""
+import pydiffvg
+import torch
+import cv2
+import matplotlib.pyplot as plt
+import random
+import argparse
+import math
+import errno
+from tqdm import tqdm
+from torch.optim.lr_scheduler import CosineAnnealingLR, LambdaLR
+from torch.nn.functional import adaptive_avg_pool2d
+import warnings
+warnings.filterwarnings("ignore")
+import PIL
+import PIL.Image
+import os
+import os.path as osp
+import numpy as np
+import numpy.random as npr
+import shutil
+import copy
+# import skfmm
+from xing_loss import xing_loss
+import yaml
+from easydict import EasyDict as edict
+pydiffvg.set_print_timing(False)
+gamma = 1.0
+##########
+# helper #
+##########
+from utils import \
+    get_experiment_id, \
+    get_path_schedule, \
+    edict_2_dict, \
+    check_and_create_dir
+def get_bezier_circle(radius=1, segments=4, bias=None):
+    points = []
+    if bias is None:
+        bias = (random.random(), random.random())
+    avg_degree = 360 / (segments*3)
+    for i in range(0, segments*3):
+        point = (np.cos(np.deg2rad(i * avg_degree)),
+                    np.sin(np.deg2rad(i * avg_degree)))
+        points.append(point)
+    points = torch.tensor(points)
+    points = (points)*radius + torch.tensor(bias).unsqueeze(dim=0)
+    points = points.type(torch.FloatTensor)
+    return points
+def get_sdf(phi, method='skfmm', **kwargs):
+    if method == 'skfmm':
+        import skfmm
+        phi = (phi-0.5)*2
+        if (phi.max() <= 0) or (phi.min() >= 0):
+            return np.zeros(phi.shape).astype(np.float32)
+        sd = skfmm.distance(phi, dx=1)
+        flip_negative = kwargs.get('flip_negative', True)
+        if flip_negative:
+            sd = np.abs(sd)
+        truncate = kwargs.get('truncate', 10)
+        sd = np.clip(sd, -truncate, truncate)
+        # print(f"max sd value is: {sd.max()}")
+        zero2max = kwargs.get('zero2max', True)
+        if zero2max and flip_negative:
+            sd = sd.max() - sd
+        elif zero2max:
+            raise ValueError
+        normalize = kwargs.get('normalize', 'sum')
+        if normalize == 'sum':
+            sd /= sd.sum()
+        elif normalize == 'to1':
+            sd /= sd.max()
+        return sd
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--debug', action='store_true', default=False)
+    parser.add_argument("--config", type=str)
+    parser.add_argument("--experiment", type=str)
+    parser.add_argument("--seed", type=int)
+    parser.add_argument("--target", type=str, help="target image path")
+    parser.add_argument('--log_dir', metavar='DIR', default="log/debug")
+    parser.add_argument('--initial', type=str, default="random", choices=['random', 'circle'])
+    parser.add_argument('--signature', nargs='+', type=str)
+    parser.add_argument('--seginit', nargs='+', type=str)
+    parser.add_argument("--num_segments", type=int, default=4)
+    # parser.add_argument("--num_paths", type=str, default="1,1,1")
+    # parser.add_argument("--num_iter", type=int, default=500)
+    # parser.add_argument('--free', action='store_true')
+    # Please ensure that image resolution is divisible by pool_size; otherwise the performance would drop a lot.
+    # parser.add_argument('--pool_size', type=int, default=40, help="the pooled image size for next path initialization")
+    # parser.add_argument('--save_loss', action='store_true')
+    # parser.add_argument('--save_init', action='store_true')
+    # parser.add_argument('--save_image', action='store_true')
+    # parser.add_argument('--save_video', action='store_true')
+    # parser.add_argument('--print_weight', action='store_true')
+    # parser.add_argument('--circle_init_radius',  type=float)
+    cfg = edict()
+    args = parser.parse_args()
+    cfg.debug = args.debug
+    cfg.config = args.config
+    cfg.experiment = args.experiment
+    cfg.seed = args.seed
+    cfg.target = args.target
+    cfg.log_dir = args.log_dir
+    cfg.initial = args.initial
+    cfg.signature = args.signature
+    # set cfg num_segments in command
+    cfg.num_segments = args.num_segments
+    if args.seginit is not None:
+        cfg.seginit = edict()
+        cfg.seginit.type = args.seginit[0]
+        if cfg.seginit.type == 'circle':
+            cfg.seginit.radius = float(args.seginit[1])
+    return cfg
+def ycrcb_conversion(im, format='[bs x 3 x 2D]', reverse=False):
+    mat = torch.FloatTensor([
+        [ 65.481/255, 128.553/255,  24.966/255], # ranged_from [0, 219/255]
+        [-37.797/255, -74.203/255, 112.000/255], # ranged_from [-112/255, 112/255]
+        [112.000/255, -93.786/255, -18.214/255], # ranged_from [-112/255, 112/255]
+    ]).to(im.device)
+    if reverse:
+        mat = mat.inverse()
+    if format == '[bs x 3 x 2D]':
+        im = im.permute(0, 2, 3, 1)
+        im = torch.matmul(im, mat.T)
+        im = im.permute(0, 3, 1, 2).contiguous()
+        return im
+    elif format == '[2D x 3]':
+        im = torch.matmul(im, mat.T)
+        return im
+    else:
+        raise ValueError
+class random_coord_init():
+    def __init__(self, canvas_size):
+        self.canvas_size = canvas_size
+    def __call__(self):
+        h, w = self.canvas_size
+        return [npr.uniform(0, 1)*w, npr.uniform(0, 1)*h]
+class naive_coord_init():
+    def __init__(self, pred, gt, format='[bs x c x 2D]', replace_sampling=True):
+        if isinstance(pred, torch.Tensor):
+            pred = pred.detach().cpu().numpy()
+        if isinstance(gt, torch.Tensor):
+            gt = gt.detach().cpu().numpy()
+        if format == '[bs x c x 2D]':
+            self.map = ((pred[0] - gt[0])**2).sum(0)
+        elif format == ['[2D x c]']:
+            self.map = ((pred - gt)**2).sum(-1)
+        else:
+            raise ValueError
+        self.replace_sampling = replace_sampling
+    def __call__(self):
+        coord = np.where(self.map == self.map.max())
+        coord_h, coord_w = coord[0][0], coord[1][0]
+        if self.replace_sampling:
+            self.map[coord_h, coord_w] = -1
+        return [coord_w, coord_h]
+class sparse_coord_init():
+    def __init__(self, pred, gt, format='[bs x c x 2D]', quantile_interval=200, nodiff_thres=0.1):
+        if isinstance(pred, torch.Tensor):
+            pred = pred.detach().cpu().numpy()
+        if isinstance(gt, torch.Tensor):
+            gt = gt.detach().cpu().numpy()
+        if format == '[bs x c x 2D]':
+            self.map = ((pred[0] - gt[0])**2).sum(0)
+            self.reference_gt = copy.deepcopy(
+                np.transpose(gt[0], (1, 2, 0)))
+        elif format == ['[2D x c]']:
+            self.map = (np.abs(pred - gt)).sum(-1)
+            self.reference_gt = copy.deepcopy(gt[0])
+        else:
+            raise ValueError
+        # OptionA: Zero too small errors to avoid the error too small deadloop
+        self.map[self.map < nodiff_thres] = 0
+        quantile_interval = np.linspace(0., 1., quantile_interval)
+        quantized_interval = np.quantile(self.map, quantile_interval)
+        # remove redundant
+        quantized_interval = np.unique(quantized_interval)
+        quantized_interval = sorted(quantized_interval[1:-1])
+        self.map = np.digitize(self.map, quantized_interval, right=False)
+        self.map = np.clip(self.map, 0, 255).astype(np.uint8)
+        self.idcnt = {}
+        for idi in sorted(np.unique(self.map)):
+            self.idcnt[idi] = (self.map==idi).sum()
+        self.idcnt.pop(min(self.idcnt.keys()))
+        # remove smallest one to remove the correct region
+    def __call__(self):
+        if len(self.idcnt) == 0:
+            h, w = self.map.shape
+            return [npr.uniform(0, 1)*w, npr.uniform(0, 1)*h]
+        target_id = max(self.idcnt, key=self.idcnt.get)
+        _, component, cstats, ccenter = cv2.connectedComponentsWithStats(
+            (self.map==target_id).astype(np.uint8), connectivity=4)
+        # remove cid = 0, it is the invalid area
+        csize = [ci[-1] for ci in cstats[1:]]
+        target_cid = csize.index(max(csize))+1
+        center = ccenter[target_cid][::-1]
+        coord = np.stack(np.where(component == target_cid)).T
+        dist = np.linalg.norm(coord-center, axis=1)
+        target_coord_id = np.argmin(dist)
+        coord_h, coord_w = coord[target_coord_id]
+        # replace_sampling
+        self.idcnt[target_id] -= max(csize)
+        if self.idcnt[target_id] == 0:
+            self.idcnt.pop(target_id)
+        self.map[component == target_cid] = 0
+        return [coord_w, coord_h]
+def init_shapes(num_paths,
+                num_segments,
+                canvas_size,
+                seginit_cfg,
+                shape_cnt,
+                pos_init_method=None,
+                trainable_stroke=False,
+                **kwargs):
+    shapes = []
+    shape_groups = []
+    h, w = canvas_size
+    # change path init location
+    if pos_init_method is None:
+        pos_init_method = random_coord_init(canvas_size=canvas_size)
+    for i in range(num_paths):
+        num_control_points = [2] * num_segments
+        if seginit_cfg.type=="random":
+            points = []
+            p0 = pos_init_method()
+            color_ref = copy.deepcopy(p0)
+            points.append(p0)
+            for j in range(num_segments):
+                radius = seginit_cfg.radius
+                p1 = (p0[0] + radius * npr.uniform(-0.5, 0.5),
+                      p0[1] + radius * npr.uniform(-0.5, 0.5))
+                p2 = (p1[0] + radius * npr.uniform(-0.5, 0.5),
+                      p1[1] + radius * npr.uniform(-0.5, 0.5))
+                p3 = (p2[0] + radius * npr.uniform(-0.5, 0.5),
+                      p2[1] + radius * npr.uniform(-0.5, 0.5))
+                points.append(p1)
+                points.append(p2)
+                if j < num_segments - 1:
+                    points.append(p3)
+                    p0 = p3
+            points = torch.FloatTensor(points)
+        # circle points initialization
+        elif seginit_cfg.type=="circle":
+            radius = seginit_cfg.radius
+            if radius is None:
+                radius = npr.uniform(0.5, 1)
+            center = pos_init_method()
+            color_ref = copy.deepcopy(center)
+            points = get_bezier_circle(
+                radius=radius, segments=num_segments,
+                bias=center)
+        path = pydiffvg.Path(num_control_points = torch.LongTensor(num_control_points),
+                             points = points,
+                             stroke_width = torch.tensor(0.0),
+                             is_closed = True)
+        shapes.append(path)
+        # !!!!!!problem is here. the shape group shape_ids is wrong
+        if 'gt' in kwargs:
+            wref, href = color_ref
+            wref = max(0, min(int(wref), w-1))
+            href = max(0, min(int(href), h-1))
+            fill_color_init = list(gt[0, :, href, wref]) + [1.]
+            fill_color_init = torch.FloatTensor(fill_color_init)
+            stroke_color_init = torch.FloatTensor(npr.uniform(size=[4]))
+        else:
+            fill_color_init = torch.FloatTensor(npr.uniform(size=[4]))
+            stroke_color_init = torch.FloatTensor(npr.uniform(size=[4]))
+        path_group = pydiffvg.ShapeGroup(
+            shape_ids = torch.LongTensor([shape_cnt+i]),
+            fill_color = fill_color_init,
+            stroke_color = stroke_color_init,
+        )
+        shape_groups.append(path_group)
+    point_var = []
+    color_var = []
+    for path in shapes:
+        path.points.requires_grad = True
+        point_var.append(path.points)
+    for group in shape_groups:
+        group.fill_color.requires_grad = True
+        color_var.append(group.fill_color)
+    if trainable_stroke:
+        stroke_width_var = []
+        stroke_color_var = []
+        for path in shapes:
+            path.stroke_width.requires_grad = True
+            stroke_width_var.append(path.stroke_width)
+        for group in shape_groups:
+            group.stroke_color.requires_grad = True
+            stroke_color_var.append(group.stroke_color)
+        return shapes, shape_groups, point_var, color_var, stroke_width_var, stroke_color_var
+    else:
+        return shapes, shape_groups, point_var, color_var
+class linear_decay_lrlambda_f(object):
+    def __init__(self, decay_every, decay_ratio):
+        self.decay_every = decay_every
+        self.decay_ratio = decay_ratio
+    def __call__(self, n):
+        decay_time = n//self.decay_every
+        decay_step = n %self.decay_every
+        lr_s = self.decay_ratio**decay_time
+        lr_e = self.decay_ratio**(decay_time+1)
+        r = decay_step/self.decay_every
+        lr = lr_s * (1-r) + lr_e * r
+        return lr
+if __name__ == "__main__":
+    ###############
+    # make config #
+    ###############
+    cfg_arg = parse_args()
+    with open(cfg_arg.config, 'r') as f:
+        cfg = yaml.load(f, Loader=yaml.FullLoader)
+    cfg_default = edict(cfg['default'])
+    cfg = edict(cfg[cfg_arg.experiment])
+    cfg.update(cfg_default)
+    cfg.update(cfg_arg)
+    cfg.exid = get_experiment_id(cfg.debug)
+    cfg.experiment_dir = \
+        osp.join(cfg.log_dir, '{}_{}'.format(cfg.exid, '_'.join(cfg.signature)))
+    configfile = osp.join(cfg.experiment_dir, 'config.yaml')
+    check_and_create_dir(configfile)
+    with open(osp.join(configfile), 'w') as f:
+        yaml.dump(edict_2_dict(cfg), f)
+    # Use GPU if available
+    pydiffvg.set_use_gpu(torch.cuda.is_available())
+    device = pydiffvg.get_device()
+    gt = np.array(PIL.Image.open(cfg.target))
+    print(f"Input image shape is: {gt.shape}")
+    if len(gt.shape) == 2:
+        print("Converting the gray-scale image to RGB.")
+        gt = gt.unsqueeze(dim=-1).repeat(1,1,3)
+    if gt.shape[2] == 4:
+        print("Input image includes alpha channel, simply dropout alpha channel.")
+        gt = gt[:, :, :3]
+    gt = (gt/255).astype(np.float32)
+    gt = torch.FloatTensor(gt).permute(2, 0, 1)[None].to(device)
+    if cfg.use_ycrcb:
+        gt = ycrcb_conversion(gt)
+    h, w = gt.shape[2:]
+    path_schedule = get_path_schedule(**cfg.path_schedule)
+    if cfg.seed is not None:
+        random.seed(cfg.seed)
+        npr.seed(cfg.seed)
+        torch.manual_seed(cfg.seed)
+    render = pydiffvg.RenderFunction.apply
+    shapes_record, shape_groups_record = [], []
+    region_loss = None
+    loss_matrix = []
+    para_point, para_color = {}, {}
+    if cfg.trainable.stroke:
+        para_stroke_width, para_stroke_color = {}, {}
+    pathn_record = []
+    # Background
+    if cfg.trainable.bg:
+        # meancolor = gt.mean([2, 3])[0]
+        para_bg = torch.tensor([1., 1., 1.], requires_grad=True, device=device)
+    else:
+        if cfg.use_ycrcb:
+            para_bg = torch.tensor([219/255, 0, 0], requires_grad=False, device=device)
+        else:
+            para_bg = torch.tensor([1., 1., 1.], requires_grad=False, device=device)
+    ##################
+    # start_training #
+    ##################
+    loss_weight = None
+    loss_weight_keep = 0
+    if cfg.coord_init.type == 'naive':
+        pos_init_method = naive_coord_init(
+            para_bg.view(1, -1, 1, 1).repeat(1, 1, h, w), gt)
+    elif cfg.coord_init.type == 'sparse':
+        pos_init_method = sparse_coord_init(
+            para_bg.view(1, -1, 1, 1).repeat(1, 1, h, w), gt)
+    elif cfg.coord_init.type == 'random':
+        pos_init_method = random_coord_init([h, w])
+    else:
+        raise ValueError
+    lrlambda_f = linear_decay_lrlambda_f(cfg.num_iter, 0.4)
+    optim_schedular_dict = {}
+    for path_idx, pathn in enumerate(path_schedule):
+        loss_list = []
+        print("=> Adding [{}] paths, [{}] ...".format(pathn, cfg.seginit.type))
+        pathn_record.append(pathn)
+        pathn_record_str = '-'.join([str(i) for i in pathn_record])
+        # initialize new shapes related stuffs.
+        if cfg.trainable.stroke:
+            shapes, shape_groups, point_var, color_var, stroke_width_var, stroke_color_var = init_shapes(
+                pathn, cfg.num_segments, (h, w),
+                cfg.seginit, len(shapes_record),
+                pos_init_method,
+                trainable_stroke=True,
+                gt=gt, )
+            para_stroke_width[path_idx] = stroke_width_var
+            para_stroke_color[path_idx] = stroke_color_var
+        else:
+            shapes, shape_groups, point_var, color_var = init_shapes(
+                pathn, cfg.num_segments, (h, w),
+                cfg.seginit, len(shapes_record),
+                pos_init_method,
+                trainable_stroke=False,
+                gt=gt, )
+        shapes_record += shapes
+        shape_groups_record += shape_groups
+        if cfg.save.init:
+            filename = os.path.join(
+                cfg.experiment_dir, "svg-init",
+                "{}-init.svg".format(pathn_record_str))
+            check_and_create_dir(filename)
+            pydiffvg.save_svg(
+                filename, w, h,
+                shapes_record, shape_groups_record)
+        para = {}
+        if (cfg.trainable.bg) and (path_idx == 0):
+            para['bg'] = [para_bg]
+        para['point'] = point_var
+        para['color'] = color_var
+        if cfg.trainable.stroke:
+            para['stroke_width'] = stroke_width_var
+            para['stroke_color'] = stroke_color_var
+        pg = [{'params' : para[ki], 'lr' : cfg.lr_base[ki]} for ki in sorted(para.keys())]
+        optim = torch.optim.Adam(pg)
+        if cfg.trainable.record:
+            scheduler = LambdaLR(
+                optim, lr_lambda=lrlambda_f, last_epoch=-1)
+        else:
+            scheduler = LambdaLR(
+                optim, lr_lambda=lrlambda_f, last_epoch=cfg.num_iter)
+        optim_schedular_dict[path_idx] = (optim, scheduler)
+        # Inner loop training
+        t_range = tqdm(range(cfg.num_iter))
+        for t in t_range:
+            for _, (optim, _) in optim_schedular_dict.items():
+                optim.zero_grad()
+            # Forward pass: render the image.
+            scene_args = pydiffvg.RenderFunction.serialize_scene(
+                w, h, shapes_record, shape_groups_record)
+            img = render(w, h, 2, 2, t, None, *scene_args)
+            # Compose img with white background
+            img = img[:, :, 3:4] * img[:, :, :3] + \
+                para_bg * (1 - img[:, :, 3:4])
+            if cfg.save.video:
+                filename = os.path.join(
+                    cfg.experiment_dir, "video-png",
+                    "{}-iter{}.png".format(pathn_record_str, t))
+                check_and_create_dir(filename)
+                if cfg.use_ycrcb:
+                    imshow = ycrcb_conversion(
+                        img, format='[2D x 3]', reverse=True).detach().cpu()
+                else:
+                    imshow = img.detach().cpu()
+                pydiffvg.imwrite(imshow, filename, gamma=gamma)
+            x = img.unsqueeze(0).permute(0, 3, 1, 2) # HWC -> NCHW
+            if cfg.use_ycrcb:
+                color_reweight = torch.FloatTensor([255/219, 255/224, 255/255]).to(device)
+                loss = ((x-gt)*(color_reweight.view(1, -1, 1, 1)))**2
+            else:
+                loss = ((x-gt)**2)
+            if cfg.loss.use_l1_loss:
+                loss = abs(x-gt)
+            if cfg.loss.use_distance_weighted_loss:
+                if cfg.use_ycrcb:
+                    raise ValueError
+                shapes_forsdf = copy.deepcopy(shapes)
+                shape_groups_forsdf = copy.deepcopy(shape_groups)
+                for si in shapes_forsdf:
+                    si.stroke_width = torch.FloatTensor([0]).to(device)
+                for sg_idx, sgi in enumerate(shape_groups_forsdf):
+                    sgi.fill_color = torch.FloatTensor([1, 1, 1, 1]).to(device)
+                    sgi.shape_ids = torch.LongTensor([sg_idx]).to(device)
+                sargs_forsdf = pydiffvg.RenderFunction.serialize_scene(
+                    w, h, shapes_forsdf, shape_groups_forsdf)
+                with torch.no_grad():
+                    im_forsdf = render(w, h, 2, 2, 0, None, *sargs_forsdf)
+                # use alpha channel is a trick to get 0-1 image
+                im_forsdf = (im_forsdf[:, :, 3]).detach().cpu().numpy()
+                loss_weight = get_sdf(im_forsdf, normalize='to1')
+                loss_weight += loss_weight_keep
+                loss_weight = np.clip(loss_weight, 0, 1)
+                loss_weight = torch.FloatTensor(loss_weight).to(device)
+            if cfg.save.loss:
+                save_loss = loss.squeeze(dim=0).mean(dim=0,keepdim=False).cpu().detach().numpy()
+                save_weight = loss_weight.cpu().detach().numpy()
+                save_weighted_loss = save_loss*save_weight
+                # normalize to [0,1]
+                save_loss = (save_loss - np.min(save_loss))/np.ptp(save_loss)
+                save_weight = (save_weight - np.min(save_weight))/np.ptp(save_weight)
+                save_weighted_loss = (save_weighted_loss - np.min(save_weighted_loss))/np.ptp(save_weighted_loss)
+                # save
+                plt.imshow(save_loss, cmap='Reds')
+                plt.axis('off')
+                # plt.colorbar()
+                filename = os.path.join(cfg.experiment_dir, "loss", "{}-iter{}-mseloss.png".format(pathn_record_str, t))
+                check_and_create_dir(filename)
+                plt.savefig(filename, dpi=800)
+                plt.close()
+                plt.imshow(save_weight, cmap='Greys')
+                plt.axis('off')
+                # plt.colorbar()
+                filename = os.path.join(cfg.experiment_dir, "loss", "{}-iter{}-sdfweight.png".format(pathn_record_str, t))
+                plt.savefig(filename, dpi=800)
+                plt.close()
+                plt.imshow(save_weighted_loss, cmap='Reds')
+                plt.axis('off')
+                # plt.colorbar()
+                filename = os.path.join(cfg.experiment_dir, "loss", "{}-iter{}-weightedloss.png".format(pathn_record_str, t))
+                plt.savefig(filename, dpi=800)
+                plt.close()
+            if loss_weight is None:
+                loss = loss.sum(1).mean()
+            else:
+                loss = (loss.sum(1)*loss_weight).mean()
+            # if (cfg.loss.bis_loss_weight is not None)  and (cfg.loss.bis_loss_weight > 0):
+            #     loss_bis = bezier_intersection_loss(point_var[0]) * cfg.loss.bis_loss_weight
+            #     loss = loss + loss_bis
+            if (cfg.loss.xing_loss_weight is not None) \
+                    and (cfg.loss.xing_loss_weight > 0):
+                loss_xing = xing_loss(point_var) * cfg.loss.xing_loss_weight
+                loss = loss + loss_xing
+            loss_list.append(loss.item())
+            t_range.set_postfix({'loss': loss.item()})
+            loss.backward()
+            # step
+            for _, (optim, scheduler) in optim_schedular_dict.items():
+                optim.step()
+                scheduler.step()
+            for group in shape_groups_record:
+                group.fill_color.data.clamp_(0.0, 1.0)
+        if cfg.loss.use_distance_weighted_loss:
+            loss_weight_keep = loss_weight.detach().cpu().numpy() * 1
+        if not cfg.trainable.record:
+            for _, pi in pg.items():
+                for ppi in pi:
+                    pi.require_grad = False
+            optim_schedular_dict = {}
+        if cfg.save.image:
+            filename = os.path.join(
+                cfg.experiment_dir, "demo-png", "{}.png".format(pathn_record_str))
+            check_and_create_dir(filename)
+            if cfg.use_ycrcb:
+                imshow = ycrcb_conversion(
+                    img, format='[2D x 3]', reverse=True).detach().cpu()
+            else:
+                imshow = img.detach().cpu()
+            pydiffvg.imwrite(imshow, filename, gamma=gamma)
+        if cfg.save.output:
+            filename = os.path.join(
+                cfg.experiment_dir, "output-svg", "{}.svg".format(pathn_record_str))
+            check_and_create_dir(filename)
+            pydiffvg.save_svg(filename, w, h, shapes_record, shape_groups_record)
+        loss_matrix.append(loss_list)
+        # calculate the pixel loss
+        # pixel_loss = ((x-gt)**2).sum(dim=1, keepdim=True).sqrt_() # [N,1,H, W]
+        # region_loss = adaptive_avg_pool2d(pixel_loss, cfg.region_loss_pool_size)
+        # loss_weight = torch.softmax(region_loss.reshape(1, 1, -1), dim=-1)\
+        #     .reshape_as(region_loss)
+        pos_init_method = naive_coord_init(x, gt)
+        if cfg.coord_init.type == 'naive':
+            pos_init_method = naive_coord_init(x, gt)
+        elif cfg.coord_init.type == 'sparse':
+            pos_init_method = sparse_coord_init(x, gt)
+        elif cfg.coord_init.type == 'random':
+            pos_init_method = random_coord_init([h, w])
+        else:
+            raise ValueError
+        if cfg.save.video:
+            print("saving iteration video...")
+            img_array = []
+            for ii in range(0, cfg.num_iter):
+                filename = os.path.join(
+                    cfg.experiment_dir, "video-png",
+                    "{}-iter{}.png".format(pathn_record_str, ii))
+                img = cv2.imread(filename)
+                # cv2.putText(
+                #     img, "Path:{} \nIteration:{}".format(pathn_record_str, ii),
+                #     (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
+                img_array.append(img)
+            videoname = os.path.join(
+                cfg.experiment_dir, "video-mp4",
+                "{}.mp4".format(pathn_record_str))
+            check_and_create_dir(videoname)
+            out = cv2.VideoWriter(
+                videoname,
+                cv2.VideoWriter_fourcc(*'mp4v'),
+                # cv2.VideoWriter_fourcc(*'FFV1'),
+                20.0, (w, h))
+            for iii in range(len(img_array)):
+                out.write(img_array[iii])
+            out.release()
+            # shutil.rmtree(os.path.join(cfg.experiment_dir, "video-png"))
+    print("The last loss is: {}".format(loss.item()))

LIVE/env.yml ADDED Viewed

	@@ -0,0 +1,164 @@

+name: live
+channels:
+  - pytorch
+  - anaconda
+  - conda-forge
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=4.5=1_gnu
+  - blas=1.0=mkl
+  - bzip2=1.0.8=h7b6447c_0
+  - ca-certificates=2021.5.30=ha878542_0
+  - certifi=2021.5.30=py37h06a4308_0
+  - cloudpickle=1.6.0=py_0
+  - cmake=3.18.2=ha30ef3c_0
+  - cudatoolkit=10.2.89=hfd86e86_1
+  - cycler=0.10.0=py37_0
+  - cytoolz=0.11.0=py37h7b6447c_0
+  - dask-core=2021.6.2=pyhd3eb1b0_0
+  - decorator=5.0.9=pyhd3eb1b0_0
+  - expat=2.2.10=he6710b0_2
+  - ffmpeg=4.3=hf484d3e_0
+  - freetype=2.10.4=h5ab3b9f_0
+  - gmp=6.2.1=h2531618_2
+  - gnutls=3.6.15=he1e5248_0
+  - imageio=2.9.0=pyhd3eb1b0_0
+  - intel-openmp=2021.2.0=h06a4308_610
+  - jpeg=9b=h024ee3a_2
+  - kiwisolver=1.3.1=py37h2531618_0
+  - krb5=1.18.2=h173b8e3_0
+  - lame=3.100=h7b6447c_0
+  - lcms2=2.12=h3be6417_0
+  - ld_impl_linux-64=2.35.1=h7274673_9
+  - libcurl=7.71.1=h20c2e04_1
+  - libedit=3.1.20191231=h14c3975_1
+  - libffi=3.3=he6710b0_2
+  - libgcc-ng=9.3.0=h5101ec6_17
+  - libgfortran-ng=7.5.0=ha8ba4b0_17
+  - libgfortran4=7.5.0=ha8ba4b0_17
+  - libgomp=9.3.0=h5101ec6_17
+  - libiconv=1.15=h63c8f33_5
+  - libidn2=2.3.1=h27cfd23_0
+  - libpng=1.6.37=hbc83047_0
+  - libssh2=1.9.0=h1ba5d50_1
+  - libstdcxx-ng=9.3.0=hd4cf53a_17
+  - libtasn1=4.16.0=h27cfd23_0
+  - libtiff=4.2.0=h85742a9_0
+  - libunistring=0.9.10=h27cfd23_0
+  - libuv=1.40.0=h7b6447c_0
+  - libwebp-base=1.2.0=h27cfd23_0
+  - locket=0.2.1=py37h06a4308_1
+  - lz4-c=1.9.3=h2531618_0
+  - matplotlib-base=3.3.4=py37h62a2d02_0
+  - mkl=2021.2.0=h06a4308_296
+  - mkl-service=2.3.0=py37h27cfd23_1
+  - mkl_fft=1.3.0=py37h42c9631_2
+  - mkl_random=1.2.1=py37ha9443f7_2
+  - ncurses=6.2=he6710b0_1
+  - nettle=3.7.3=hbbd107a_1
+  - networkx=2.2=py37_1
+  - ninja=1.10.2=hff7bd54_1
+  - numpy=1.20.2=py37h2d18471_0
+  - numpy-base=1.20.2=py37hfae3a4d_0
+  - olefile=0.46=py37_0
+  - openh264=2.1.0=hd408876_0
+  - openssl=1.1.1k=h27cfd23_0
+  - partd=1.2.0=pyhd3eb1b0_0
+  - pillow=8.2.0=py37he98fc37_0
+  - pip=21.1.3=py37h06a4308_0
+  - pyparsing=2.4.7=pyhd3eb1b0_0
+  - python=3.7.10=h12debd9_4
+  - python-dateutil=2.8.1=pyhd3eb1b0_0
+  - pytorch=1.9.0=py3.7_cuda10.2_cudnn7.6.5_0
+  - pywavelets=1.1.1=py37h7b6447c_2
+  - pyyaml=5.4.1=py37h27cfd23_1
+  - readline=8.1=h27cfd23_0
+  - rhash=1.4.0=h1ba5d50_0
+  - scikit-image=0.18.1=py37ha9443f7_0
+  - scipy=1.6.2=py37had2a1c9_1
+  - setuptools=52.0.0=py37h06a4308_0
+  - six=1.16.0=pyhd3eb1b0_0
+  - sqlite=3.36.0=hc218d9a_0
+  - tifffile=2020.10.1=py37hdd07704_2
+  - tk=8.6.10=hbc83047_0
+  - toolz=0.11.1=pyhd3eb1b0_0
+  - torchvision=0.10.0=py37_cu102
+  - tornado=6.1=py37h27cfd23_0
+  - typing_extensions=3.10.0.0=pyh06a4308_0
+  - wheel=0.36.2=pyhd3eb1b0_0
+  - xz=5.2.5=h7b6447c_0
+  - yaml=0.2.5=h7b6447c_0
+  - zlib=1.2.11=h7b6447c_3
+  - zstd=1.4.5=h9ceee32_0
+  - pip:
+    - absl-py==0.13.0
+    - aiohttp==3.7.4.post0
+    - async-timeout==3.0.1
+    - attrs==21.2.0
+    - cachetools==4.2.2
+    - cffi==1.14.5
+    - chardet==4.0.0
+    - coloredlogs==15.0.1
+    - cssutils==2.3.0
+    - diffvg==0.0.1
+    - easydict==1.9
+    - einops==0.3.0
+    - fsspec==2021.6.1
+    - future==0.18.2
+    - google-auth==1.32.1
+    - google-auth-oauthlib==0.4.4
+    - greenlet==1.1.0
+    - grpcio==1.38.1
+    - humanfriendly==9.2
+    - idna==2.10
+    - imageio-ffmpeg==0.4.4
+    - importlib-metadata==4.6.0
+    - jinja2==3.0.1
+    - jsonpatch==1.32
+    - jsonpointer==2.1
+    - kornia==0.1.4
+    - llvmlite==0.36.0
+    - markdown==3.3.4
+    - markupsafe==2.0.1
+    - multidict==5.1.0
+    - numba==0.53.1
+    - oauthlib==3.1.1
+    - opencv-python==4.5.3.56
+    - packaging==20.9
+    - pandas==1.3.0
+    - protobuf==3.17.3
+    - pyaml==20.4.0
+    - pyasn1==0.4.8
+    - pyasn1-modules==0.2.8
+    - pybind11==2.6.2
+    - pycparser==2.20
+    - pydeprecate==0.3.0
+    - pypng==0.0.20
+    - pytorch-lightning==1.3.8
+    - pytorch-ranger==0.1.1
+    - pytz==2021.1
+    - pyzmq==22.1.0
+    - requests==2.25.1
+    - requests-oauthlib==1.3.0
+    - rsa==4.7.2
+    - scikit-fmm==2021.10.29
+    - seaborn==0.11.1
+    - sqlalchemy==1.4.20
+    - svgpathtools==1.4.1
+    - svgwrite==1.4.1
+    - tensorboard==2.4.1
+    - tensorboard-plugin-wit==1.8.0
+    - torch-optimizer==0.0.1a15
+    - torch-tools==0.1.5
+    - torchfile==0.1.0
+    - torchmetrics==0.4.0
+    - tqdm==4.61.1
+    - urllib3==1.26.6
+    - visdom==0.1.8.9
+    - websocket-client==1.1.0
+    - werkzeug==2.0.1
+    - yarl==1.6.3
+    - zipp==3.4.1
+prefix: /home/UserName/.conda/envs/live

LIVE/example.png ADDED Viewed

LIVE/system_info.txt ADDED Viewed

	@@ -0,0 +1 @@

+ {'sys.platform': 'linux', 'Python': '3.7.10 (default, Jun 4 2021, 14:48:32) [GCC 7.5.0]', 'CUDA available': True, 'GPU 0': 'Tesla V100-SXM2-32GB', 'GCC': 'gcc (GCC) 8.1.0', 'PyTorch': '1.9.0', 'PyTorch compiling details': 'PyTorch built with:\n - GCC 7.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2021.2-Product Build 20210312 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.1.2 (Git Hash 98be7e8afa711dc9b66c8ff3504129cb82013cdb)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 10.2\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_37,code=compute_37\n - CuDNN 7.6.5\n - Magma 2.5.2\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=10.2, CUDNN_VERSION=7.6.5, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.9.0, USE_CUDA=ON, 
USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, \n', 'TorchVision': '0.10.0'}

LIVE/user_study_state.csv ADDED Viewed

	@@ -0,0 +1,148 @@

+Page 1,,
+,,
+"1. Please carefully select the method that best rebuilds the original image ""progressively"", showing a human-like interpretation.",,
+Option,Percentage%,Count
+DiffVG,20.00%,4
+Painting,25.00%,5
+LIVE,55.00%,11
+Total,,20
+,,
+2. Same question,,
+Option,Percentage%,Count
+DiffVG,25.00%,5
+Painting,15.00%,3
+LIVE,60.00%,12
+Total,,20
+,,
+3. Same question,,
+Option,Percentage%,Count
+DiffVG,10.00%,2
+Painting,10.00%,2
+LIVE,80.00%,16
+Total,,20
+,,
+4. Same question,,
+Option,Percentage%,Count
+DiffVG,40.00%,8
+Painting,0.00%,0
+LIVE,60.00%,12
+Total,,20
+,,
+5. Same question,,
+Option,Percentage%,Count
+DiffVG,20.00%,4
+Painting,5.00%,1
+LIVE,75.00%,15
+Total,,20
+,,
+6. Same question,,
+Option,Percentage%,Count
+DiffVG,20.00%,4
+Painting,15.00%,3
+LIVE,65.00%,13
+Total,,20
+,,
+7. Same question,,
+Option,Percentage%,Count
+DiffVG,5.00%,1
+Painting,10.00%,2
+LIVE,85.00%,17
+Total,,20
+,,
+8. Same question,,
+Option,Percentage%,Count
+DiffVG,25.00%,5
+Painting,10.00%,2
+LIVE,65.00%,13
+Total,,20
+,,
+9. Same question,,
+Option,Percentage%,Count
+DiffVG,15.00%,3
+Painting,5.00%,1
+LIVE,80.00%,16
+Total,,20
+,,
+10. Same question,,
+Option,Percentage%,Count
+DiffVG,25.00%,5
+Painting,5.00%,1
+LIVE,70.00%,14
+Total,,20
+,,
+11. Same question,,
+Option,Percentage%,Count
+DiffVG,10.00%,2
+Painting,15.00%,3
+LIVE,75.00%,15
+Total,,20
+,,
+12. Same question,,
+Option,Percentage%,Count
+DiffVG,15.00%,3
+Painting,10.00%,2
+LIVE,75.00%,15
+Total,,20
+,,
+13. Same question,,
+Option,Percentage%,Count
+DiffVG,25.00%,5
+Painting,15.00%,3
+LIVE,60.00%,12
+Total,,20
+,,
+14. Same question,,
+Option,Percentage%,Count
+DiffVG,5.00%,1
+Painting,15.00%,3
+LIVE,80.00%,16
+Total,,20
+,,
+15. Same question,,
+Option,Percentage%,Count
+DiffVG,40.00%,8
+Painting,5.00%,1
+LIVE,55.00%,11
+Total,,20
+,,
+16. Same question,,
+Option,Percentage%,Count
+DiffVG,0.00%,0
+Painting,15.00%,3
+LIVE,85.00%,17
+Total,,20
+,,
+17. Same question,,
+Option,Percentage%,Count
+DiffVG,0.00%,0
+Painting,15.00%,3
+LIVE,85.00%,17
+Total,,20
+,,
+18. Same question,,
+Option,Percentage%,Count
+DiffVG,0.00%,0
+Painting,15.00%,3
+LIVE,85.00%,17
+Total,,20
+,,
+19. Same question,,
+Option,Percentage%,Count
+DiffVG,0.00%,0
+Painting,15.00%,3
+LIVE,85.00%,17
+Total,,20
+,,
+20. Same question,,
+Option,Percentage%,Count
+DiffVG,0.00%,0
+Painting,15.00%,3
+LIVE,85.00%,17
+Total,,20
+,,
+21. Same question,,
+Option,Percentage%,Count
+DiffVG,0.00%,0
+Painting,15.00%,3
+LIVE,85.00%,17
+Total,,20

README.md CHANGED Viewed

@@ -1,13 +1,14 @@
 ---
 title: LIVE
-emoji: 📈
-colorFrom: purple
-colorTo: pink
 sdk: gradio
-sdk_version: 3.0.13
 app_file: app.py
 pinned: false
-license: mit
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: LIVE
+emoji: 📊
+colorFrom: pink
+colorTo: indigo
 sdk: gradio
+sdk_version: 2.9.1
 app_file: app.py
 pinned: false
+license: gpl-3.0
 ---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference

__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ __author__ = "Xu Ma"
2	+ __email__ = "ma.xu1@northeastern.edu"

aabb.h ADDED Viewed

	@@ -0,0 +1,67 @@

+#pragma once
+#include "diffvg.h"
+#include "cuda_utils.h"
+#include "vector.h"
+#include "matrix.h"
+// Axis-aligned bounding box in 2D, stored as its two extreme corners.
+struct AABB {
+    // Component-wise minimum corner.
+    Vector2f p_min;
+    // Component-wise maximum corner.
+    Vector2f p_max;
+    // Called without arguments, the box is "inverted": p_min is +infinity and
+    // p_max is -infinity in both components.
+    DEVICE
+    inline AABB(const Vector2f &p_min = Vector2f{infinity<float>(), infinity<float>()},
+                const Vector2f &p_max = Vector2f{-infinity<float>(), -infinity<float>()})
+        : p_min(p_min), p_max(p_max) {}
+};
+// Returns a box whose corners are the component-wise min/max of `box`'s
+// corners and the point `p`.
+DEVICE
+inline
+AABB merge(const AABB &box, const Vector2f &p) {
+    const Vector2f lower{min(p.x, box.p_min.x), min(p.y, box.p_min.y)};
+    const Vector2f upper{max(p.x, box.p_max.x), max(p.y, box.p_max.y)};
+    return AABB{lower, upper};
+}
+// Returns a box whose corners are the component-wise min of both p_min
+// corners and the component-wise max of both p_max corners.
+DEVICE
+inline
+AABB merge(const AABB &box0, const AABB &box1) {
+    const Vector2f lower{min(box0.p_min.x, box1.p_min.x), min(box0.p_min.y, box1.p_min.y)};
+    const Vector2f upper{max(box0.p_max.x, box1.p_max.x), max(box0.p_max.y, box1.p_max.y)};
+    return AABB{lower, upper};
+}
+// True when `p` lies within `box` on both axes; the bounds are inclusive.
+DEVICE
+inline
+bool inside(const AABB &box, const Vector2f &p) {
+    const bool in_x_range = p.x >= box.p_min.x && p.x <= box.p_max.x;
+    const bool in_y_range = p.y >= box.p_min.y && p.y <= box.p_max.y;
+    return in_x_range && in_y_range;
+}
+// True when `p` lies within `box` after every side of the box has been
+// pushed outwards by `radius`; the bounds are inclusive.
+DEVICE
+inline
+bool inside(const AABB &box, const Vector2f &p, float radius) {
+    const bool in_x_range = p.x >= box.p_min.x - radius && p.x <= box.p_max.x + radius;
+    const bool in_y_range = p.y >= box.p_min.y - radius && p.y <= box.p_max.y + radius;
+    return in_x_range && in_y_range;
+}
+// Returns a copy of `box` with p_min moved by -width and p_max moved by
+// +width along both axes.
+DEVICE
+inline
+AABB enlarge(const AABB &box, float width) {
+    const Vector2f lower{box.p_min.x - width, box.p_min.y - width};
+    const Vector2f upper{box.p_max.x + width, box.p_max.y + width};
+    return AABB{lower, upper};
+}
+// Passes the four corners of `box` through xform_pt(xform, .) and returns the
+// axis-aligned box obtained by merging the four results.
+DEVICE
+inline
+AABB transform(const Matrix3x3f &xform, const AABB &box) {
+    const Vector2f corners[4] = {
+        Vector2f{box.p_min.x, box.p_min.y},
+        Vector2f{box.p_min.x, box.p_max.y},
+        Vector2f{box.p_max.x, box.p_min.y},
+        Vector2f{box.p_max.x, box.p_max.y}};
+    // Default-constructed box (+inf / -inf corners) is the merge seed.
+    AABB bounds;
+    for (int i = 0; i < 4; i++) {
+        bounds = merge(bounds, xform_pt(xform, corners[i]));
+    }
+    return bounds;
+}
+// True when `pt` lies within `box` grown by `r` on every side (inclusive
+// bounds). This is the same per-axis test as inside(box, pt, r), so it simply
+// forwards to that overload.
+DEVICE
+inline
+bool within_distance(const AABB &box, const Vector2f &pt, float r) {
+    return inside(box, pt, r);
+}

app.py ADDED Viewed

	@@ -0,0 +1,375 @@

+import os
+os.system('python setup.py install --user')
+import argparse
+import csv
+import numpy as np
+import sys
+sys.path.append("/home/user/.local/lib/python3.8/site-packages/diffvg-0.0.1-py3.8-linux-x86_64.egg")
+print(sys.path)
+from pathlib import Path
+import gradio as gr
+import torch
+import yaml
+from PIL import Image
+from subprocess import call
+import torch
+import cv2
+import matplotlib.pyplot as plt
+import random
+import argparse
+import math
+import errno
+from tqdm import tqdm
+import yaml
+from easydict import EasyDict as edict
+def run_cmd(command):
+    """Print ``command`` and then run it through the shell.
+
+    The value returned by ``call`` is discarded, so the command's exit status
+    is not checked. A ``KeyboardInterrupt`` raised while printing or running
+    prints "Process interrupted" and exits the interpreter with status 1.
+    """
+    try:
+        print(command)
+        call(command, shell=True)
+    except KeyboardInterrupt:
+        print("Process interrupted")
+        sys.exit(1)
+# run_cmd("gcc --version")
+# run_cmd("pwd")
+# run_cmd("ls")
+# run_cmd("git submodule update --init --recursive")
+# run_cmd("python setup.py install --user")
+# run_cmd("pip3 list")
+# import pydiffvg
+#
+# print("Sccuessfuly import diffvg ")
+# run_cmd("pwd")
+# run_cmd("ls")
+# run_cmd("git submodule update --init --recursive")
+# run_cmd("python setup.py install --user")
+# run_cmd("python main.py --config config/base.yaml --experiment experiment_5x1 --signature smile --target figures/smile.png --log_dir log/")
+from main import main_func
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--debug', action='store_true', default=False)
+    parser.add_argument("--config", default="config/base.yaml", type=str)
+    parser.add_argument("--experiment", type=str)
+    parser.add_argument("--seed", type=int)
+    parser.add_argument("--target", type=str, help="target image path")
+    parser.add_argument('--log_dir', metavar='DIR', default="log/")
+    parser.add_argument('--initial', type=str, default="random", choices=['random', 'circle'])
+    parser.add_argument('--signature', default="demo", nargs='+', type=str)
+    parser.add_argument('--seginit', nargs='+', type=str)
+    parser.add_argument("--num_segments", type=int, default=4)
+    # parser.add_argument("--num_paths", type=str, default="1,1,1")
+    # parser.add_argument("--num_iter", type=int, default=500)
+    # parser.add_argument('--free', action='store_true')
+    # Please ensure that image resolution is divisible by pool_size; otherwise the performance would drop a lot.
+    # parser.add_argument('--pool_size', type=int, default=40, help="the pooled image size for next path initialization")
+    # parser.add_argument('--save_loss', action='store_true')
+    # parser.add_argument('--save_init', action='store_true')
+    # parser.add_argument('--save_image', action='store_true')
+    # parser.add_argument('--save_video', action='store_true')
+    # parser.add_argument('--print_weight', action='store_true')
+    # parser.add_argument('--circle_init_radius',  type=float)
+    cfg = edict()
+    args = parser.parse_args()
+    cfg.debug = args.debug
+    cfg.config = args.config
+    cfg.experiment = args.experiment
+    cfg.seed = args.seed
+    cfg.target = args.target
+    cfg.log_dir = args.log_dir
+    cfg.initial = args.initial
+    cfg.signature = args.signature
+    # set cfg num_segments in command
+    cfg.num_segments = args.num_segments
+    if args.seginit is not None:
+        cfg.seginit = edict()
+        cfg.seginit.type = args.seginit[0]
+        if cfg.seginit.type == 'circle':
+            cfg.seginit.radius = float(args.seginit[1])
+    return cfg
+def app_experiment_change(experiment_id):
+    if experiment_id == "add [1] total 1 path for demonstration":
+        return "experiment_1x1"
+    if experiment_id == "add [1, 1, 1, 1, 1] total 5 paths one by one":
+        return "experiment_5x1"
+    elif experiment_id == "add [1, 1, 1, 1, 1, 1, 1, 1] total 8 paths one by one":
+        return "experiment_8x1"
+    elif experiment_id == "add [1,2,4,8,16,32, ...] total 128 paths":
+        return "experiment_exp2_128"
+    elif experiment_id == "add [1,2,4,8,16,32, ...] total 256 paths":
+        return "experiment_exp2_256"
+cfg_arg = parse_args()
+temp_image = np.random.rand(224,224,3)
+temp_text = "start"
+temp_input = np.random.rand(224,224,3)
+def run_live(img, experiment_id, num_iter, cfg_arg=cfg_arg):
+    experiment = app_experiment_change(experiment_id)
+    cfg_arg.target = img
+    cfg_arg.experiment = experiment
+    img, text = main_func(img, experiment_id, num_iter, cfg_arg=cfg_arg)
+    return img, text
+# ROOT_PATH = sys.path[0]  # 根目录
+# # 模型路径
+# model_path = "ultralytics/yolov5"
+# # 模型名称临时变量
+# model_name_tmp = ""
+# # 设备临时变量
+# device_tmp = ""
+# # 文件后缀
+# suffix_list = [".csv", ".yaml"]
+# def parse_args(known=False):
+#     parser = argparse.ArgumentParser(description="Gradio LIVE")
+#     parser.add_argument(
+#         "--model_name", "-mn", default="yolov5s", type=str, help="model name"
+#     )
+#     parser.add_argument(
+#         "--model_cfg",
+#         "-mc",
+#         default="./model_config/model_name_p5_all.yaml",
+#         type=str,
+#         help="model config",
+#     )
+#     parser.add_argument(
+#         "--cls_name",
+#         "-cls",
+#         default="./cls_name/cls_name.yaml",
+#         type=str,
+#         help="cls name",
+#     )
+#     parser.add_argument(
+#         "--nms_conf",
+#         "-conf",
+#         default=0.5,
+#         type=float,
+#         help="model NMS confidence threshold",
+#     )
+#     parser.add_argument(
+#         "--nms_iou", "-iou", default=0.45, type=float, help="model NMS IoU threshold"
+#     )
+#
+#     parser.add_argument(
+#         "--label_dnt_show",
+#         "-lds",
+#         action="store_false",
+#         default=True,
+#         help="label show",
+#     )
+#     parser.add_argument(
+#         "--device",
+#         "-dev",
+#         default="cpu",
+#         type=str,
+#         help="cuda or cpu, hugging face only cpu",
+#     )
+#     parser.add_argument(
+#         "--inference_size", "-isz", default=640, type=int, help="model inference size"
+#     )
+#
+#     args = parser.parse_known_args()[0] if known else parser.parse_args()
+#     return args
+# #  模型加载
+# def model_loading(model_name, device):
+#
+#     # 加载本地模型
+#     model = torch.hub.load(model_path, model_name, force_reload=True, device=device)
+#
+#     return model
+# # 检测信息
+# def export_json(results, model, img_size):
+#
+#     return [
+#         [
+#             {
+#                 "id": int(i),
+#                 "class": int(result[i][5]),
+#                 "class_name": model.model.names[int(result[i][5])],
+#                 "normalized_box": {
+#                     "x0": round(result[i][:4].tolist()[0], 6),
+#                     "y0": round(result[i][:4].tolist()[1], 6),
+#                     "x1": round(result[i][:4].tolist()[2], 6),
+#                     "y1": round(result[i][:4].tolist()[3], 6),
+#                 },
+#                 "confidence": round(float(result[i][4]), 2),
+#                 "fps": round(1000 / float(results.t[1]), 2),
+#                 "width": img_size[0],
+#                 "height": img_size[1],
+#             }
+#             for i in range(len(result))
+#         ]
+#         for result in results.xyxyn
+#     ]
+# def yolo_det(img, experiment_id, device=None, model_name=None, inference_size=None, conf=None, iou=None, label_opt=None, model_cls=None):
+#
+#     global model, model_name_tmp, device_tmp
+#
+#     if model_name_tmp != model_name:
+#         # 模型判断，避免反复加载
+#         model_name_tmp = model_name
+#         model = model_loading(model_name_tmp, device)
+#     elif device_tmp != device:
+#         device_tmp = device
+#         model = model_loading(model_name_tmp, device)
+#
+#     # -----------模型调参-----------
+#     model.conf = conf  # NMS 置信度阈值
+#     model.iou = iou  # NMS IOU阈值
+#     model.max_det = 1000  # 最大检测框数
+#     model.classes = model_cls  # 模型类别
+#
+#     results = model(img, size=inference_size)  # 检测
+#     results.render(labels=label_opt)  # 渲染
+#
+#     det_img = Image.fromarray(results.imgs[0])  # 检测图片
+#
+#     det_json = export_json(results, model, img.size)[0]  # 检测信息
+#
+#     return det_img, det_json
+# def run_cmd(command):
+#     try:
+#         print(command)
+#         call(command, shell=True)
+#     except KeyboardInterrupt:
+#         print("Process interrupted")
+#         sys.exit(1)
+#
+# run_cmd("gcc --version")
+# run_cmd("pwd")
+# run_cmd("ls")
+# run_cmd("git submodule update --init --recursive")
+# run_cmd("python setup.py install --user")
+# run_cmd("ls")
+# run_cmd("python main.py --config config/base.yaml --experiment experiment_5x1 --signature smile --target figures/smile.png --log_dir log/")
+# # yaml文件解析
+# def yaml_parse(file_path):
+#     return yaml.safe_load(open(file_path, "r", encoding="utf-8").read())
+#
+#
+# # yaml csv 文件解析
+# def yaml_csv(file_path, file_tag):
+#     file_suffix = Path(file_path).suffix
+#     if file_suffix == suffix_list[0]:
+#         # 模型名称
+#         file_names = [i[0] for i in list(csv.reader(open(file_path)))]  # csv版
+#     elif file_suffix == suffix_list[1]:
+#         # 模型名称
+#         file_names = yaml_parse(file_path).get(file_tag)  # yaml版
+#     else:
+#         print(f"{file_path}格式不正确！程序退出！")
+#         sys.exit()
+#
+#     return file_names
+def main(args):
+    """Build the Gradio interface around ``run_live`` and launch it.
+
+    ``args`` is accepted but not read anywhere in this function. Any Gradio
+    instances that are already running are closed first.
+    """
+    gr.close_all()
+    # -------------------Inputs-------------------
+    inputs_iteration = gr.inputs.Slider(
+        label="Optimization Iteration",
+        default=500, maximum=600, minimum=100, step=100)
+    inputs_img = gr.inputs.Image(type="pil", label="Input Image", shape=[160, 160])
+    experiment_id = gr.inputs.Radio(
+        choices=[
+            "add [1] total 1 path for demonstration",
+            "add [1, 1, 1, 1, 1] total 5 paths one by one",
+            "add [1, 1, 1, 1, 1, 1, 1, 1] total 8 paths one by one",
+            "add [1,2,4,8,16,32, ...] total 128 paths",
+            "add [1,2,4,8,16,32, ...] total 256 paths"], type="value", default="add [1, 1, 1, 1, 1] total 5 paths one by one", label="Path Adding Scheduler"
+    )
+    # inputs: the order matches the positional parameters of run_live
+    inputs = [
+        inputs_img,  # input image
+        experiment_id, # path adding scheduler
+        inputs_iteration, # input iteration
+    ]
+    # outputs
+    outputs = gr.outputs.Image(type="numpy", label="Vectorized Image")
+    outputs02 = gr.outputs.File(label="Generated SVG output")
+    # title
+    title = "LIVE: Towards Layer-wise Image Vectorization"
+    # description
+    description = "<div align='center'>(CVPR 2022 Oral Presentation)</div>" \
+                  "<div align='center'>Without GPUs, LIVE will cost longer time.</div>" \
+                  "<div align='center'>For efficiency, we rescale input to 160x160 (smaller size and fewer iterations will decrease the reconstructions).</div> "
+    # examples: each row is [image path, scheduler label, iteration count]
+    examples = [
+        [
+            "./examples/1.png",
+            "add [1] total 1 path for demonstration",
+            100,
+        ],
+        [
+            "./examples/2.png",
+            "add [1, 1, 1, 1, 1] total 5 paths one by one",
+            300,
+        ],
+        [
+            "./examples/3.jpg",
+            "add [1,2,4,8,16,32, ...] total 128 paths",
+            300,
+        ],
+        [
+            "./examples/4.png",
+            "add [1,2,4,8,16,32, ...] total 256 paths",
+            300,
+        ],
+        [
+            "./examples/5.png",
+            "add [1, 1, 1, 1, 1] total 5 paths one by one",
+            300,
+        ],
+    ]
+    # Interface
+    gr.Interface(
+        fn=run_live,
+        inputs=inputs,
+        outputs=[outputs, outputs02],
+        title=title,
+        description=description,
+        examples=examples,
+        theme="seafoam",
+        # live=True, # update the output in real time
+        flagging_dir="log"  # output directory
+        # ).launch(inbrowser=True, auth=['admin', 'admin'])
+    ).launch(
+        inbrowser=True,  # automatically open the default browser
+        show_tips=True,  # automatically show the latest gradio features
+        enable_queue=True
+        # favicon_path="./icon/logo.ico",
+    )
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)

atomic.cpp ADDED Viewed

	@@ -0,0 +1,27 @@

+//A hacky solution to get around the Ellipse include
+#ifdef WIN32
+#include <windows.h>
+#include <cstdint>
+// Adds `source` to `target` with a compare-and-swap retry loop and returns the
+// value `target` held before the addition.
+float win_atomic_add(float &target, float source) {
+	// The unions give an int view of the float bits so they can be handed to
+	// InterlockedCompareExchange, which operates on LONG values.
+	union { int i; float f; } old_val;
+	union { int i; float f; } new_val;
+	do {
+		old_val.f = target;
+		new_val.f = old_val.f + (float)source;
+		// Retry while the value reported by the exchange differs from the
+		// snapshot taken above.
+	} while (InterlockedCompareExchange((LONG*)&target, (LONG)new_val.i, (LONG)old_val.i) != old_val.i);
+	return old_val.f;
+}
+// Double-precision counterpart: adds `source` to `target` with a 64-bit
+// compare-and-swap retry loop and returns the value `target` held before the
+// addition.
+double win_atomic_add(double &target, double source) {
+	// The unions give an int64_t view of the double bits so they can be handed
+	// to InterlockedCompareExchange64.
+	union { int64_t i; double f; } old_val;
+	union { int64_t i; double f; } new_val;
+	do {
+		old_val.f = target;
+		new_val.f = old_val.f + (double)source;
+		// Retry while the value reported by the exchange differs from the
+		// snapshot taken above.
+	} while (InterlockedCompareExchange64((LONG64*)&target, (LONG64)new_val.i, (LONG64)old_val.i) != old_val.i);
+	return old_val.f;
+}
+#endif

atomic.h ADDED Viewed

	@@ -0,0 +1,139 @@

+#pragma once
+#include "diffvg.h"
+#include "vector.h"
+#include "matrix.h"
+// https://stackoverflow.com/questions/39274472/error-function-atomicadddouble-double-has-already-been-defined
+#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
+#else
+// Software atomicAdd for double, built on atomicCAS over the 64-bit pattern
+// of the value. The surrounding #if compiles this only when __CUDA_ARCH__ is
+// defined and below 600. Returns the value stored at `address` before the add.
+static inline DEVICE double atomicAdd(double *address, double val) {
+    unsigned long long int* address_as_ull = (unsigned long long int*)address;
+    unsigned long long int old = *address_as_ull, assumed;
+    // Adding zero: return the value read above without attempting a swap.
+    if (val == 0.0)
+        return __longlong_as_double(old);
+    do {
+        assumed = old;
+        // atomicCAS hands back what was actually stored; the loop repeats
+        // until that equals the value the sum was computed from.
+        old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val +__longlong_as_double(assumed)));
+    } while (assumed != old);
+    return __longlong_as_double(old);
+}
+#endif
+#ifndef WIN32
+    // Shared implementation behind the float/double atomic_add overloads on
+    // non-Windows builds. Adds `source` to `target` and returns the value read
+    // from `target` before the addition.
+    template <typename T0, typename T1>
+    DEVICE
+    inline T0 atomic_add_(T0 &target, T1 source) {
+    #ifdef __CUDA_ARCH__
+        // Device code: defer to CUDA's atomicAdd.
+        return atomicAdd(&target, (T0)source);
+    #else
+        // Host code: compare-exchange retry loop using the compiler builtin,
+        // with sequentially consistent ordering on both success and failure.
+        T0 old_val;
+        T0 new_val;
+        do {
+            old_val = target;
+            new_val = old_val + source;
+        } while (!__atomic_compare_exchange(&target, &old_val, &new_val, true,
+            std::memory_order::memory_order_seq_cst,
+            std::memory_order::memory_order_seq_cst));
+        return old_val;
+    #endif
+    }
+    // float overload: forwards to atomic_add_ and returns its result.
+    DEVICE
+    inline
+    float atomic_add(float &target, float source) {
+        return atomic_add_(target, source);
+    }
+    // double overload: forwards to atomic_add_ and returns its result.
+    DEVICE
+    inline
+    double atomic_add(double &target, double source) {
+        return atomic_add_(target, source);
+    }
+#else
+	float win_atomic_add(float &target, float source);
+	double win_atomic_add(double &target, double source);
+    // WIN32 float overload of atomic_add: uses atomicAdd when compiling
+    // device code (__CUDA_ARCH__ defined) and win_atomic_add otherwise.
+    DEVICE
+    static float atomic_add(float &target, float source) {
+    #ifdef __CUDA_ARCH__
+        return atomicAdd(&target, source);
+    #else
+		return win_atomic_add(target, source);
+    #endif
+    }
+    // WIN32 double overload of atomic_add: uses atomicAdd when compiling
+    // device code (__CUDA_ARCH__ defined) and win_atomic_add otherwise.
+    DEVICE
+    static double atomic_add(double &target, double source) {
+    #ifdef __CUDA_ARCH__
+        return atomicAdd(&target, (double)source);
+    #else
+		return win_atomic_add(target, source);
+    #endif
+    }
+#endif
+// Pointer convenience overload: dereferences target, casts source to T0 and
+// forwards to the reference overload of atomic_add, returning its result.
+template <typename T0, typename T1>
+DEVICE
+inline T0 atomic_add(T0 *target, T1 source) {
+    return atomic_add(*target, (T0)source);
+}
+// Adds each component of source to the matching component of target with one
+// atomic_add per component (index 0 first), then returns a copy of target
+// taken after both additions were issued.
+template <typename T0, typename T1>
+DEVICE
+inline TVector2<T0> atomic_add(TVector2<T0> &target, const TVector2<T1> &source) {
+    for (int axis = 0; axis < 2; ++axis) {
+        atomic_add(target[axis], source[axis]);
+    }
+    return target;
+}
+// Adds the two components of source to target[0] and target[1], casting each
+// component to T0 and issuing one atomic_add per element in index order.
+template <typename T0, typename T1>
+DEVICE
+inline void atomic_add(T0 *target, const TVector2<T1> &source) {
+    for (int axis = 0; axis < 2; ++axis) {
+        atomic_add(target[axis], (T0)source[axis]);
+    }
+}
+// Adds each component of source to the matching component of target with one
+// atomic_add per component (index 0 first), then returns a copy of target
+// taken after all three additions were issued.
+template <typename T0, typename T1>
+DEVICE
+inline TVector3<T0> atomic_add(TVector3<T0> &target, const TVector3<T1> &source) {
+    for (int axis = 0; axis < 3; ++axis) {
+        atomic_add(target[axis], source[axis]);
+    }
+    return target;
+}
+// Adds the three components of source to target[0..2], casting each
+// component to T0 and issuing one atomic_add per element in index order.
+template <typename T0, typename T1>
+DEVICE
+inline void atomic_add(T0 *target, const TVector3<T1> &source) {
+    for (int axis = 0; axis < 3; ++axis) {
+        atomic_add(target[axis], (T0)source[axis]);
+    }
+}
+// Adds each component of source to the matching component of target with one
+// atomic_add per component (index 0 first), then returns a copy of target
+// taken after all four additions were issued.
+template <typename T0, typename T1>
+DEVICE
+inline TVector4<T0> atomic_add(TVector4<T0> &target, const TVector4<T1> &source) {
+    for (int axis = 0; axis < 4; ++axis) {
+        atomic_add(target[axis], source[axis]);
+    }
+    return target;
+}
+// Adds the four components of source to target[0..3], casting each
+// component to T0 and issuing one atomic_add per element in index order.
+template <typename T0, typename T1>
+DEVICE
+inline void atomic_add(T0 *target, const TVector4<T1> &source) {
+    for (int axis = 0; axis < 4; ++axis) {
+        atomic_add(target[axis], (T0)source[axis]);
+    }
+}
+// Adds a 3x3 matrix into nine consecutive T0 values: source(i, j) is cast to
+// T0 and atomically added to target[3 * i + j], visiting i then j in
+// increasing order.
+template <typename T0, typename T1>
+DEVICE
+inline void atomic_add(T0 *target, const TMatrix3x3<T1> &source) {
+    for (int flat = 0; flat < 9; ++flat) {
+        const int row = flat / 3;
+        const int col = flat % 3;
+        atomic_add(target[flat], (T0)source(row, col));
+    }
+}

cdf.h ADDED Viewed

	@@ -0,0 +1,29 @@

+#pragma once
+#include "diffvg.h"
+// Picks an entry of a tabulated CDF by binary search.
+//   cdf:         array of num_entries values; the search assumes it is
+//                non-decreasing.
+//   num_entries: number of entries in cdf.
+//   u:           lookup value.
+//   updated_u:   optional output; when non-null it receives u re-expressed
+//                relative to the selected bin,
+//                (u - cdf[lb - 1]) / (cdf[lb] - cdf[lb - 1]), or u / cdf[0]
+//                for the first bin.
+// Returns the smallest index lb in [0, num_entries - 1) with u < cdf[lb], or
+// num_entries - 1 when there is none.
+// Declared inline because this definition lives in a header; without it,
+// including the header from more than one translation unit produces
+// duplicate definitions.
+// NOTE(review): a zero-width bin (cdf[lb] == cdf[lb - 1], or cdf[0] == 0)
+// makes the updated_u division divide by zero -- confirm callers never
+// pass such tables.
+DEVICE
+inline
+int sample(const float *cdf, int num_entries, float u, float *updated_u = nullptr) {
+    // Binary search the cdf
+    auto lb = 0;
+    auto len = num_entries - 1 - lb;
+    while (len > 0) {
+        auto half_len = len / 2;
+        auto mid = lb + half_len;
+        assert(mid >= 0 && mid < num_entries);
+        if (u < cdf[mid]) {
+            len = half_len;
+        } else {
+            lb = mid + 1;
+            len = len - half_len - 1;
+        }
+    }
+    lb = clamp(lb, 0, num_entries - 1);
+    if (updated_u != nullptr) {
+        if (lb > 0) {
+            *updated_u = (u - cdf[lb - 1]) / (cdf[lb] - cdf[lb - 1]);
+        } else {
+            *updated_u = u / cdf[lb];
+        }
+    }
+    return lb;
+}

cls_name/cls_name.csv ADDED Viewed

	@@ -0,0 +1,80 @@

+人
+自行车
+汽车
+摩托车
+飞机
+公交车
+火车
+卡车
+船
+红绿灯
+消防栓
+停止标志
+停车收费表
+长凳
+鸟
+猫
+狗
+马
+羊
+牛
+象
+熊
+斑马
+长颈鹿
+背包
+雨伞
+手提包
+领带
+手提箱
+飞盘
+滑雪板
+单板滑雪
+运动球
+风筝
+棒球棒
+棒球手套
+滑板
+冲浪板
+网球拍
+瓶子
+红酒杯
+杯子
+叉子
+刀
+勺
+碗
+香蕉
+苹果
+三明治
+橙子
+西兰花
+胡萝卜
+热狗
+比萨
+甜甜圈
+蛋糕
+椅子
+长椅
+盆栽
+床
+餐桌
+马桶
+电视
+笔记本电脑
+鼠标
+遥控器
+键盘
+手机
+微波炉
+烤箱
+烤面包机
+洗碗槽
+冰箱
+书
+时钟
+花瓶
+剪刀
+泰迪熊
+吹风机
+牙刷

cls_name/cls_name.yaml ADDED Viewed

	@@ -0,0 +1,7 @@

+model_cls_name: ['人', '自行车', '汽车', '摩托车', '飞机', '公交车', '火车', '卡车', '船', '红绿灯', '消防栓', '停止标志',
+                '停车收费表', '长凳', '鸟', '猫', '狗', '马', '羊', '牛', '象', '熊', '斑马', '长颈鹿', '背包', '雨伞', '手提包', '领带',
+                '手提箱', '飞盘', '滑雪板', '单板滑雪', '运动球', '风筝', '棒球棒', '棒球手套', '滑板', '冲浪板', '网球拍', '瓶子', '红酒杯',
+                '杯子', '叉子', '刀', '勺', '碗', '香蕉', '苹果', '三明治', '橙子', '西兰花', '胡萝卜', '热狗', '比萨', '甜甜圈', '蛋糕',
+                '椅子', '长椅', '盆栽', '床', '餐桌', '马桶', '电视', '笔记本电脑', '鼠标', '遥控器', '键盘', '手机', '微波炉', '烤箱',
+                '烤面包机', '洗碗槽', '冰箱', '书', '时钟', '花瓶', '剪刀', '泰迪熊', '吹风机', '牙刷'
+            ]

cmake/FindTensorFlow.cmake ADDED Viewed

	@@ -0,0 +1,34 @@

+# https://github.com/PatWie/tensorflow-cmake/blob/master/cmake/modules/FindTensorFlow.cmake
+#
+# Locates TensorFlow by running the `python` found on PATH and asking the
+# tensorflow package for its version, C++11 ABI flag, include directory and
+# library directory (printed one per line, in that order).
+#
+# Result variables:
+#   TensorFlow_FOUND        - TRUE when both the library and the include
+#                             directory were located, FALSE otherwise
+#                             (including when the Python probe fails).
+#   TensorFlow_VERSION      - value of tf.__version__
+#   TensorFlow_ABI          - value of tf.__cxx11_abi_flag__
+#   TensorFlow_INCLUDE_DIR  - value of tf.sysconfig.get_include()
+#   TensorFlow_LIBRARY      - library found under tf.sysconfig.get_lib()
+execute_process(
+    COMMAND python -c "exec(\"try:\\n  import tensorflow as tf; print(tf.__version__); print(tf.__cxx11_abi_flag__);print(tf.sysconfig.get_include()); print(tf.sysconfig.get_lib())\\nexcept ImportError:\\n  exit(1)\")"
+    OUTPUT_VARIABLE TF_INFORMATION_STRING
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    RESULT_VARIABLE retcode)
+if("${retcode}" STREQUAL "0")
+    # Split the probe output into one list element per line. The expansion is
+    # quoted so that an empty or unusual string cannot change the argument
+    # count, and an optional CR is consumed so CRLF output does not leave a
+    # stray character at the end of each path.
+    string(REGEX REPLACE "\r?\n" ";" TF_INFORMATION_LIST "${TF_INFORMATION_STRING}")
+    list(GET TF_INFORMATION_LIST 0 TF_DETECTED_VERSION)
+    list(GET TF_INFORMATION_LIST 1 TF_DETECTED_ABI)
+    list(GET TF_INFORMATION_LIST 2 TF_DETECTED_INCLUDE_DIR)
+    list(GET TF_INFORMATION_LIST 3 TF_DETECTED_LIBRARY_DIR)
+    if(WIN32)
+        find_library(TF_DETECTED_LIBRARY NAMES _pywrap_tensorflow_internal PATHS
+            "${TF_DETECTED_LIBRARY_DIR}/python")
+    else()
+        # For some reason my tensorflow doesn't have a .so file
+        list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES .so.1)
+        list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES .so.2)
+        find_library(TF_DETECTED_LIBRARY NAMES tensorflow_framework PATHS
+            "${TF_DETECTED_LIBRARY_DIR}")
+    endif()
+    set(TensorFlow_VERSION "${TF_DETECTED_VERSION}")
+    set(TensorFlow_ABI "${TF_DETECTED_ABI}")
+    set(TensorFlow_INCLUDE_DIR "${TF_DETECTED_INCLUDE_DIR}")
+    set(TensorFlow_LIBRARY "${TF_DETECTED_LIBRARY}")
+    if(TensorFlow_LIBRARY AND TensorFlow_INCLUDE_DIR)
+        set(TensorFlow_FOUND TRUE)
+    else()
+        set(TensorFlow_FOUND FALSE)
+    endif()
+else()
+    # The Python probe failed (no interpreter on PATH, or tensorflow cannot be
+    # imported): report the package as not found explicitly instead of leaving
+    # the result variable undefined.
+    set(TensorFlow_FOUND FALSE)
+endif()

cmake/FindThrust.cmake ADDED Viewed

	@@ -0,0 +1,40 @@

+##=============================================================================
+##
+##  Copyright (c) Kitware, Inc.
+##  All rights reserved.
+##  See LICENSE.txt for details.
+##
+##  This software is distributed WITHOUT ANY WARRANTY; without even
+##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+##  PURPOSE.  See the above copyright notice for more information.
+##
+##  Copyright 2012 Sandia Corporation.
+##  Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+##  the U.S. Government retains certain rights in this software.
+##
+##=============================================================================
+#
+# FindThrust
+#
+# This module finds the Thrust header files and extracts their version.  It
+# sets the following variables.
+#
+# THRUST_INCLUDE_DIR -  Include directory for thrust header files.  (All header
+#                       files will actually be in the thrust subdirectory.)
+# THRUST_VERSION -      Version of thrust in the form "major.minor.patch".
+#                       (Note: the code below only locates the headers and
+#                       sets THRUST_FOUND; it does not set THRUST_VERSION.)
+#
+# Look for the directory that contains thrust/version.h. The HINTS entries are
+# tried in addition to CMake's default search locations; CUDA_INCLUDE_DIRS is
+# only useful if the caller has already set it.
+# NOTE(review): ./thrust and ../thrust are relative paths -- confirm which
+# directory they are meant to be resolved against.
+find_path(THRUST_INCLUDE_DIR
+	HINTS /usr/include/cuda
+	      /usr/local/include
+	      /usr/local/cuda/include
+	      ${CUDA_INCLUDE_DIRS}
+	      ./thrust
+	      ../thrust
+	NAMES thrust/version.h
+)
+# Report success through THRUST_FOUND. Nothing is set when the header is
+# missing, and no version variable is derived from version.h here.
+if (THRUST_INCLUDE_DIR)
+  set(THRUST_FOUND TRUE)
+endif ()

color.cpp ADDED Viewed

	@@ -0,0 +1,25 @@

+#include "color.h"
+// Copies this gradient's stops into caller-provided buffers: num_stops floats
+// into stop_offsets and 4 * num_stops floats into stop_colors.
+// The parameters shadow the members of the same name, so the members are
+// always reached through this->.
+void LinearGradient::copy_to(ptr<float> stop_offsets,
+                             ptr<float> stop_colors) const {
+    float *dst_offsets = stop_offsets.get();
+    float *dst_colors = stop_colors.get();
+    const int num_color_floats = 4 * num_stops;
+    for (int k = 0; k < num_stops; ++k) {
+        dst_offsets[k] = this->stop_offsets[k];
+    }
+    for (int k = 0; k < num_color_floats; ++k) {
+        dst_colors[k] = this->stop_colors[k];
+    }
+}
+// Copies this gradient's stops into caller-provided buffers: num_stops floats
+// into stop_offsets and 4 * num_stops floats into stop_colors.
+// The parameters shadow the members of the same name, so the members are
+// always reached through this->.
+void RadialGradient::copy_to(ptr<float> stop_offsets,
+                             ptr<float> stop_colors) const {
+    float *dst_offsets = stop_offsets.get();
+    float *dst_colors = stop_colors.get();
+    const int num_color_floats = 4 * num_stops;
+    for (int k = 0; k < num_stops; ++k) {
+        dst_offsets[k] = this->stop_offsets[k];
+    }
+    for (int k = 0; k < num_color_floats; ++k) {
+        dst_colors[k] = this->stop_colors[k];
+    }
+}

color.h ADDED Viewed

	@@ -0,0 +1,63 @@

+#pragma once
+#include "diffvg.h"
+#include "vector.h"
+#include "ptr.h"
+// Tag naming the kind of color description; each enumerator has a struct of
+// the same name declared below.
+enum class ColorType {
+    Constant,
+    LinearGradient,
+    RadialGradient
+};
+// A single color stored as a four-component float vector.
+struct Constant {
+    Vector4f color;
+    // Wraps the address of this object in a type-erased ptr<void>.
+    ptr<void> get_ptr() {
+        return ptr<void>(this);
+    }
+};
+// Gradient described by two points (begin, end) and a list of color stops.
+struct LinearGradient {
+    // Stores the raw pointers obtained from stop_offsets.get() and
+    // stop_colors.get(); the arrays themselves are not copied here.
+    LinearGradient(const Vector2f &begin,
+                   const Vector2f &end,
+                   int num_stops,
+                   ptr<float> stop_offsets,
+                   ptr<float> stop_colors)
+        : begin(begin), end(end), num_stops(num_stops),
+          stop_offsets(stop_offsets.get()), stop_colors(stop_colors.get()) {}
+    // Wraps the address of this object in a type-erased ptr<void>.
+    ptr<void> get_ptr() {
+        return ptr<void>(this);
+    }
+    // Copies the stop offsets and stop colors into the given buffers
+    // (defined out of line).
+    void copy_to(ptr<float> stop_offset,
+                 ptr<float> stop_colors) const;
+    Vector2f begin, end;
+    int num_stops;
+    float *stop_offsets;
+    float *stop_colors; // rgba
+};
+// Gradient described by a center, a two-component radius and a list of color
+// stops.
+struct RadialGradient {
+    // Stores the raw pointers obtained from stop_offsets.get() and
+    // stop_colors.get(); the arrays themselves are not copied here.
+    RadialGradient(const Vector2f &center,
+                   const Vector2f &radius,
+                   int num_stops,
+                   ptr<float> stop_offsets,
+                   ptr<float> stop_colors)
+        : center(center), radius(radius), num_stops(num_stops),
+          stop_offsets(stop_offsets.get()), stop_colors(stop_colors.get()) {}
+    // Wraps the address of this object in a type-erased ptr<void>.
+    ptr<void> get_ptr() {
+        return ptr<void>(this);
+    }
+    // Copies the stop offsets and stop colors into the given buffers
+    // (defined out of line).
+    void copy_to(ptr<float> stop_offset,
+                 ptr<float> stop_colors) const;
+    Vector2f center, radius;
+    int num_stops;
+    float *stop_offsets;
+    float *stop_colors; // rgba
+};

compute_distance.h ADDED Viewed

	@@ -0,0 +1,949 @@

+#pragma once
+#include "diffvg.h"
+#include "edge_query.h"
+#include "scene.h"
+#include "shape.h"
+#include "solve.h"
+#include "vector.h"
+#include <cassert>
+// Identifies where on a Path the closest point was found; filled in by
+// closest_point(const Path &, ...).
+struct ClosestPointPathInfo {
+    int base_point_id; // index of the segment (indexes path.num_control_points)
+    int point_id;      // index of the segment's first point in path.points
+    float t_root;      // curve parameter of the closest point on that segment
+};
+// Closest point on a circle's outline to pt: the center moved by radius
+// along the direction from the center towards pt.
+//   result: receives the closest point.
+// Returns true because a result is always written. The previous version
+// returned false unconditionally, which made compute_distance() discard every
+// circle even though the Rect overload reports success the same way and a
+// matching d_closest_point(Circle) backward pass exists.
+// NOTE(review): when pt coincides with circle.center the direction is a
+// zero vector -- confirm how normalize() behaves in that case.
+DEVICE
+inline
+bool closest_point(const Circle &circle, const Vector2f &pt,
+                   Vector2f *result) {
+    *result = circle.center + circle.radius * normalize(pt - circle.center);
+    return true;
+}
+// Finds the point on a path that is closest to pt, considering only points
+// strictly closer than max_radius.
+//   path:       the path; each segment is a line, quadratic Bezier or cubic
+//               Bezier depending on path.num_control_points (0, 1 or 2).
+//   bvh_nodes:  BVH over the path's segments; the root is expected at index
+//               2 * path.num_base_points - 2.
+//   pt:         query point, in the same space as path.points.
+//   max_radius: initial search radius.
+//   path_info:  receives the segment, first point index and curve parameter
+//               of the closest point whenever a closer candidate is found.
+//   result:     receives the closest point ((0, 0) when nothing was found).
+// Returns true when some segment lies closer than max_radius.
+DEVICE
+inline
+bool closest_point(const Path &path, const BVHNode *bvh_nodes, const Vector2f &pt, float max_radius,
+                   ClosestPointPathInfo *path_info,
+                   Vector2f *result) {
+    auto min_dist = max_radius;
+    auto ret_pt = Vector2f{0, 0};
+    auto found = false;
+    auto num_segments = path.num_base_points;
+    constexpr auto max_bvh_size = 128;
+    int bvh_stack[max_bvh_size];
+    auto stack_size = 0;
+    bvh_stack[stack_size++] = 2 * num_segments - 2;
+    while (stack_size > 0) {
+        const BVHNode &node = bvh_nodes[bvh_stack[--stack_size]];
+        if (node.child1 < 0) {
+            // leaf
+            auto base_point_id = node.child0;
+            auto point_id = - node.child1 - 1;
+            assert(base_point_id < num_segments);
+            assert(point_id < path.num_points);
+            auto dist = 0.f;
+            auto closest_pt = Vector2f{0, 0};
+            auto t_root = 0.f;
+            if (path.num_control_points[base_point_id] == 0) {
+                // Straight line
+                auto i0 = point_id;
+                auto i1 = (point_id + 1) % path.num_points;
+                auto p0 = Vector2f{path.points[2 * i0], path.points[2 * i0 + 1]};
+                auto p1 = Vector2f{path.points[2 * i1], path.points[2 * i1 + 1]};
+                // project pt to line
+                auto t = dot(pt - p0, p1 - p0) / dot(p1 - p0, p1 - p0);
+                if (t < 0) {
+                    dist = distance(p0, pt);
+                    closest_pt = p0;
+                    t_root = 0;
+                } else if (t > 1) {
+                    dist = distance(p1, pt);
+                    closest_pt = p1;
+                    t_root = 1;
+                } else {
+                    dist = distance(p0 + t * (p1 - p0), pt);
+                    closest_pt = p0 + t * (p1 - p0);
+                    t_root = t;
+                }
+            } else if (path.num_control_points[base_point_id] == 1) {
+                // Quadratic Bezier curve
+                auto i0 = point_id;
+                auto i1 = point_id + 1;
+                auto i2 = (point_id + 2) % path.num_points;
+                auto p0 = Vector2f{path.points[2 * i0], path.points[2 * i0 + 1]};
+                auto p1 = Vector2f{path.points[2 * i1], path.points[2 * i1 + 1]};
+                auto p2 = Vector2f{path.points[2 * i2], path.points[2 * i2 + 1]};
+                if (path.use_distance_approx) {
+                    closest_pt = quadratic_closest_pt_approx(p0, p1, p2, pt, &t_root);
+                    dist = distance(closest_pt, pt);
+                } else {
+                    auto eval = [&](float t) -> Vector2f {
+                        auto tt = 1 - t;
+                        return (tt*tt)*p0 + (2*tt*t)*p1 + (t*t)*p2;
+                    };
+                    // Start from the better of the two endpoints, then try the
+                    // interior critical points.
+                    auto pt0 = eval(0);
+                    auto pt1 = eval(1);
+                    auto dist0 = distance(pt0, pt);
+                    auto dist1 = distance(pt1, pt);
+                    {
+                        dist = dist0;
+                        closest_pt = pt0;
+                        t_root = 0;
+                    }
+                    if (dist1 < dist) {
+                        dist = dist1;
+                        closest_pt = pt1;
+                        t_root = 1;
+                    }
+                    // The curve is (1-t)^2p0 + 2(1-t)tp1 + t^2p2
+                    // = (p0-2p1+p2)t^2+(-2p0+2p1)t+p0 = q
+                    // Want to solve (q - pt) dot q' = 0
+                    // q' = (p0-2p1+p2)t + (-p0+p1)
+                    // Expanding (p0-2p1+p2)^2 t^3 +
+                    //           3(p0-2p1+p2)(-p0+p1) t^2 +
+                    //           (2(-p0+p1)^2+(p0-2p1+p2)(p0-pt))t +
+                    //           (-p0+p1)(p0-pt) = 0
+                    auto A = sum((p0-2*p1+p2)*(p0-2*p1+p2));
+                    auto B = sum(3*(p0-2*p1+p2)*(-p0+p1));
+                    auto C = sum(2*(-p0+p1)*(-p0+p1)+(p0-2*p1+p2)*(p0-pt));
+                    auto D = sum((-p0+p1)*(p0-pt));
+                    float t[3];
+                    int num_sol = solve_cubic(A, B, C, D, t);
+                    for (int j = 0; j < num_sol; j++) {
+                        if (t[j] >= 0 && t[j] <= 1) {
+                            auto p = eval(t[j]);
+                            auto distp = distance(p, pt);
+                            if (distp < dist) {
+                                dist = distp;
+                                closest_pt = p;
+                                t_root = t[j];
+                            }
+                        }
+                    }
+                }
+            } else if (path.num_control_points[base_point_id] == 2) {
+                // Cubic Bezier curve
+                auto i0 = point_id;
+                auto i1 = point_id + 1;
+                auto i2 = point_id + 2;
+                auto i3 = (point_id + 3) % path.num_points;
+                auto p0 = Vector2f{path.points[2 * i0], path.points[2 * i0 + 1]};
+                auto p1 = Vector2f{path.points[2 * i1], path.points[2 * i1 + 1]};
+                auto p2 = Vector2f{path.points[2 * i2], path.points[2 * i2 + 1]};
+                auto p3 = Vector2f{path.points[2 * i3], path.points[2 * i3 + 1]};
+                auto eval = [&](float t) -> Vector2f {
+                    auto tt = 1 - t;
+                    return (tt*tt*tt)*p0 + (3*tt*tt*t)*p1 + (3*tt*t*t)*p2 + (t*t*t)*p3;
+                };
+                // Start from the better of the two endpoints, then try the
+                // interior critical points.
+                auto pt0 = eval(0);
+                auto pt1 = eval(1);
+                auto dist0 = distance(pt0, pt);
+                auto dist1 = distance(pt1, pt);
+                {
+                    dist = dist0;
+                    closest_pt = pt0;
+                    t_root = 0;
+                }
+                if (dist1 < dist) {
+                    dist = dist1;
+                    closest_pt = pt1;
+                    t_root = 1;
+                }
+                // The curve is (1 - t)^3 p0 + 3 * (1 - t)^2 t p1 + 3 * (1 - t) t^2 p2 + t^3 p3
+                // = (-p0+3p1-3p2+p3) t^3 + (3p0-6p1+3p2) t^2 + (-3p0+3p1) t + p0
+                // Want to solve (q - pt) dot q' = 0
+                // q' = 3*(-p0+3p1-3p2+p3)t^2 + 2*(3p0-6p1+3p2)t + (-3p0+3p1)
+                // Expanding
+                // 3*(-p0+3p1-3p2+p3)^2 t^5
+                // 5*(-p0+3p1-3p2+p3)(3p0-6p1+3p2) t^4
+                // 4*(-p0+3p1-3p2+p3)(-3p0+3p1) + 2*(3p0-6p1+3p2)^2 t^3
+                // 3*(3p0-6p1+3p2)(-3p0+3p1) + 3*(-p0+3p1-3p2+p3)(p0-pt) t^2
+                // (-3p0+3p1)^2+2(p0-pt)(3p0-6p1+3p2) t
+                // (p0-pt)(-3p0+3p1)
+                double A = 3*sum((-p0+3*p1-3*p2+p3)*(-p0+3*p1-3*p2+p3));
+                double B = 5*sum((-p0+3*p1-3*p2+p3)*(3*p0-6*p1+3*p2));
+                double C = 4*sum((-p0+3*p1-3*p2+p3)*(-3*p0+3*p1)) + 2*sum((3*p0-6*p1+3*p2)*(3*p0-6*p1+3*p2));
+                double D = 3*(sum((3*p0-6*p1+3*p2)*(-3*p0+3*p1)) + sum((-p0+3*p1-3*p2+p3)*(p0-pt)));
+                double E = sum((-3*p0+3*p1)*(-3*p0+3*p1)) + 2*sum((p0-pt)*(3*p0-6*p1+3*p2));
+                double F = sum((p0-pt)*(-3*p0+3*p1));
+                // normalize the polynomial
+                B /= A;
+                C /= A;
+                D /= A;
+                E /= A;
+                F /= A;
+                // Isolator Polynomials:
+                // https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.133.2233&rep=rep1&type=pdf
+                //                                       x/5 + B/25
+                //                                    /-----------------------------------------------------
+                // 5x^4 + 4B x^3 + 3C x^2 + 2D x + E /   x^5 +    B x^4 +       C x^3 +      D x^2 +      E x + F
+                //                                       x^5 + 4B/5 x^4 +    3C/5 x^3 +   2D/5 x^2 +    E/5 x
+                //                                      ----------------------------------------------------
+                //                                              B/5 x^4 +    2C/5 x^3 +   3D/5 x^2 +   4E/5 x + F
+                //                                              B/5 x^4 + 4B^2/25 x^3 + 3BC/25 x^2 + 2BD/25 x + BE/25
+                //                                      ----------------------------------------------------
+                //                                     (2C/5 - 4B^2/25)x^3 + (3D/5-3BC/25)x^2 + (4E/5-2BD/25)x + (F-BE/25)
+                auto p1A = ((2 / 5.f) * C - (4 / 25.f) * B * B);
+                auto p1B = ((3 / 5.f) * D - (3 / 25.f) * B * C);
+                auto p1C = ((4 / 5.f) * E - (2 / 25.f) * B * D);
+                auto p1D = F - B * E / 25.f;
+                // auto q1A = 1 / 5.f;
+                // auto q1B = B / 25.f;
+                // x/5 + B/25 = 0
+                // x = -B/5
+                auto q_root = -B/5.f;
+                double p_roots[3];
+                int num_sol = solve_cubic(p1A, p1B, p1C, p1D, p_roots);
+                // Split points for root isolation: the root of the linear
+                // quotient (only when it lies inside [0, 1]) followed by the
+                // roots of the cubic remainder. Entries are appended and
+                // counted together so every counted slot holds a real value;
+                // previously slot 0 was always counted but left uninitialized
+                // whenever q_root fell outside [0, 1].
+                float intervals[4];
+                auto num_intervals = 0;
+                if (q_root >= 0 && q_root <= 1) {
+                    intervals[num_intervals++] = q_root;
+                }
+                for (int j = 0; j < num_sol; j++) {
+                    intervals[num_intervals++] = p_roots[j];
+                }
+                // sort intervals
+                for (int j = 1; j < num_intervals; j++) {
+                    for (int k = j; k > 0 && intervals[k - 1] > intervals[k]; k--) {
+                        auto tmp = intervals[k];
+                        intervals[k] = intervals[k - 1];
+                        intervals[k - 1] = tmp;
+                    }
+                }
+                auto eval_polynomial = [&] (double t) {
+                    return t*t*t*t*t+
+                           B*t*t*t*t+
+                           C*t*t*t+
+                           D*t*t+
+                           E*t+
+                           F;
+                };
+                auto eval_polynomial_deriv = [&] (double t) {
+                    return 5*t*t*t*t+
+                           4*B*t*t*t+
+                           3*C*t*t+
+                           2*D*t+
+                           E;
+                };
+                // Walk the sub-intervals of [0, 1] delimited by the split
+                // points and look for one root in each.
+                auto lower_bound = 0.f;
+                for (int j = 0; j < num_intervals + 1; j++) {
+                    if (j < num_intervals && intervals[j] < 0.f) {
+                        continue;
+                    }
+                    auto upper_bound = j < num_intervals ?
+                        min(intervals[j], 1.f) : 1.f;
+                    auto lb = lower_bound;
+                    auto ub = upper_bound;
+                    auto lb_eval = eval_polynomial(lb);
+                    auto ub_eval = eval_polynomial(ub);
+                    if (lb_eval * ub_eval > 0) {
+                        // Doesn't have root
+                        continue;
+                    }
+                    if (lb_eval > ub_eval) {
+                        swap_(lb, ub);
+                    }
+                    auto t = 0.5f * (lb + ub);
+                    auto num_iter = 20;
+                    for (int it = 0; it < num_iter; it++) {
+                        if (!(t >= lb && t <= ub)) {
+                            t = 0.5f * (lb + ub);
+                        }
+                        auto value = eval_polynomial(t);
+                        if (fabs(value) < 1e-5f || it == num_iter - 1) {
+                            break;
+                        }
+                        // The derivative may not be entirely accurate,
+                        // but the bisection is going to handle this
+                        if (value > 0.f) {
+                            ub = t;
+                        } else {
+                            lb = t;
+                        }
+                        auto derivative = eval_polynomial_deriv(t);
+                        t -= value / derivative;
+                    }
+                    auto p = eval(t);
+                    auto distp = distance(p, pt);
+                    if (distp < dist) {
+                        dist = distp;
+                        closest_pt = p;
+                        t_root = t;
+                    }
+                    if (upper_bound >= 1.f) {
+                        break;
+                    }
+                    lower_bound = upper_bound;
+                }
+            } else {
+                assert(false);
+            }
+            if (dist < min_dist) {
+                min_dist = dist;
+                ret_pt = closest_pt;
+                path_info->base_point_id = base_point_id;
+                path_info->point_id = point_id;
+                path_info->t_root = t_root;
+                found = true;
+            }
+        } else {
+            // Interior node: descend only into children whose box can still
+            // contain something closer than the best distance so far.
+            assert(node.child0 >= 0 && node.child1 >= 0);
+            const AABB &b0 = bvh_nodes[node.child0].box;
+            if (within_distance(b0, pt, min_dist)) {
+                bvh_stack[stack_size++] = node.child0;
+            }
+            const AABB &b1 = bvh_nodes[node.child1].box;
+            if (within_distance(b1, pt, min_dist)) {
+                bvh_stack[stack_size++] = node.child1;
+            }
+            assert(stack_size <= max_bvh_size);
+        }
+    }
+    if (found) {
+        assert(path_info->base_point_id < num_segments);
+    }
+    *result = ret_pt;
+    return found;
+}
+// Closest point on a rectangle's outline to pt.
+// Each of the four edges is treated as a segment; pt is projected onto it,
+// the projection is clamped to the segment's endpoints, and the nearest of
+// the four candidates wins.
+//   result: receives the closest point.
+// Always returns true.
+DEVICE
+inline
+bool closest_point(const Rect &rect, const Vector2f &pt,
+                   Vector2f *result) {
+    auto min_dist = 0.f;
+    auto closest_pt = Vector2f{0, 0};
+    // `first` forces the first edge to seed min_dist / closest_pt.
+    auto update = [&](const Vector2f &p0, const Vector2f &p1, bool first) {
+        // project pt to line
+        auto t = dot(pt - p0, p1 - p0) / dot(p1 - p0, p1 - p0);
+        if (t < 0) {
+            auto d = distance(p0, pt);
+            if (first || d < min_dist) {
+                min_dist = d;
+                closest_pt = p0;
+            }
+        } else if (t > 1) {
+            auto d = distance(p1, pt);
+            if (first || d < min_dist) {
+                min_dist = d;
+                closest_pt = p1;
+            }
+        } else {
+            auto p = p0 + t * (p1 - p0);
+            auto d = distance(p, pt);
+            if (first || d < min_dist) {
+                min_dist = d;
+                // Store the projected point that d was measured to. The
+                // earlier code stored p0 here, returning an edge endpoint
+                // whenever the projection fell inside the edge.
+                closest_pt = p;
+            }
+        }
+    };
+    auto left_top = rect.p_min;
+    auto right_top = Vector2f{rect.p_max.x, rect.p_min.y};
+    auto left_bottom = Vector2f{rect.p_min.x, rect.p_max.y};
+    auto right_bottom = rect.p_max;
+    update(left_top, left_bottom, true);
+    update(left_top, right_top, false);
+    update(right_top, right_bottom, false);
+    update(left_bottom, right_bottom, false);
+    *result = closest_pt;
+    return true;
+}
+// Dispatches on shape.type to the closest_point overload for the concrete
+// shape stored behind shape.ptr and returns that overload's result.
+// bvh_nodes, max_radius and path_info are used by the Path overload only.
+// Ellipse is not implemented: it asserts and returns false, exactly like an
+// unrecognized type.
+DEVICE
+inline
+bool closest_point(const Shape &shape, const BVHNode *bvh_nodes, const Vector2f &pt, float max_radius,
+                   ClosestPointPathInfo *path_info,
+                   Vector2f *result) {
+    const auto type = shape.type;
+    if (type == ShapeType::Circle) {
+        const Circle &circle = *(const Circle *)shape.ptr;
+        return closest_point(circle, pt, result);
+    }
+    if (type == ShapeType::Path) {
+        const Path &path = *(const Path *)shape.ptr;
+        return closest_point(path, bvh_nodes, pt, max_radius, path_info, result);
+    }
+    if (type == ShapeType::Rect) {
+        const Rect &rect = *(const Rect *)shape.ptr;
+        return closest_point(rect, pt, result);
+    }
+    // ShapeType::Ellipse (see
+    // https://www.geometrictools.com/Documentation/DistancePointEllipseEllipsoid.pdf)
+    // and any other value end up here.
+    assert(false);
+    return false;
+}
+// Distance from a canvas-space point to the nearest shape of a shape group.
+//   scene, shape_group_id: select the group and its BVH.
+//   pt:           query point in canvas space.
+//   max_radius:   search radius used for BVH culling and passed on to the
+//                 per-shape queries.
+//   min_shape_id: optional output, id of the nearest shape.
+//   closest_pt_:  optional output, nearest point in canvas space.
+//   path_info:    optional output, path details for the nearest point.
+//   result:       receives the nearest distance, or max_radius when nothing
+//                 was found.
+// Returns true when at least one shape produced a closest point.
+DEVICE
+inline
+bool compute_distance(const SceneData &scene,
+                      int shape_group_id,
+                      const Vector2f &pt,
+                      float max_radius,
+                      int *min_shape_id,
+                      Vector2f *closest_pt_,
+                      ClosestPointPathInfo *path_info,
+                      float *result) {
+    const ShapeGroup &group = scene.shape_groups[shape_group_id];
+    const auto &nodes = scene.shape_groups_bvh_nodes[shape_group_id];
+    // pt is in canvas space; the BVH and the shapes live in the group's
+    // local space.
+    auto query = xform_pt(group.canvas_to_shape, pt);
+    constexpr auto max_bvh_stack_size = 64;
+    int bvh_stack[max_bvh_stack_size];
+    auto stack_size = 0;
+    bvh_stack[stack_size++] = 2 * group.num_shapes - 2;
+    auto best_dist = max_radius;
+    auto hit = false;
+    while (stack_size > 0) {
+        const BVHNode &node = nodes[bvh_stack[--stack_size]];
+        if (node.child1 >= 0) {
+            // Interior node: queue every child whose box is within
+            // max_radius of the query point.
+            assert(node.child0 >= 0 && node.child1 >= 0);
+            const AABB &first_box = nodes[node.child0].box;
+            if (inside(first_box, query, max_radius)) {
+                bvh_stack[stack_size++] = node.child0;
+            }
+            const AABB &second_box = nodes[node.child1].box;
+            if (inside(second_box, query, max_radius)) {
+                bvh_stack[stack_size++] = node.child1;
+            }
+            assert(stack_size <= max_bvh_stack_size);
+            continue;
+        }
+        // Leaf node: child0 is the shape id.
+        auto shape_id = node.child0;
+        const auto &shape = scene.shapes[shape_id];
+        ClosestPointPathInfo candidate_info{-1, -1};
+        auto candidate_local = Vector2f{0, 0};
+        if (!closest_point(shape, scene.path_bvhs[shape_id], query, max_radius,
+                           &candidate_info, &candidate_local)) {
+            continue;
+        }
+        // Distances are compared in canvas space.
+        auto candidate = xform_pt(group.shape_to_canvas, candidate_local);
+        auto candidate_dist = distance(candidate, pt);
+        if (hit && !(candidate_dist < best_dist)) {
+            continue;
+        }
+        hit = true;
+        best_dist = candidate_dist;
+        if (min_shape_id != nullptr) {
+            *min_shape_id = shape_id;
+        }
+        if (closest_pt_ != nullptr) {
+            *closest_pt_ = candidate;
+        }
+        if (path_info != nullptr) {
+            *path_info = candidate_info;
+        }
+    }
+    *result = best_dist;
+    return hit;
+}
+// Backward pass of closest_point(Circle): given d_closest_pt, the gradient
+// with respect to the returned closest point, atomically accumulates
+// gradients into d_circle.center and d_circle.radius.
+// NOTE(review): d_pt is never written in this body -- confirm that the
+// gradient with respect to pt is intentionally dropped.
+// NOTE(review): d_center is formed as d_closest_pt * (1 + d_normalize(...));
+// verify this against the derivative of the forward expression quoted below.
+DEVICE
+inline
+void d_closest_point(const Circle &circle,
+                     const Vector2f &pt,
+                     const Vector2f &d_closest_pt,
+                     Circle &d_circle,
+                     Vector2f &d_pt) {
+    // return circle.center + circle.radius * normalize(pt - circle.center);
+    auto d_center = d_closest_pt *
+        (1 + d_normalize(pt - circle.center, circle.radius * d_closest_pt));
+    // &d_circle.center.x is treated as the start of the center's two floats
+    // (pointer + TVector2 overload of atomic_add).
+    atomic_add(&d_circle.center.x, d_center);
+    atomic_add(&d_circle.radius, dot(d_closest_pt, normalize(pt - circle.center)));
+}
+DEVICE
+inline
+void d_closest_point(const Path &path,
+                     const Vector2f &pt,
+                     const Vector2f &d_closest_pt,
+                     const ClosestPointPathInfo &path_info,
+                     Path &d_path,
+                     Vector2f &d_pt) {
+    auto base_point_id = path_info.base_point_id;
+    auto point_id = path_info.point_id;
+    auto min_t_root = path_info.t_root;
+    if (path.num_control_points[base_point_id] == 0) {
+        // Straight line
+        auto i0 = point_id;
+        auto i1 = (point_id + 1) % path.num_points;
+        auto p0 = Vector2f{path.points[2 * i0], path.points[2 * i0 + 1]};
+        auto p1 = Vector2f{path.points[2 * i1], path.points[2 * i1 + 1]};
+        // project pt to line
+        auto t = dot(pt - p0, p1 - p0) / dot(p1 - p0, p1 - p0);
+        auto d_p0 = Vector2f{0, 0};
+        auto d_p1 = Vector2f{0, 0};
+        if (t < 0) {
+            d_p0 += d_closest_pt;
+        } else if (t > 1) {
+            d_p1 += d_closest_pt;
+        } else {
+            auto d_p = d_closest_pt;
+            // p = p0 + t * (p1 - p0)
+            d_p0 += d_p * (1 - t);
+            d_p1 += d_p * t;
+        }
+        atomic_add(d_path.points + 2 * i0, d_p0);
+        atomic_add(d_path.points + 2 * i1, d_p1);
+    } else if (path.num_control_points[base_point_id] == 1) {
+        // Quadratic Bezier curve
+        auto i0 = point_id;
+        auto i1 = point_id + 1;
+        auto i2 = (point_id + 2) % path.num_points;
+        auto p0 = Vector2f{path.points[2 * i0], path.points[2 * i0 + 1]};
+        auto p1 = Vector2f{path.points[2 * i1], path.points[2 * i1 + 1]};
+        auto p2 = Vector2f{path.points[2 * i2], path.points[2 * i2 + 1]};
+        // auto eval = [&](float t) -> Vector2f {
+        //     auto tt = 1 - t;
+        //     return (tt*tt)*p0 + (2*tt*t)*p1 + (t*t)*p2;
+        // };
+        // auto dist0 = distance(eval(0), pt);
+        // auto dist1 = distance(eval(1), pt);
+        auto d_p0 = Vector2f{0, 0};
+        auto d_p1 = Vector2f{0, 0};
+        auto d_p2 = Vector2f{0, 0};
+        auto t = min_t_root;
+        if (t == 0) {
+            d_p0 += d_closest_pt;
+        } else if (t == 1) {
+            d_p2 += d_closest_pt;
+        } else {
+            // The curve is (1-t)^2p0 + 2(1-t)tp1 + t^2p2
+            // = (p0-2p1+p2)t^2+(-2p0+2p1)t+p0 = q
+            // Want to solve (q - pt) dot q' = 0
+            // q' = (p0-2p1+p2)t + (-p0+p1)
+            // Expanding (p0-2p1+p2)^2 t^3 +
+            //           3(p0-2p1+p2)(-p0+p1) t^2 +
+            //           (2(-p0+p1)^2+(p0-2p1+p2)(p0-pt))t +
+            //           (-p0+p1)(p0-pt) = 0
+            auto A = sum((p0-2*p1+p2)*(p0-2*p1+p2));
+            auto B = sum(3*(p0-2*p1+p2)*(-p0+p1));
+            auto C = sum(2*(-p0+p1)*(-p0+p1)+(p0-2*p1+p2)*(p0-pt));
+            // auto D = sum((-p0+p1)*(p0-pt));
+            auto d_p = d_closest_pt;
+            // p = eval(t)
+            auto tt = 1 - t;
+            // (tt*tt)*p0 + (2*tt*t)*p1 + (t*t)*p2
+            auto d_tt = 2 * tt * dot(d_p, p0) + 2 * t * dot(d_p, p1);
+            auto d_t = -d_tt + 2 * tt * dot(d_p, p1) + 2 * t * dot(d_p, p2);
+            auto d_p0 = d_p * tt * tt;
+            auto d_p1 = 2 * d_p * tt * t;
+            auto d_p2 = d_p * t * t;
+            // implicit function theorem: dt/dA = -1/(p'(t)) * dp/dA
+            auto poly_deriv_t = 3 * A * t * t + 2 * B * t + C;
+            if (fabs(poly_deriv_t) > 1e-6f) {
+                auto d_A = - (d_t / poly_deriv_t) * t * t * t;
+                auto d_B = - (d_t / poly_deriv_t) * t * t;
+                auto d_C = - (d_t / poly_deriv_t) * t;
+                auto d_D = - (d_t / poly_deriv_t);
+                // A = sum((p0-2*p1+p2)*(p0-2*p1+p2))
+                // B = sum(3*(p0-2*p1+p2)*(-p0+p1))
+                // C = sum(2*(-p0+p1)*(-p0+p1)+(p0-2*p1+p2)*(p0-pt))
+                // D = sum((-p0+p1)*(p0-pt))
+                d_p0 += 2*d_A*(p0-2*p1+p2)+
+                        3*d_B*((-p0+p1)-(p0-2*p1+p2))+
+                        2*d_C*(-2*(-p0+p1))+
+                          d_C*((p0-pt)+(p0-2*p1+p2))+
+                        2*d_D*(-(p0-pt)+(-p0+p1));
+                d_p1 += (-2)*2*d_A*(p0-2*p1+p2)+
+                        3*d_B*(-2*(-p0+p1)+(p0-2*p1+p2))+
+                        2*d_C*(2*(-p0+p1))+
+                          d_C*((-2)*(p0-pt))+
+                        d_D*(p0-pt);
+                d_p2 += 2*d_A*(p0-2*p1+p2)+
+                        3*d_B*(-p0+p1)+
+                        d_C*(p0-pt);
+                d_pt += d_C*(-(p0-2*p1+p2))+
+                        d_D*(-(-p0+p1));
+            }
+        }
+        atomic_add(d_path.points + 2 * i0, d_p0);
+        atomic_add(d_path.points + 2 * i1, d_p1);
+        atomic_add(d_path.points + 2 * i2, d_p2);
+    } else if (path.num_control_points[base_point_id] == 2) {
+        // Cubic Bezier curve
+        auto i0 = point_id;
+        auto i1 = point_id + 1;
+        auto i2 = point_id + 2;
+        auto i3 = (point_id + 3) % path.num_points;
+        auto p0 = Vector2f{path.points[2 * i0], path.points[2 * i0 + 1]};
+        auto p1 = Vector2f{path.points[2 * i1], path.points[2 * i1 + 1]};
+        auto p2 = Vector2f{path.points[2 * i2], path.points[2 * i2 + 1]};
+        auto p3 = Vector2f{path.points[2 * i3], path.points[2 * i3 + 1]};
+        // auto eval = [&](float t) -> Vector2f {
+        //     auto tt = 1 - t;
+        //     return (tt*tt*tt)*p0 + (3*tt*tt*t)*p1 + (3*tt*t*t)*p2 + (t*t*t)*p3;
+        // };
+        auto d_p0 = Vector2f{0, 0};
+        auto d_p1 = Vector2f{0, 0};
+        auto d_p2 = Vector2f{0, 0};
+        auto d_p3 = Vector2f{0, 0};
+        auto t = min_t_root;
+        if (t == 0) {
+            // closest_pt = p0
+            d_p0 += d_closest_pt;
+        } else if (t == 1) {
+            // closest_pt = p1
+            d_p3 += d_closest_pt;
+        } else {
+            // The curve is (1 - t)^3 p0 + 3 * (1 - t)^2 t p1 + 3 * (1 - t) t^2 p2 + t^3 p3
+            // = (-p0+3p1-3p2+p3) t^3 + (3p0-6p1+3p2) t^2 + (-3p0+3p1) t + p0
+            // Want to solve (q - pt) dot q' = 0
+            // q' = 3*(-p0+3p1-3p2+p3)t^2 + 2*(3p0-6p1+3p2)t + (-3p0+3p1)
+            // Expanding
+            // 3*(-p0+3p1-3p2+p3)^2 t^5
+            // 5*(-p0+3p1-3p2+p3)(3p0-6p1+3p2) t^4
+            // 4*(-p0+3p1-3p2+p3)(-3p0+3p1) + 2*(3p0-6p1+3p2)^2 t^3
+            // 3*(3p0-6p1+3p2)(-3p0+3p1) + 3*(-p0+3p1-3p2+p3)(p0-pt) t^2
+            // (-3p0+3p1)^2+2(p0-pt)(3p0-6p1+3p2) t
+            // (p0-pt)(-3p0+3p1)
+            double A = 3*sum((-p0+3*p1-3*p2+p3)*(-p0+3*p1-3*p2+p3));
+            double B = 5*sum((-p0+3*p1-3*p2+p3)*(3*p0-6*p1+3*p2));
+            double C = 4*sum((-p0+3*p1-3*p2+p3)*(-3*p0+3*p1)) + 2*sum((3*p0-6*p1+3*p2)*(3*p0-6*p1+3*p2));
+            double D = 3*(sum((3*p0-6*p1+3*p2)*(-3*p0+3*p1)) + sum((-p0+3*p1-3*p2+p3)*(p0-pt)));
+            double E = sum((-3*p0+3*p1)*(-3*p0+3*p1)) + 2*sum((p0-pt)*(3*p0-6*p1+3*p2));
+            double F = sum((p0-pt)*(-3*p0+3*p1));
+            B /= A;
+            C /= A;
+            D /= A;
+            E /= A;
+            F /= A;
+            // auto eval_polynomial = [&] (double t) {
+            //     return t*t*t*t*t+
+            //            B*t*t*t*t+
+            //            C*t*t*t+
+            //            D*t*t+
+            //            E*t+
+            //            F;
+            // };
+            auto eval_polynomial_deriv = [&] (double t) {
+                return 5*t*t*t*t+
+                       4*B*t*t*t+
+                       3*C*t*t+
+                       2*D*t+
+                       E;
+            };
+            // auto p = eval(t);
+            auto d_p = d_closest_pt;
+            // (tt*tt*tt)*p0 + (3*tt*tt*t)*p1 + (3*tt*t*t)*p2 + (t*t*t)*p3
+            auto tt = 1 - t;
+            auto d_tt = 3 * tt * tt * dot(d_p, p0) +
+                        6 * tt * t * dot(d_p, p1) +
+                        3 * t * t * dot(d_p, p2);
+            auto d_t = -d_tt +
+                       3 * tt * tt * dot(d_p, p1) +
+                       6 * tt * t * dot(d_p, p2) +
+                       3 * t * t * dot(d_p, p3);
+            d_p0 += d_p * (tt * tt * tt);
+            d_p1 += d_p * (3 * tt * tt * t);
+            d_p2 += d_p * (3 * tt * t * t);
+            d_p3 += d_p * (t * t * t);
+            // implicit function theorem: dt/dA = -1/(p'(t)) * dp/dA
+            auto poly_deriv_t = eval_polynomial_deriv(t);
+            if (fabs(poly_deriv_t) > 1e-10f) {
+                auto d_B = -(d_t / poly_deriv_t) * t * t * t * t;
+                auto d_C = -(d_t / poly_deriv_t) * t * t * t;
+                auto d_D = -(d_t / poly_deriv_t) * t * t;
+                auto d_E = -(d_t / poly_deriv_t) * t;
+                auto d_F = -(d_t / poly_deriv_t);
+                // B = B' / A
+                // C = C' / A
+                // D = D' / A
+                // E = E' / A
+                // F = F' / A
+                auto d_A = -d_B * B / A
+                           -d_C * C / A
+                           -d_D * D / A
+                           -d_E * E / A
+                           -d_F * F / A;
+                d_B /= A;
+                d_C /= A;
+                d_D /= A;
+                d_E /= A;
+                d_F /= A;
+                {
+                    double A = 3*sum((-p0+3*p1-3*p2+p3)*(-p0+3*p1-3*p2+p3)) + 1e-3;
+                    double B = 5*sum((-p0+3*p1-3*p2+p3)*(3*p0-6*p1+3*p2));
+                    double C = 4*sum((-p0+3*p1-3*p2+p3)*(-3*p0+3*p1)) + 2*sum((3*p0-6*p1+3*p2)*(3*p0-6*p1+3*p2));
+                    double D = 3*(sum((3*p0-6*p1+3*p2)*(-3*p0+3*p1)) + sum((-p0+3*p1-3*p2+p3)*(p0-pt)));
+                    double E = sum((-3*p0+3*p1)*(-3*p0+3*p1)) + 2*sum((p0-pt)*(3*p0-6*p1+3*p2));
+                    double F = sum((p0-pt)*(-3*p0+3*p1));
+                    B /= A;
+                    C /= A;
+                    D /= A;
+                    E /= A;
+                    F /= A;
+                    auto eval_polynomial = [&] (double t) {
+                        return t*t*t*t*t+
+                               B*t*t*t*t+
+                               C*t*t*t+
+                               D*t*t+
+                               E*t+
+                               F;
+                    };
+                    auto eval_polynomial_deriv = [&] (double t) {
+                        return 5*t*t*t*t+
+                               4*B*t*t*t+
+                               3*C*t*t+
+                               2*D*t+
+                               E;
+                    };
+                    auto lb = t - 1e-2f;
+                    auto ub = t + 1e-2f;
+                    auto lb_eval = eval_polynomial(lb);
+                    auto ub_eval = eval_polynomial(ub);
+                    if (lb_eval > ub_eval) {
+                        swap_(lb, ub);
+                    }
+                    auto t_ = 0.5f * (lb + ub);
+                    auto num_iter = 20;
+                    for (int it = 0; it < num_iter; it++) {
+                        if (!(t_ >= lb && t_ <= ub)) {
+                            t_ = 0.5f * (lb + ub);
+                        }
+                        auto value = eval_polynomial(t_);
+                        if (fabs(value) < 1e-5f || it == num_iter - 1) {
+                            break;
+                        }
+                        // The derivative may not be entirely accurate,
+                        // but the bisection is going to handle this
+                        if (value > 0.f) {
+                            ub = t_;
+                        } else {
+                            lb = t_;
+                        }
+                        auto derivative = eval_polynomial_deriv(t);
+                        t_ -= value / derivative;
+                    }
+                }
+                // A = 3*sum((-p0+3*p1-3*p2+p3)*(-p0+3*p1-3*p2+p3))
+                d_p0 += d_A * 3 * (-1) * 2 * (-p0+3*p1-3*p2+p3);
+                d_p1 += d_A * 3 *   3  * 2 * (-p0+3*p1-3*p2+p3);
+                d_p2 += d_A * 3 * (-3) * 2 * (-p0+3*p1-3*p2+p3);
+                d_p3 += d_A * 3 *   1  * 2 * (-p0+3*p1-3*p2+p3);
+                // B = 5*sum((-p0+3*p1-3*p2+p3)*(3*p0-6*p1+3*p2))
+                d_p0 += d_B * 5 * ((-1) * (3*p0-6*p1+3*p2) + 3 * (-p0+3*p1-3*p2+p3));
+                d_p1 += d_B * 5 * (3 * (3*p0-6*p1+3*p2) + (-6) * (-p0+3*p1-3*p2+p3));
+                d_p2 += d_B * 5 * ((-3) * (3*p0-6*p1+3*p2) + 3 * (-p0+3*p1-3*p2+p3));
+                d_p3 += d_B * 5 * (3*p0-6*p1+3*p2);
+                // C = 4*sum((-p0+3*p1-3*p2+p3)*(-3*p0+3*p1)) + 2*sum((3*p0-6*p1+3*p2)*(3*p0-6*p1+3*p2))
+                d_p0 += d_C * 4 * ((-1) * (-3*p0+3*p1) + (-3) * (-p0+3*p1-3*p2+p3)) +
+                        d_C * 2 * (3 * 2 * (3*p0-6*p1+3*p2));
+                d_p1 += d_C * 4 * (3 * (-3*p0+3*p1) + 3 * (-p0+3*p1-3*p2+p3)) +
+                        d_C * 2 * ((-6) * 2 * (3*p0-6*p1+3*p2));
+                d_p2 += d_C * 4 * ((-3) * (-3*p0+3*p1)) +
+                        d_C * 2 * (3 * 2 * (3*p0-6*p1+3*p2));
+                d_p3 += d_C * 4 * (-3*p0+3*p1);
+                // D = 3*(sum((3*p0-6*p1+3*p2)*(-3*p0+3*p1)) + sum((-p0+3*p1-3*p2+p3)*(p0-pt)))
+                d_p0 += d_D * 3 * (3 * (-3*p0+3*p1) + (-3) * (3*p0-6*p1+3*p2)) +
+                        d_D * 3 * ((-1) * (p0-pt) + 1 * (-p0+3*p1-3*p2+p3));
+                d_p1 += d_D * 3 * ((-6) * (-3*p0+3*p1) + (3) * (3*p0-6*p1+3*p2)) +
+                        d_D * 3 * (3 * (p0-pt));
+                d_p2 += d_D * 3 * (3 * (-3*p0+3*p1)) +
+                        d_D * 3 * ((-3) * (p0-pt));
+                d_pt += d_D * 3 * ((-1) * (-p0+3*p1-3*p2+p3));
+                // E = sum((-3*p0+3*p1)*(-3*p0+3*p1)) + 2*sum((p0-pt)*(3*p0-6*p1+3*p2))
+                d_p0 += d_E * ((-3) * 2 * (-3*p0+3*p1)) +
+                        d_E * 2 * (1 * (3*p0-6*p1+3*p2) + 3 * (p0-pt));
+                d_p1 += d_E * (  3  * 2 * (-3*p0+3*p1)) +
+                        d_E * 2 * ((-6) * (p0-pt));
+                d_p2 += d_E * 2 * (  3  * (p0-pt));
+                d_pt += d_E * 2 * ((-1) * (3*p0-6*p1+3*p2));
+                // F = sum((p0-pt)*(-3*p0+3*p1))
+                d_p0 += d_F * (1 * (-3*p0+3*p1)) +
+                        d_F * ((-3) * (p0-pt));
+                d_p1 += d_F * (3 * (p0-pt));
+                d_pt += d_F * ((-1) * (-3*p0+3*p1));
+            }
+        }
+        atomic_add(d_path.points + 2 * i0, d_p0);
+        atomic_add(d_path.points + 2 * i1, d_p1);
+        atomic_add(d_path.points + 2 * i2, d_p2);
+        atomic_add(d_path.points + 2 * i3, d_p3);
+    } else {
+        assert(false);
+    }
+}
+// Backward pass of the closest-point query for an axis-aligned rectangle.
+//
+// The nearest of the four edges to `pt` is re-determined, and `d_closest_pt`
+// is propagated through the point-to-segment projection on that edge.
+//
+//   rect          rectangle described by p_min / p_max
+//   pt            query point
+//   d_closest_pt  incoming gradient w.r.t. the closest point
+//   d_rect        receives the p_min / p_max gradients through atomic_add
+//   d_pt          accumulates the gradient w.r.t. pt
+DEVICE
+inline
+void d_closest_point(const Rect &rect,
+                     const Vector2f &pt,
+                     const Vector2f &d_closest_pt,
+                     Rect &d_rect,
+                     Vector2f &d_pt) {
+    // Distance from pt to the segment [a, b]: project onto the supporting
+    // line and fall back to the nearer endpoint outside [0, 1].
+    auto segment_distance = [&](const Vector2f &a, const Vector2f &b) -> float {
+        auto edge = b - a;
+        auto s = dot(pt - a, edge) / dot(edge, edge);
+        if (s < 0) {
+            return distance(a, pt);
+        }
+        if (s > 1) {
+            return distance(b, pt);
+        }
+        return distance(a + s * edge, pt);
+    };
+    auto corner_lt = rect.p_min;
+    auto corner_rt = Vector2f{rect.p_max.x, rect.p_min.y};
+    auto corner_lb = Vector2f{rect.p_min.x, rect.p_max.y};
+    auto corner_rb = rect.p_max;
+    // Edge ids: 0 = left, 1 = top, 2 = right, 3 = bottom.
+    const float edge_dist[4] = {
+        segment_distance(corner_lt, corner_lb),
+        segment_distance(corner_lt, corner_rt),
+        segment_distance(corner_rt, corner_rb),
+        segment_distance(corner_lb, corner_rb)
+    };
+    // Strict comparison: on ties the edge with the lowest id wins.
+    int nearest_edge = 0;
+    for (int e = 1; e < 4; e++) {
+        if (edge_dist[e] < edge_dist[nearest_edge]) {
+            nearest_edge = e;
+        }
+    }
+    // Propagates d_proj (gradient of the projected point on segment [a, b])
+    // to the two endpoints and, through the projection parameter, to pt.
+    auto backprop_segment = [&](const Vector2f &a, const Vector2f &b,
+                                const Vector2f &d_proj,
+                                Vector2f &d_a, Vector2f &d_b) {
+        auto edge = b - a;
+        auto len_sq = dot(edge, edge);
+        auto s = dot(pt - a, edge) / len_sq;
+        if (s < 0) {
+            // closest point is the endpoint a
+            d_a += d_proj;
+            return;
+        }
+        if (s > 1) {
+            // closest point is the endpoint b
+            d_b += d_proj;
+            return;
+        }
+        // proj = a + s * (b - a)
+        d_a += d_proj * (1 - s);
+        d_b += d_proj * s;
+        auto d_s = sum(d_proj * edge);
+        // s = numerator / denominator
+        auto d_numerator = d_s / len_sq;
+        auto d_denominator = d_s * (-s) / len_sq;
+        // numerator = dot(pt - a, b - a)
+        d_pt += edge * d_numerator;
+        d_b += (pt - a) * d_numerator;
+        d_a += ((a - b) + (a - pt)) * d_numerator;
+        // denominator = dot(b - a, b - a)
+        d_b += 2 * edge * d_denominator;
+        d_a += 2 * (a - b) * d_denominator;
+    };
+    auto d_corner_lt = Vector2f{0, 0};
+    auto d_corner_rt = Vector2f{0, 0};
+    auto d_corner_lb = Vector2f{0, 0};
+    auto d_corner_rb = Vector2f{0, 0};
+    switch (nearest_edge) {
+        case 0:
+            backprop_segment(corner_lt, corner_lb, d_closest_pt, d_corner_lt, d_corner_lb);
+            break;
+        case 1:
+            backprop_segment(corner_lt, corner_rt, d_closest_pt, d_corner_lt, d_corner_rt);
+            break;
+        case 2:
+            backprop_segment(corner_rt, corner_rb, d_closest_pt, d_corner_rt, d_corner_rb);
+            break;
+        default:
+            assert(nearest_edge == 3);
+            backprop_segment(corner_lb, corner_rb, d_closest_pt, d_corner_lb, d_corner_rb);
+            break;
+    }
+    // Map the corner gradients back onto the two stored points:
+    //   left-top     = (p_min.x, p_min.y)
+    //   right-top    = (p_max.x, p_min.y)
+    //   left-bottom  = (p_min.x, p_max.y)
+    //   right-bottom = (p_max.x, p_max.y)
+    auto grad_min = Vector2f{0, 0};
+    auto grad_max = Vector2f{0, 0};
+    grad_min += d_corner_lt;
+    grad_min.x += d_corner_lb.x;
+    grad_min.y += d_corner_rt.y;
+    grad_max.x += d_corner_rt.x;
+    grad_max.y += d_corner_lb.y;
+    grad_max += d_corner_rb;
+    atomic_add(d_rect.p_min, grad_min);
+    atomic_add(d_rect.p_max, grad_max);
+}
+// Type-dispatching front end of the closest-point backward pass.
+//
+// Casts `shape.ptr` / `d_shape.ptr` to the concrete shape selected by
+// `shape.type` and forwards to the matching d_closest_point overload.
+// `path_info` is only forwarded for paths. Ellipses are not implemented
+// and trip an assertion.
+DEVICE
+inline
+void d_closest_point(const Shape &shape,
+                     const Vector2f &pt,
+                     const Vector2f &d_closest_pt,
+                     const ClosestPointPathInfo &path_info,
+                     Shape &d_shape,
+                     Vector2f &d_pt) {
+    switch (shape.type) {
+        case ShapeType::Circle: {
+            const Circle &circle = *(const Circle *)shape.ptr;
+            Circle &d_circle = *(Circle *)d_shape.ptr;
+            d_closest_point(circle, pt, d_closest_pt, d_circle, d_pt);
+            break;
+        }
+        case ShapeType::Ellipse: {
+            // Not implemented. Reference for a possible implementation:
+            // https://www.geometrictools.com/Documentation/DistancePointEllipseEllipsoid.pdf
+            assert(false);
+            break;
+        }
+        case ShapeType::Path: {
+            const Path &path = *(const Path *)shape.ptr;
+            Path &d_path = *(Path *)d_shape.ptr;
+            d_closest_point(path, pt, d_closest_pt, path_info, d_path, d_pt);
+            break;
+        }
+        case ShapeType::Rect: {
+            const Rect &rect = *(const Rect *)shape.ptr;
+            Rect &d_rect = *(Rect *)d_shape.ptr;
+            d_closest_point(rect, pt, d_closest_pt, d_rect, d_pt);
+            break;
+        }
+    }
+}
+// Backward pass of the point-to-shape distance.
+//
+// Forward computation being differentiated:
+//   local_pt         = xform_pt(canvas_to_shape, pt)
+//   local_closest_pt = closest_point(shape, local_pt)
+//   closest_pt       = xform_pt(shape_to_canvas, local_closest_pt)
+//   dist             = distance(closest_pt, pt)
+//
+//   canvas_to_shape / shape_to_canvas  transforms between canvas and shape space
+//   shape, path_info                   shape and the path segment info forwarded
+//                                      to d_closest_point
+//   pt, closest_pt                     query point and its closest point (canvas space)
+//   d_dist                             incoming gradient w.r.t. dist
+//   d_shape_to_canvas                  receives the transform gradient (atomic_add)
+//   d_shape                            receives the shape parameter gradients
+//   d_translation                      optional; receives -d_pt when non-null
+DEVICE
+inline
+void d_compute_distance(const Matrix3x3f &canvas_to_shape,
+                        const Matrix3x3f &shape_to_canvas,
+                        const Shape &shape,
+                        const Vector2f &pt,
+                        const Vector2f &closest_pt,
+                        const ClosestPointPathInfo &path_info,
+                        float d_dist,
+                        Matrix3x3f &d_shape_to_canvas,
+                        Shape &d_shape,
+                        float *d_translation) {
+    // The derivative at distance = 0 is undefined, so nothing is accumulated.
+    const bool coincident = distance_squared(pt, closest_pt) < 1e-10f;
+    if (coincident) {
+        return;
+    }
+    assert(isfinite(d_dist));
+    // Both points are given in canvas space; bring them into shape space.
+    auto pt_in_shape = xform_pt(canvas_to_shape, pt);
+    auto closest_in_shape = xform_pt(canvas_to_shape, closest_pt);
+
+    // dist = distance(closest_pt, pt)
+    auto grad_pt = Vector2f{0, 0};
+    auto grad_closest = Vector2f{0, 0};
+    d_distance(closest_pt, pt, d_dist, grad_closest, grad_pt);
+    assert(isfinite(grad_pt));
+    assert(isfinite(grad_closest));
+
+    // closest_pt = xform_pt(shape_to_canvas, local_closest_pt)
+    auto grad_closest_in_shape = Vector2f{0, 0};
+    auto grad_s2c = Matrix3x3f();
+    d_xform_pt(shape_to_canvas, closest_in_shape, grad_closest,
+               grad_s2c, grad_closest_in_shape);
+    assert(isfinite(grad_closest_in_shape));
+
+    // local_closest_pt = closest_point(shape, local_pt)
+    auto grad_pt_in_shape = Vector2f{0, 0};
+    d_closest_point(shape, pt_in_shape, grad_closest_in_shape, path_info,
+                    d_shape, grad_pt_in_shape);
+    assert(isfinite(grad_pt_in_shape));
+
+    // local_pt = xform_pt(canvas_to_shape, pt)
+    auto grad_c2s = Matrix3x3f();
+    d_xform_pt(canvas_to_shape, pt, grad_pt_in_shape, grad_c2s, grad_pt);
+
+    // Fold the canvas_to_shape gradient into the shape_to_canvas gradient.
+    // NOTE(review): this presumes canvas_to_shape is the inverse of
+    // shape_to_canvas; derivation in
+    // http://jack.valmadre.net/notes/2016/09/04/back-prop-differentials/#back-propagation-using-differentials
+    auto c2s_transposed = transpose(canvas_to_shape);
+    grad_s2c += -c2s_transposed * grad_c2s * c2s_transposed;
+    atomic_add(&d_shape_to_canvas(0, 0), grad_s2c);
+    if (d_translation != nullptr) {
+        atomic_add(d_translation, -grad_pt);
+    }
+}

config/base.yaml ADDED Viewed

	@@ -0,0 +1,91 @@

+# Layout of this file: one `default` section holding the full set of options,
+# followed by named `experiment_*` sections that each define only a
+# `path_schedule`.
+# NOTE(review): presumably the loader overlays the selected experiment on top
+# of `default` -- confirm against the code that reads this file.
+default:
+  use_ycrcb: False
+  seginit:
+    type: circle
+    radius: 5
+  # Per-artifact on/off switches; only `output` is enabled here.
+  save:
+    init: false
+    image: false
+    output: true
+    video: false
+    loss: false
+  trainable:
+    bg: False
+    record: True
+    stroke: False
+#  num_segments: 4
+  num_iter: 500
+  # stroke_width / stroke_color are left unset (null) in this base config.
+  lr_base:
+    bg: 0.01
+    point: 1
+    color: 0.01
+    stroke_width: null
+    stroke_color: null
+  coord_init:
+    type: sparse
+  seed: 0
+  loss:
+    use_l1_loss: false
+    use_distance_weighted_loss: true
+    xing_loss_weight: 0.01
+    bis_loss_weight: null
+# `repeat` schedules: defined by `max_path` and `schedule_each`.
+experiment_1x1:
+  path_schedule:
+    type: repeat
+    max_path: 1
+    schedule_each: 1
+experiment_4x1:
+  path_schedule:
+    type: repeat
+    max_path: 4
+    schedule_each: 1
+experiment_5x1:
+  path_schedule:
+    type: repeat
+    max_path: 5
+    schedule_each: 1
+experiment_8x1:
+  path_schedule:
+    type: repeat
+    max_path: 8
+    schedule_each: 1
+experiment_16x1:
+  path_schedule:
+    type: repeat
+    max_path: 16
+    schedule_each: 1
+experiment_32x1:
+  path_schedule:
+    type: repeat
+    max_path: 32
+    schedule_each: 1
+# `list` schedule: defined by an explicit `schedule` sequence.
+experiment_1357:
+  path_schedule:
+    type: list
+    schedule: [1, 3, 5, 7]
+# `exp` schedules: defined by `base`, `max_path` and `max_path_per_iter`.
+experiment_exp2_256:
+  path_schedule:
+    type: exp
+    base: 2
+    max_path: 256
+    max_path_per_iter: 32
+experiment_exp2_128:
+  path_schedule:
+    type: exp
+    base: 2
+    max_path: 128
+    max_path_per_iter: 32

cuda_utils.h ADDED Viewed

	@@ -0,0 +1,53 @@

+#pragma once
+#ifdef __CUDACC__
+    #include <cuda.h>
+    #include <cuda_runtime.h>
+#endif
+#include <cstdio>
+#include <cassert>
+#include <limits>
+#ifdef __CUDACC__
+// checkCuda(x): evaluates the CUDA runtime expression `x` exactly once and,
+// if the result is not cudaSuccess, prints the error string together with
+// the source location and terminates the process with exit code 1.
+//
+// The status is captured in a local first. Expanding `x` a second time
+// inside cudaGetErrorString() would re-run the CUDA call on the failure
+// path and could report a different error than the one that occurred.
+// Only defined when compiling with nvcc (__CUDACC__).
+#define checkCuda(x) do { \
+    const cudaError_t checkCuda_status_ = (x); \
+    if (checkCuda_status_ != cudaSuccess) { \
+        printf("CUDA Runtime Error: %s at %s:%d\n", \
+               cudaGetErrorString(checkCuda_status_), __FILE__, __LINE__); \
+        exit(1); \
+    } \
+} while(0)
+#endif
+// infinity<T>(): positive infinity for type T, usable in host and device code.
+//
+// Host compilation defers to std::numeric_limits<T>::infinity(). Device
+// compilation (__CUDA_ARCH__ defined) reinterprets the IEEE-754 bit pattern
+// of +inf instead. The primary template does this with the double pattern
+// and lets the result convert to T.
+template <typename T>
+DEVICE
+inline T infinity() {
+#if !defined(__CUDA_ARCH__)
+    return std::numeric_limits<T>::infinity();
+#else
+    // 0x7ff0000000000000: sign 0, exponent all ones, mantissa 0 (double +inf)
+    const unsigned long long double_inf_bits = 0x7ff0000000000000ULL;
+    return __longlong_as_double(double_inf_bits);
+#endif
+}
+// Specialization for double.
+template <>
+DEVICE
+inline double infinity() {
+#if !defined(__CUDA_ARCH__)
+    return std::numeric_limits<double>::infinity();
+#else
+    return __longlong_as_double(0x7ff0000000000000ULL);
+#endif
+}
+// Specialization for float.
+template <>
+DEVICE
+inline float infinity() {
+#if !defined(__CUDA_ARCH__)
+    return std::numeric_limits<float>::infinity();
+#else
+    // 0x7f800000: sign 0, exponent all ones, mantissa 0 (float +inf)
+    return __int_as_float(0x7f800000);
+#endif
+}
+// Calls cudaDeviceSynchronize() under checkCuda when compiled with nvcc;
+// the body is empty when __CUDACC__ is not defined.
+inline void cuda_synchronize() {
+#if defined(__CUDACC__)
+    checkCuda(cudaDeviceSynchronize());
+#endif // __CUDACC__
+}

data/demo1.png ADDED Viewed

data/demo2.jpg ADDED Viewed

data/demo3.png ADDED Viewed

diffvg.cpp ADDED Viewed

	@@ -0,0 +1,1792 @@

+#include "diffvg.h"
+#include "aabb.h"
+#include "shape.h"
+#include "sample_boundary.h"
+#include "atomic.h"
+#include "cdf.h"
+#include "compute_distance.h"
+#include "cuda_utils.h"
+#include "edge_query.h"
+#include "filter.h"
+#include "matrix.h"
+#include "parallel.h"
+#include "pcg.h"
+#include "ptr.h"
+#include "scene.h"
+#include "vector.h"
+#include "winding_number.h"
+#include "within_distance.h"
+#include <cassert>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <thrust/execution_policy.h>
+#include <thrust/sort.h>
+namespace py = pybind11;
+// Plain record of three indices identifying a shape (and, for paths, a point).
+// NOTE(review): where these records are produced and consumed is not visible
+// in this part of the file -- confirm the exact meaning of each index there.
+struct Command {
+    // presumably an index into the scene's shape groups -- TODO confirm
+    int shape_group_id;
+    // presumably an index into the scene's shapes -- TODO confirm
+    int shape_id;
+    int point_id; // Only used by path
+};
+// Tests whether the canvas-space point `pt` lies inside shape group
+// `shape_group_id`.
+//
+// The point is transformed into the group's local space and rejected early
+// if it falls outside the bounding box of the group's BVH root. Otherwise
+// the BVH is traversed with an explicit stack and the winding numbers of all
+// leaf shapes whose boxes contain the point are summed. The group's fill
+// rule (even-odd or non-zero) turns the sum into the result.
+//
+//   scene_data      scene arrays (shape groups, shapes, BVHs)
+//   shape_group_id  index of the group to test
+//   pt              query point in canvas space
+//   edge_query      optional; when non-null and it names this group and one
+//                   of the visited shapes, `hit` is set if the point is
+//                   inside that single shape under the group's fill rule
+//
+// Returns true when the point is inside the group. A group without shapes
+// has no BVH root and is reported as "not inside".
+DEVICE
+bool is_inside(const SceneData &scene_data,
+               int shape_group_id,
+               const Vector2f &pt,
+               EdgeQuery *edge_query) {
+    const ShapeGroup &shape_group = scene_data.shape_groups[shape_group_id];
+    if (shape_group.num_shapes <= 0) {
+        // The root index below would be negative; nothing can contain pt.
+        return false;
+    }
+    // pt is in canvas space, transform it to shape's local space
+    auto local_pt = xform_pt(shape_group.canvas_to_shape, pt);
+    const auto &bvh_nodes = scene_data.shape_groups_bvh_nodes[shape_group_id];
+    // The root is stored at index 2 * num_shapes - 2.
+    const auto root_id = 2 * shape_group.num_shapes - 2;
+    const AABB &bbox = bvh_nodes[root_id].box;
+    if (!inside(bbox, local_pt)) {
+        return false;
+    }
+    auto winding_number = 0;
+    // Traverse the shape group BVH
+    constexpr auto max_bvh_stack_size = 64;
+    int bvh_stack[max_bvh_stack_size];
+    auto stack_size = 0;
+    bvh_stack[stack_size++] = root_id;
+    while (stack_size > 0) {
+        const BVHNode &node = bvh_nodes[bvh_stack[--stack_size]];
+        if (node.child1 < 0) {
+            // leaf
+            auto shape_id = node.child0;
+            auto w = compute_winding_number(
+                scene_data.shapes[shape_id], scene_data.path_bvhs[shape_id], local_pt);
+            winding_number += w;
+            if (edge_query != nullptr) {
+                if (edge_query->shape_group_id == shape_group_id &&
+                        edge_query->shape_id == shape_id) {
+                    if ((shape_group.use_even_odd_rule && abs(w) % 2 == 1) ||
+                        (!shape_group.use_even_odd_rule && w != 0)) {
+                        edge_query->hit = true;
+                    }
+                }
+            }
+        } else {
+            assert(node.child0 >= 0 && node.child1 >= 0);
+            // Capacity is checked before each push so that an overflow is
+            // caught before bvh_stack is written out of bounds.
+            const AABB &b0 = bvh_nodes[node.child0].box;
+            if (inside(b0, local_pt)) {
+                assert(stack_size < max_bvh_stack_size);
+                bvh_stack[stack_size++] = node.child0;
+            }
+            const AABB &b1 = bvh_nodes[node.child1].box;
+            if (inside(b1, local_pt)) {
+                assert(stack_size < max_bvh_stack_size);
+                bvh_stack[stack_size++] = node.child1;
+            }
+        }
+    }
+    if (shape_group.use_even_odd_rule) {
+        return abs(winding_number) % 2 == 1;
+    } else {
+        return winding_number != 0;
+    }
+}
+DEVICE void accumulate_boundary_gradient(const Shape &shape,
+                                         float contrib,
+                                         float t,
+                                         const Vector2f &normal,
+                                         const BoundaryData &boundary_data,
+                                         Shape &d_shape,
+                                         const Matrix3x3f &shape_to_canvas,
+                                         const Vector2f &local_boundary_pt,
+                                         Matrix3x3f &d_shape_to_canvas) {
+    assert(isfinite(contrib));
+    assert(isfinite(normal));
+    // According to Reynold transport theorem,
+    // the Jacobian of the boundary integral is dot(velocity, normal),
+    // where the velocity depends on the variable being differentiated with.
+    if (boundary_data.is_stroke) {
+        auto has_path_thickness = false;
+        if (shape.type == ShapeType::Path) {
+            const Path &path = *(const Path *)shape.ptr;
+            has_path_thickness = path.thickness != nullptr;
+        }
+        // differentiate stroke width: velocity is the same as normal
+        if (has_path_thickness) {
+            Path *d_p = (Path*)d_shape.ptr;
+            auto base_point_id = boundary_data.path.base_point_id;
+            auto point_id = boundary_data.path.point_id;
+            auto t = boundary_data.path.t;
+            const Path &path = *(const Path *)shape.ptr;
+            if (path.num_control_points[base_point_id] == 0) {
+                // Straight line
+                auto i0 = point_id;
+                auto i1 = (point_id + 1) % path.num_points;
+                // r = r0 + t * (r1 - r0)
+                atomic_add(&d_p->thickness[i0], (1 - t) * contrib);
+                atomic_add(&d_p->thickness[i1], (    t) * contrib);
+            } else if (path.num_control_points[base_point_id] == 1) {
+                // Quadratic Bezier curve
+                auto i0 = point_id;
+                auto i1 = point_id + 1;
+                auto i2 = (point_id + 2) % path.num_points;
+                // r = (1-t)^2r0 + 2(1-t)t r1 + t^2 r2
+                atomic_add(&d_p->thickness[i0], square(1 - t) * contrib);
+                atomic_add(&d_p->thickness[i1], (2*(1-t)*t) * contrib);
+                atomic_add(&d_p->thickness[i2], (t*t) * contrib);
+            } else if (path.num_control_points[base_point_id] == 2) {
+                auto i0 = point_id;
+                auto i1 = point_id + 1;
+                auto i2 = point_id + 2;
+                auto i3 = (point_id + 3) % path.num_points;
+                // r = (1-t)^3r0 + 3*(1-t)^2tr1 + 3*(1-t)t^2r2 + t^3r3
+                atomic_add(&d_p->thickness[i0], cubic(1 - t) * contrib);
+                atomic_add(&d_p->thickness[i1], 3 * square(1 - t) * t * contrib);
+                atomic_add(&d_p->thickness[i2], 3 * (1 - t) * t * t * contrib);
+                atomic_add(&d_p->thickness[i3], t * t * t * contrib);
+            } else {
+                assert(false);
+            }
+        } else {
+            atomic_add(&d_shape.stroke_width, contrib);
+        }
+    }
+    switch (shape.type) {
+        case ShapeType::Circle: {
+            Circle *d_p = (Circle*)d_shape.ptr;
+            // velocity for the center is (1, 0) for x and (0, 1) for y
+            atomic_add(&d_p->center[0], normal * contrib);
+            // velocity for the radius is the same as the normal
+            atomic_add(&d_p->radius, contrib);
+            break;
+        } case ShapeType::Ellipse: {
+            Ellipse *d_p = (Ellipse*)d_shape.ptr;
+            // velocity for the center is (1, 0) for x and (0, 1) for y
+            atomic_add(&d_p->center[0], normal * contrib);
+            // velocity for the radius:
+            // x = center.x + r.x * cos(2pi * t)
+            // y = center.y + r.y * sin(2pi * t)
+            // for r.x: (cos(2pi * t), 0)
+            // for r.y: (0, sin(2pi * t))
+            atomic_add(&d_p->radius.x, cos(2 * float(M_PI) * t) * normal.x * contrib);
+            atomic_add(&d_p->radius.y, sin(2 * float(M_PI) * t) * normal.y * contrib);
+            break;
+        } case ShapeType::Path: {
+            Path *d_p = (Path*)d_shape.ptr;
+            auto base_point_id = boundary_data.path.base_point_id;
+            auto point_id = boundary_data.path.point_id;
+            auto t = boundary_data.path.t;
+            const Path &path = *(const Path *)shape.ptr;
+            if (path.num_control_points[base_point_id] == 0) {
+                // Straight line
+                auto i0 = point_id;
+                auto i1 = (point_id + 1) % path.num_points;
+                // pt = p0 + t * (p1 - p0)
+                // velocity for p0.x: (1 - t,     0)
+                //              p0.y: (    0, 1 - t)
+                //              p1.x: (    t,     0)
+                //              p1.y: (    0,     t)
+                atomic_add(&d_p->points[2 * i0 + 0], (1 - t) * normal.x * contrib);
+                atomic_add(&d_p->points[2 * i0 + 1], (1 - t) * normal.y * contrib);
+                atomic_add(&d_p->points[2 * i1 + 0], (    t) * normal.x * contrib);
+                atomic_add(&d_p->points[2 * i1 + 1], (    t) * normal.y * contrib);
+            } else if (path.num_control_points[base_point_id] == 1) {
+                // Quadratic Bezier curve
+                auto i0 = point_id;
+                auto i1 = point_id + 1;
+                auto i2 = (point_id + 2) % path.num_points;
+                // pt = (1-t)^2p0 + 2(1-t)t p1 + t^2 p2
+                // velocity for p0.x: ((1-t)^2,       0)
+                //              p0.y: (      0, (1-t)^2)
+                //              p1.x: (2(1-t)t,       0)
+                //              p1.y: (      0, 2(1-t)t)
+                //              p1.x: (    t^2,       0)
+                //              p1.y: (      0,     t^2)
+                atomic_add(&d_p->points[2 * i0 + 0], square(1 - t) * normal.x * contrib);
+                atomic_add(&d_p->points[2 * i0 + 1], square(1 - t) * normal.y * contrib);
+                atomic_add(&d_p->points[2 * i1 + 0], (2*(1-t)*t) * normal.x * contrib);
+                atomic_add(&d_p->points[2 * i1 + 1], (2*(1-t)*t) * normal.y * contrib);
+                atomic_add(&d_p->points[2 * i2 + 0], (t*t) * normal.x * contrib);
+                atomic_add(&d_p->points[2 * i2 + 1], (t*t) * normal.y * contrib);
+            } else if (path.num_control_points[base_point_id] == 2) {
+                auto i0 = point_id;
+                auto i1 = point_id + 1;
+                auto i2 = point_id + 2;
+                auto i3 = (point_id + 3) % path.num_points;
+                // pt = (1-t)^3p0 + 3*(1-t)^2tp1 + 3*(1-t)t^2p2 + t^3p3
+                // velocity for p0.x: (   (1-t)^3,          0)
+                //              p0.y: (         0,    (1-t)^3)
+                //              p1.x: (3*(1-t)^2t,          0)
+                //              p1.y: (         0, 3*(1-t)^2t)
+                //              p2.x: (3*(1-t)t^2,          0)
+                //              p2.y: (         0, 3*(1-t)t^2)
+                //              p3.x: (       t^3,          0)
+                //              p3.y: (         0,        t^3)
+                atomic_add(&d_p->points[2 * i0 + 0], cubic(1 - t) * normal.x * contrib);
+                atomic_add(&d_p->points[2 * i0 + 1], cubic(1 - t) * normal.y * contrib);
+                atomic_add(&d_p->points[2 * i1 + 0], 3 * square(1 - t) * t * normal.x * contrib);
+                atomic_add(&d_p->points[2 * i1 + 1], 3 * square(1 - t) * t * normal.y * contrib);
+                atomic_add(&d_p->points[2 * i2 + 0], 3 * (1 - t) * t * t * normal.x * contrib);
+                atomic_add(&d_p->points[2 * i2 + 1], 3 * (1 - t) * t * t * normal.y * contrib);
+                atomic_add(&d_p->points[2 * i3 + 0], t * t * t * normal.x * contrib);
+                atomic_add(&d_p->points[2 * i3 + 1], t * t * t * normal.y * contrib);
+            } else {
+                assert(false);
+            }
+            break;
+        } case ShapeType::Rect: {
+            Rect *d_p = (Rect*)d_shape.ptr;
+            // The velocity depends on the position of the boundary
+            if (normal == Vector2f{-1, 0}) {
+                // left
+                // velocity for p_min is (1, 0) for x and (0, 0) for y
+                atomic_add(&d_p->p_min.x, -contrib);
+            } else if (normal == Vector2f{1, 0}) {
+                // right
+                // velocity for p_max is (1, 0) for x and (0, 0) for y
+                atomic_add(&d_p->p_max.x, contrib);
+            } else if (normal == Vector2f{0, -1}) {
+                // top
+                // velocity for p_min is (0, 0) for x and (0, 1) for y
+                atomic_add(&d_p->p_min.y, -contrib);
+            } else if (normal == Vector2f{0, 1}) {
+                // bottom
+                // velocity for p_max is (0, 0) for x and (0, 1) for y
+                atomic_add(&d_p->p_max.y, contrib);
+            } else {
+                // incorrect normal assignment?
+                assert(false);
+            }
+            break;
+        } default: {
+            assert(false);
+            break;
+        }
+    }
+    // for shape_to_canvas we have the following relationship:
+    // boundary_pt = xform_pt(shape_to_canvas, local_pt)
+    // the velocity is the derivative of boundary_pt with respect to shape_to_canvas
+    // we can use reverse-mode AD to compute the dot product of the velocity and the Jacobian
+    // by passing the normal in d_xform_pt
+    auto d_shape_to_canvas_ = Matrix3x3f();
+    auto d_local_boundary_pt = Vector2f{0, 0};
+    d_xform_pt(shape_to_canvas,
+               local_boundary_pt,
+               normal * contrib,
+               d_shape_to_canvas_,
+               d_local_boundary_pt);
+    atomic_add(&d_shape_to_canvas(0, 0), d_shape_to_canvas_);
+}
+// Reads the RGBA value of stop `stop_index` from a flat stop-color array that
+// stores four consecutive components per stop.
+template <typename StopColors>
+DEVICE
+Vector4f load_gradient_stop_color(const StopColors &stop_colors, int stop_index) {
+    return Vector4f{stop_colors[4 * stop_index + 0],
+                    stop_colors[4 * stop_index + 1],
+                    stop_colors[4 * stop_index + 2],
+                    stop_colors[4 * stop_index + 3]};
+}
+// Evaluates a piecewise-linear color ramp at parameter t.
+// Below the first stop offset the first stop color is returned; past every
+// [offset_i, offset_{i+1}) interval the last stop color is returned; inside an
+// interval the two neighbouring stop colors are linearly blended.
+template <typename Gradient, typename Scalar>
+DEVICE
+Vector4f evaluate_gradient_stops(const Gradient &gradient, Scalar t) {
+    if (t < gradient.stop_offsets[0]) {
+        return load_gradient_stop_color(gradient.stop_colors, 0);
+    }
+    for (int seg = 0; seg < gradient.num_stops - 1; seg++) {
+        auto lower = gradient.stop_offsets[seg];
+        auto upper = gradient.stop_offsets[seg + 1];
+        assert(upper > lower);
+        if (!(t >= lower && t < upper)) {
+            // t is not in this interval, try the next one.
+            continue;
+        }
+        auto lower_color = load_gradient_stop_color(gradient.stop_colors, seg);
+        auto upper_color = load_gradient_stop_color(gradient.stop_colors, seg + 1);
+        auto tt = (t - lower) / (upper - lower);
+        assert(isfinite(tt));
+        assert(isfinite(lower_color));
+        assert(isfinite(upper_color));
+        return lower_color * (1 - tt) + upper_color * tt;
+    }
+    return load_gradient_stop_color(gradient.stop_colors, gradient.num_stops - 1);
+}
+// Evaluates a color description at canvas point `pt` and returns RGBA.
+//   color_type: selects how `color` is interpreted.
+//   color:      pointer to a Constant, LinearGradient or RadialGradient.
+//   pt:         the point at which the color is evaluated.
+// An unknown color type trips an assert and yields a default Vector4f.
+DEVICE
+Vector4f sample_color(const ColorType &color_type,
+                      void *color,
+                      const Vector2f &pt) {
+    if (color_type == ColorType::Constant) {
+        auto constant = (const Constant*)color;
+        assert(isfinite(constant->color));
+        return constant->color;
+    }
+    if (color_type == ColorType::LinearGradient) {
+        auto gradient = (const LinearGradient*)color;
+        // Project pt onto the (begin, end) axis; the squared axis length is
+        // clamped from below so a degenerate axis does not divide by zero.
+        auto beg = gradient->begin;
+        auto end = gradient->end;
+        auto axis = end - beg;
+        auto t = dot(pt - beg, axis) / max(dot(axis, axis), 1e-3f);
+        // Find the corresponding stop.
+        return evaluate_gradient_stops(*gradient, t);
+    }
+    if (color_type == ColorType::RadialGradient) {
+        auto gradient = (const RadialGradient*)color;
+        // The ramp parameter is the length of the offset from the center
+        // after dividing it by the gradient radius.
+        auto offset = pt - gradient->center;
+        auto normalized_offset = offset / gradient->radius;
+        auto t = length(normalized_offset);
+        // Find the corresponding stop.
+        return evaluate_gradient_stops(*gradient, t);
+    }
+    assert(false);
+    return Vector4f{};
+}
+// Backward pass of the per-color-type sample_color(color_type, color, pt).
+//   color_type:    selects how color_ptr / d_color_ptr are interpreted.
+//   color_ptr:     the color description that was sampled in the forward pass.
+//   pt:            the point at which the color was sampled.
+//   d_color:       incoming gradient with respect to the sampled RGBA value.
+//   d_color_ptr:   gradient buffer of the same type as color_ptr; every
+//                  contribution is accumulated into it with atomic_add.
+//   d_translation: optional (may be nullptr); receives the gradient with
+//                  respect to the gradient's begin/end or center positions.
+// The branch structure mirrors the forward pass exactly: whichever stop
+// interval produced the forward color is the only one that receives gradient.
+DEVICE
+void d_sample_color(const ColorType &color_type,
+                    void *color_ptr,
+                    const Vector2f &pt,
+                    const Vector4f &d_color,
+                    void *d_color_ptr,
+                    float *d_translation) {
+    switch (color_type) {
+        case ColorType::Constant: {
+            auto d_c = (Constant*)d_color_ptr;
+            atomic_add(&d_c->color[0], d_color);
+            return;
+        } case ColorType::LinearGradient: {
+            auto c = (const LinearGradient*)color_ptr;
+            auto d_c = (LinearGradient*)d_color_ptr;
+            // Project pt to (c->begin, c->end)
+            auto beg = c->begin;
+            auto end = c->end;
+            auto t = dot(pt - beg, end - beg) / max(dot(end - beg, end - beg), 1e-3f);
+            // Find the corresponding stop:
+            if (t < c->stop_offsets[0]) {
+                // Forward returned the first stop color unchanged.
+                atomic_add(&d_c->stop_colors[0], d_color);
+                return;
+            }
+            for (int i = 0; i < c->num_stops - 1; i++) {
+                auto offset_curr = c->stop_offsets[i];
+                auto offset_next = c->stop_offsets[i + 1];
+                assert(offset_next > offset_curr);
+                if (t >= offset_curr && t < offset_next) {
+                    auto color_curr = Vector4f{
+                        c->stop_colors[4 * i + 0],
+                        c->stop_colors[4 * i + 1],
+                        c->stop_colors[4 * i + 2],
+                        c->stop_colors[4 * i + 3]};
+                    auto color_next = Vector4f{
+                        c->stop_colors[4 * (i + 1) + 0],
+                        c->stop_colors[4 * (i + 1) + 1],
+                        c->stop_colors[4 * (i + 1) + 2],
+                        c->stop_colors[4 * (i + 1) + 3]};
+                    auto tt = (t - offset_curr) / (offset_next - offset_curr);
+                    // forward: return color_curr * (1 - tt) + color_next * tt;
+                    auto d_color_curr = d_color * (1 - tt);
+                    auto d_color_next = d_color * tt;
+                    auto d_tt = sum(d_color * (color_next - color_curr));
+                    // tt = (t - offset_curr) / (offset_next - offset_curr)
+                    auto d_offset_next = -d_tt * tt / (offset_next - offset_curr);
+                    auto d_offset_curr = d_tt * ((tt - 1.f) / (offset_next - offset_curr));
+                    auto d_t = d_tt / (offset_next - offset_curr);
+                    assert(isfinite(d_tt));
+                    atomic_add(&d_c->stop_colors[4 * i], d_color_curr);
+                    atomic_add(&d_c->stop_colors[4 * (i + 1)], d_color_next);
+                    atomic_add(&d_c->stop_offsets[i], d_offset_curr);
+                    atomic_add(&d_c->stop_offsets[i + 1], d_offset_next);
+                    // l = max(dot(end - beg, end - beg), 1e-3f)
+                    // t = dot(pt - beg, end - beg) / l;
+                    auto l = max(dot(end - beg, end - beg), 1e-3f);
+                    auto d_beg = d_t * (-(pt - beg)-(end - beg)) / l;
+                    auto d_end = d_t * (pt - beg) / l;
+                    auto d_l = -d_t * t / l;
+                    // l only depends on beg/end when the max() did not clamp.
+                    if (dot(end - beg, end - beg) > 1e-3f) {
+                        d_beg += 2 * d_l * (beg - end);
+                        d_end += 2 * d_l * (end - beg);
+                    }
+                    atomic_add(&d_c->begin[0], d_beg);
+                    atomic_add(&d_c->end[0], d_end);
+                    if (d_translation != nullptr) {
+                        atomic_add(d_translation, (d_beg + d_end));
+                    }
+                    return;
+                }
+            }
+            // Forward returned the last stop color unchanged.
+            atomic_add(&d_c->stop_colors[4 * (c->num_stops - 1)], d_color);
+            return;
+        } case ColorType::RadialGradient: {
+            auto c = (const RadialGradient*)color_ptr;
+            auto d_c = (RadialGradient*)d_color_ptr;
+            // Distance from pt to center
+            auto offset = pt - c->center;
+            auto normalized_offset = offset / c->radius;
+            auto t = length(normalized_offset);
+            // Find the corresponding stop:
+            if (t < c->stop_offsets[0]) {
+                // Forward returned the first stop color unchanged.
+                atomic_add(&d_c->stop_colors[0], d_color);
+                return;
+            }
+            for (int i = 0; i < c->num_stops - 1; i++) {
+                auto offset_curr = c->stop_offsets[i];
+                auto offset_next = c->stop_offsets[i + 1];
+                assert(offset_next > offset_curr);
+                if (t >= offset_curr && t < offset_next) {
+                    auto color_curr = Vector4f{
+                        c->stop_colors[4 * i + 0],
+                        c->stop_colors[4 * i + 1],
+                        c->stop_colors[4 * i + 2],
+                        c->stop_colors[4 * i + 3]};
+                    auto color_next = Vector4f{
+                        c->stop_colors[4 * (i + 1) + 0],
+                        c->stop_colors[4 * (i + 1) + 1],
+                        c->stop_colors[4 * (i + 1) + 2],
+                        c->stop_colors[4 * (i + 1) + 3]};
+                    auto tt = (t - offset_curr) / (offset_next - offset_curr);
+                    assert(isfinite(tt));
+                    // forward: return color_curr * (1 - tt) + color_next * tt;
+                    auto d_color_curr = d_color * (1 - tt);
+                    auto d_color_next = d_color * tt;
+                    auto d_tt = sum(d_color * (color_next - color_curr));
+                    // tt = (t - offset_curr) / (offset_next - offset_curr)
+                    auto d_offset_next = -d_tt * tt / (offset_next - offset_curr);
+                    auto d_offset_curr = d_tt * ((tt - 1.f) / (offset_next - offset_curr));
+                    auto d_t = d_tt / (offset_next - offset_curr);
+                    assert(isfinite(d_t));
+                    atomic_add(&d_c->stop_colors[4 * i], d_color_curr);
+                    atomic_add(&d_c->stop_colors[4 * (i + 1)], d_color_next);
+                    atomic_add(&d_c->stop_offsets[i], d_offset_curr);
+                    atomic_add(&d_c->stop_offsets[i + 1], d_offset_next);
+                    // offset = pt - c->center
+                    // normalized_offset = offset / c->radius
+                    // t = length(normalized_offset)
+                    auto d_normalized_offset = d_length(normalized_offset, d_t);
+                    auto d_offset = d_normalized_offset / c->radius;
+                    auto d_radius = -d_normalized_offset * offset / (c->radius * c->radius);
+                    auto d_center = -d_offset;
+                    atomic_add(&d_c->center[0], d_center);
+                    atomic_add(&d_c->radius[0], d_radius);
+                    if (d_translation != nullptr) {
+                        atomic_add(d_translation, d_center);
+                    }
+                    // The forward pass returned from inside this interval, so
+                    // stop here. Without this return the code below would also
+                    // add the full d_color to the last stop color, although the
+                    // forward result did not come from that fall-through path.
+                    return;
+                }
+            }
+            // Forward returned the last stop color unchanged.
+            atomic_add(&d_c->stop_colors[4 * (c->num_stops - 1)], d_color);
+            return;
+        } default: {
+            assert(false);
+        }
+    }
+}
+struct Fragment {    // One stroke or fill hit collected by sample_color(scene, ...) at a sample point.
+    Vector3f color;  // first three components of the color sampled for this hit
+    float alpha;     // fourth component of the color sampled for this hit
+    int group_id;    // index of the shape group that was hit; fragments are sorted by it before blending
+    bool is_stroke;  // true when the hit came from the group's stroke color, false for its fill color
+};
+struct PrefilterFragment {           // One stroke or fill hit collected by sample_color_prefiltered at a sample point.
+    Vector3f color;                  // first three components of the color sampled for this hit
+    float alpha;                     // fourth component of the sampled color, multiplied by the smoothstep coverage weight
+    int group_id;                    // index of the shape group that was hit; fragments are sorted by it
+    bool is_stroke;                  // true for a stroke hit, false for a fill hit
+    int shape_id;                    // shape index reported by compute_distance for this group
+    float distance;                  // distance reported by compute_distance; for fills it is negated when the point is not inside
+    Vector2f closest_pt;             // closest point reported by compute_distance
+    ClosestPointPathInfo path_info;  // path information reported by compute_distance alongside closest_pt
+    bool within_distance;            // always true for strokes; for fills, the return value of compute_distance
+};
+// Samples the composited RGBA color of the scene at a screen-space point and,
+// when d_color is given, also back-propagates d_color to the scene's colors.
+//   scene:              scene data holding the BVH, shape groups and gradient buffers.
+//   background_color:   optional background; nullptr means a transparent background.
+//   screen_pt:          sample position in screen space ([0, 1), [0, 1)).
+//   d_color:            optional gradient w.r.t. the returned color; enables the backward pass.
+//   edge_query:         optional; its `hit` flag is updated to tell whether the queried
+//                       shape group contributes at this point without being covered by
+//                       a fully opaque fragment above it.
+//   d_background_color: optional output for the gradient w.r.t. the background color.
+//   d_translation:      optional, forwarded unchanged to d_sample_color.
+// Returns the blended color with the RGB channels divided by the final alpha
+// (when that alpha is larger than 1e-6).
+// The fragment list and the BVH stack have fixed capacities. Overflow still
+// trips an assert in debug builds, but when asserts are compiled out the extra
+// entry is dropped instead of being written past the end of the array.
+DEVICE
+Vector4f sample_color(const SceneData &scene,
+                      const Vector4f *background_color,
+                      const Vector2f &screen_pt,
+                      const Vector4f *d_color = nullptr,
+                      EdgeQuery *edge_query = nullptr,
+                      Vector4f *d_background_color = nullptr,
+                      float *d_translation = nullptr) {
+    if (edge_query != nullptr) {
+        edge_query->hit = false;
+    }
+    // screen_pt is in screen space ([0, 1), [0, 1)),
+    // need to transform to canvas space
+    auto pt = screen_pt;
+    pt.x *= scene.canvas_width;
+    pt.y *= scene.canvas_height;
+    constexpr auto max_hit_shapes = 256;
+    constexpr auto max_bvh_stack_size = 64;
+    Fragment fragments[max_hit_shapes];
+    int bvh_stack[max_bvh_stack_size];
+    auto stack_size = 0;
+    auto num_fragments = 0;
+    // The root index is 2 * num_shape_groups - 2, which would be negative for
+    // an empty scene; in that case skip the traversal entirely.
+    if (scene.num_shape_groups > 0) {
+        bvh_stack[stack_size++] = 2 * scene.num_shape_groups - 2;
+    }
+    while (stack_size > 0) {
+        const BVHNode &node = scene.bvh_nodes[bvh_stack[--stack_size]];
+        if (node.child1 < 0) {
+            // leaf
+            auto group_id = node.child0;
+            const ShapeGroup &shape_group = scene.shape_groups[group_id];
+            if (shape_group.stroke_color != nullptr) {
+                if (within_distance(scene, group_id, pt, edge_query)) {
+                    auto color_alpha = sample_color(shape_group.stroke_color_type,
+                                                    shape_group.stroke_color,
+                                                    pt);
+                    Fragment f;
+                    f.color = Vector3f{color_alpha[0], color_alpha[1], color_alpha[2]};
+                    f.alpha = color_alpha[3];
+                    f.group_id = group_id;
+                    f.is_stroke = true;
+                    assert(num_fragments < max_hit_shapes);
+                    if (num_fragments < max_hit_shapes) {
+                        fragments[num_fragments++] = f;
+                    }
+                }
+            }
+            if (shape_group.fill_color != nullptr) {
+                if (is_inside(scene, group_id, pt, edge_query)) {
+                    auto color_alpha = sample_color(shape_group.fill_color_type,
+                                                    shape_group.fill_color,
+                                                    pt);
+                    Fragment f;
+                    f.color = Vector3f{color_alpha[0], color_alpha[1], color_alpha[2]};
+                    f.alpha = color_alpha[3];
+                    f.group_id = group_id;
+                    f.is_stroke = false;
+                    assert(num_fragments < max_hit_shapes);
+                    if (num_fragments < max_hit_shapes) {
+                        fragments[num_fragments++] = f;
+                    }
+                }
+            }
+        } else {
+            assert(node.child0 >= 0 && node.child1 >= 0);
+            // Check capacity before each push so the stack is never written
+            // out of bounds.
+            const AABB &b0 = scene.bvh_nodes[node.child0].box;
+            if (inside(b0, pt, scene.bvh_nodes[node.child0].max_radius)) {
+                assert(stack_size < max_bvh_stack_size);
+                if (stack_size < max_bvh_stack_size) {
+                    bvh_stack[stack_size++] = node.child0;
+                }
+            }
+            const AABB &b1 = scene.bvh_nodes[node.child1].box;
+            if (inside(b1, pt, scene.bvh_nodes[node.child1].max_radius)) {
+                assert(stack_size < max_bvh_stack_size);
+                if (stack_size < max_bvh_stack_size) {
+                    bvh_stack[stack_size++] = node.child1;
+                }
+            }
+        }
+    }
+    if (num_fragments <= 0) {
+        if (background_color != nullptr) {
+            // Only the background is visible, so it receives the whole
+            // gradient. d_color may be null when no backward pass is requested.
+            if (d_background_color != nullptr && d_color != nullptr) {
+                *d_background_color = *d_color;
+            }
+            return *background_color;
+        }
+        return Vector4f{0, 0, 0, 0};
+    }
+    // Sort the fragments from back to front (i.e. increasing order of group id)
+    // https://github.com/frigaut/yorick-imutil/blob/master/insort.c#L37
+    for (int i = 1; i < num_fragments; i++) {
+        auto j = i;
+        auto temp = fragments[j];
+        while (j > 0 && fragments[j - 1].group_id > temp.group_id) {
+            fragments[j] = fragments[j - 1];
+            j--;
+        }
+        fragments[j] = temp;
+    }
+    // Blend the color; the running results are kept per fragment because the
+    // backward pass below needs the value in front of every blend step.
+    Vector3f accum_color[max_hit_shapes];
+    float accum_alpha[max_hit_shapes];
+    auto first_alpha = 0.f;
+    auto first_color = Vector3f{0, 0, 0};
+    if (background_color != nullptr) {
+        first_alpha = background_color->w;
+        first_color = Vector3f{background_color->x,
+                               background_color->y,
+                               background_color->z};
+    }
+    for (int i = 0; i < num_fragments; i++) {
+        const Fragment &fragment = fragments[i];
+        auto new_color = fragment.color;
+        auto new_alpha = fragment.alpha;
+        auto prev_alpha = i > 0 ? accum_alpha[i - 1] : first_alpha;
+        auto prev_color = i > 0 ? accum_color[i - 1] : first_color;
+        if (edge_query != nullptr) {
+            // Do we hit the target shape?
+            if (new_alpha >= 1.f && edge_query->hit) {
+                // A fully opaque shape in front of the target occludes it
+                edge_query->hit = false;
+            }
+            if (edge_query->shape_group_id == fragment.group_id) {
+                edge_query->hit = true;
+            }
+        }
+        // prev_color is alpha premultiplied, don't need to multiply with
+        // prev_alpha
+        accum_color[i] = prev_color * (1 - new_alpha) + new_alpha * new_color;
+        accum_alpha[i] = prev_alpha * (1 - new_alpha) + new_alpha;
+    }
+    auto final_color = accum_color[num_fragments - 1];
+    auto final_alpha = accum_alpha[num_fragments - 1];
+    if (final_alpha > 1e-6f) {
+        final_color /= final_alpha;
+    }
+    assert(isfinite(final_color));
+    assert(isfinite(final_alpha));
+    if (d_color != nullptr) {
+        // Backward pass
+        auto d_final_color = Vector3f{(*d_color)[0], (*d_color)[1], (*d_color)[2]};
+        auto d_final_alpha = (*d_color)[3];
+        auto d_curr_color = d_final_color;
+        auto d_curr_alpha = d_final_alpha;
+        if (final_alpha > 1e-6f) {
+            // final_color = curr_color / final_alpha
+            d_curr_color = d_final_color / final_alpha;
+            d_curr_alpha -= sum(d_final_color * final_color) / final_alpha;
+        }
+        assert(isfinite(*d_color));
+        assert(isfinite(d_curr_color));
+        assert(isfinite(d_curr_alpha));
+        // Walk the blend chain from the front-most fragment back to the first.
+        for (int i = num_fragments - 1; i >= 0; i--) {
+            // color[n] = prev_color * (1 - new_alpha) + new_alpha * new_color;
+            // alpha[n] = prev_alpha * (1 - new_alpha) + new_alpha;
+            auto prev_alpha = i > 0 ? accum_alpha[i - 1] : first_alpha;
+            auto prev_color = i > 0 ? accum_color[i - 1] : first_color;
+            auto d_prev_alpha = d_curr_alpha * (1.f - fragments[i].alpha);
+            auto d_alpha_i = d_curr_alpha * (1.f - prev_alpha);
+            d_alpha_i += sum(d_curr_color * (fragments[i].color - prev_color));
+            auto d_prev_color = d_curr_color * (1 - fragments[i].alpha);
+            auto d_color_i = d_curr_color * fragments[i].alpha;
+            auto group_id = fragments[i].group_id;
+            if (fragments[i].is_stroke) {
+                d_sample_color(scene.shape_groups[group_id].stroke_color_type,
+                               scene.shape_groups[group_id].stroke_color,
+                               pt,
+                               Vector4f{d_color_i[0], d_color_i[1], d_color_i[2], d_alpha_i},
+                               scene.d_shape_groups[group_id].stroke_color,
+                               d_translation);
+            } else {
+                d_sample_color(scene.shape_groups[group_id].fill_color_type,
+                               scene.shape_groups[group_id].fill_color,
+                               pt,
+                               Vector4f{d_color_i[0], d_color_i[1], d_color_i[2], d_alpha_i},
+                               scene.d_shape_groups[group_id].fill_color,
+                               d_translation);
+            }
+            d_curr_color = d_prev_color;
+            d_curr_alpha = d_prev_alpha;
+        }
+        // Whatever gradient is left after the first fragment belongs to the
+        // background.
+        if (d_background_color != nullptr) {
+            d_background_color->x += d_curr_color.x;
+            d_background_color->y += d_curr_color.y;
+            d_background_color->z += d_curr_color.z;
+            d_background_color->w += d_curr_alpha;
+        }
+    }
+    return Vector4f{final_color[0], final_color[1], final_color[2], final_alpha};
+}
+// Returns the weighted distance from a screen-space point to the nearest shape
+// in the scene, negated when the nearest group has a fill color and the point
+// lies inside it. Returns 0 when compute_distance finds nothing for any group.
+//   scene:         scene data holding the shapes and shape groups.
+//   screen_pt:     sample position in screen space ([0, 1), [0, 1)).
+//   weight:        factor applied to the unsigned distance.
+//   d_dist:        optional gradient w.r.t. the returned distance; when given,
+//                  it is back-propagated through d_compute_distance.
+//   d_translation: optional, forwarded unchanged to d_compute_distance.
+DEVICE
+float sample_distance(const SceneData &scene,
+                      const Vector2f &screen_pt,
+                      float weight,
+                      const float *d_dist = nullptr,
+                      float *d_translation = nullptr) {
+    // Scale the screen-space coordinate up to canvas space.
+    auto canvas_pt = screen_pt;
+    canvas_pt.x *= scene.canvas_width;
+    canvas_pt.y *= scene.canvas_height;
+    // Best candidate found so far over all shape groups.
+    auto nearest_group = -1;
+    auto nearest_shape = -1;
+    auto nearest_dist = 0.f;
+    auto nearest_pt = Vector2f{0, 0};
+    auto nearest_path_info = ClosestPointPathInfo{-1, -1, 0};
+    for (int g = scene.num_shape_groups - 1; g >= 0; g--) {
+        auto hit_shape = -1;
+        auto hit_pt = Vector2f{0, 0};
+        ClosestPointPathInfo hit_path_info;
+        auto hit_dist = infinity<float>();
+        if (!compute_distance(scene, g, canvas_pt, infinity<float>(),
+                              &hit_shape, &hit_pt, &hit_path_info, &hit_dist)) {
+            continue;
+        }
+        // Keep the current candidate unless this is the first hit or a
+        // strictly closer one.
+        if (nearest_group != -1 && !(hit_dist < nearest_dist)) {
+            continue;
+        }
+        nearest_dist = hit_dist;
+        nearest_group = g;
+        nearest_shape = hit_shape;
+        nearest_pt = hit_pt;
+        nearest_path_info = hit_path_info;
+    }
+    if (nearest_group == -1) {
+        return nearest_dist;
+    }
+    nearest_dist *= weight;
+    const ShapeGroup &group = scene.shape_groups[nearest_group];
+    // Only filled groups have an interior; flip the sign there.
+    auto is_interior = false;
+    if (group.fill_color != nullptr) {
+        is_interior = is_inside(scene, nearest_group, canvas_pt, nullptr);
+        if (is_interior) {
+            nearest_dist = -nearest_dist;
+        }
+    }
+    assert((nearest_group >= 0 && nearest_shape >= 0) || scene.num_shape_groups == 0);
+    if (d_dist == nullptr) {
+        return nearest_dist;
+    }
+    // Backward pass: undo the sign flip, then differentiate the unsigned
+    // distance to the closest point.
+    auto d_abs_dist = is_interior ? -(*d_dist) : (*d_dist);
+    const Shape &shape = scene.shapes[nearest_shape];
+    ShapeGroup &d_group = scene.d_shape_groups[nearest_group];
+    Shape &d_shape = scene.d_shapes[nearest_shape];
+    d_compute_distance(group.canvas_to_shape,
+                       group.shape_to_canvas,
+                       shape,
+                       canvas_pt,
+                       nearest_pt,
+                       nearest_path_info,
+                       d_abs_dist,
+                       d_group.shape_to_canvas,
+                       d_shape,
+                       d_translation);
+    return nearest_dist;
+}
+// Collects the color gradient that reaches a sample at `pt` from every pixel
+// covered by the filter footprint around it. Each pixel's RGBA gradient in
+// d_color_image is scaled by that pixel's filter weight for this sample,
+// divided by the pixel's accumulated weight in weight_image
+// (pixel = \sum weight * color / \sum weight).
+// Pixels outside the width x height image, or whose accumulated weight is not
+// positive, contribute nothing.
+DEVICE
+Vector4f gather_d_color(const Filter &filter,
+                        const float *d_color_image,
+                        const float *weight_image,
+                        int width,
+                        int height,
+                        const Vector2f &pt) {
+    const auto center_x = int(pt.x);
+    const auto center_y = int(pt.y);
+    const auto radius = filter.radius;
+    assert(radius > 0);
+    // Half-width of the pixel window, in whole pixels.
+    const auto extent = (int)ceil(radius);
+    auto gathered = Vector4f{0, 0, 0, 0};
+    for (int off_y = -extent; off_y <= extent; off_y++) {
+        const auto row = center_y + off_y;
+        if (row < 0 || row >= height) {
+            continue;
+        }
+        for (int off_x = -extent; off_x <= extent; off_x++) {
+            const auto col = center_x + off_x;
+            if (col < 0 || col >= width) {
+                continue;
+            }
+            // The filter is evaluated at the pixel center relative to pt.
+            const auto pixel_center_x = col + 0.5f;
+            const auto pixel_center_y = row + 0.5f;
+            const auto filter_weight =
+                compute_filter_weight(filter, pixel_center_x - pt.x, pixel_center_y - pt.y);
+            const auto pixel = row * width + col;
+            const auto weight_sum = weight_image[pixel];
+            if (!(weight_sum > 0)) {
+                continue;
+            }
+            const float *texel = d_color_image + 4 * pixel;
+            gathered += (filter_weight / weight_sum) *
+                Vector4f{texel[0], texel[1], texel[2], texel[3]};
+        }
+    }
+    return gathered;
+}
+// Smooth 0-to-1 ramp over d in [-1, 1]: d is remapped to u = (d + 1) / 2,
+// clamped to [0, 1], and then shaped by u^2 * (3 - 2u).
+DEVICE
+float smoothstep(float d) {
+    const auto u = clamp((d + 1.f) / 2.f, 0.f, 1.f);
+    const auto u_squared = u * u;
+    return u_squared * (3 - 2 * u);
+}
+// Derivative of smoothstep with respect to d, scaled by the incoming gradient
+// d_ret. Outside [-1, 1] the ramp is flat, so the result is 0.
+DEVICE
+float d_smoothstep(float d, float d_ret) {
+    const bool outside_ramp = d < -1.f || d > 1.f;
+    if (outside_ramp) {
+        return 0.f;
+    }
+    const auto u = (d + 1.f) / 2.f;
+    // ret = 3 * u^2 - 2 * u^3, hence d ret / d u = 6 * u - 6 * u^2
+    const auto d_u = d_ret * (6 * u - 6 * u * u);
+    // u = (d + 1) / 2, hence d u / d d = 1 / 2
+    return d_u / 2.f;
+}
+DEVICE
+Vector4f sample_color_prefiltered(const SceneData &scene,
+                                  const Vector4f *background_color,
+                                  const Vector2f &screen_pt,
+                                  const Vector4f *d_color = nullptr,
+                                  Vector4f *d_background_color = nullptr,
+                                  float *d_translation = nullptr) {
+    // screen_pt is in screen space ([0, 1), [0, 1)),
+    // need to transform to canvas space
+    auto pt = screen_pt;
+    pt.x *= scene.canvas_width;
+    pt.y *= scene.canvas_height;
+    constexpr auto max_hit_shapes = 64;
+    constexpr auto max_bvh_stack_size = 64;
+    PrefilterFragment fragments[max_hit_shapes];
+    int bvh_stack[max_bvh_stack_size];
+    auto stack_size = 0;
+    auto num_fragments = 0;
+    bvh_stack[stack_size++] = 2 * scene.num_shape_groups - 2;
+    while (stack_size > 0) {
+        const BVHNode &node = scene.bvh_nodes[bvh_stack[--stack_size]];
+        if (node.child1 < 0) {
+            // leaf
+            auto group_id = node.child0;
+            const ShapeGroup &shape_group = scene.shape_groups[group_id];
+            if (shape_group.stroke_color != nullptr) {
+                auto min_shape_id = -1;
+                auto closest_pt = Vector2f{0, 0};
+                auto local_path_info = ClosestPointPathInfo{-1, -1, 0};
+                auto d = infinity<float>();
+                compute_distance(scene, group_id, pt, infinity<float>(),
+                                 &min_shape_id, &closest_pt, &local_path_info, &d);
+                assert(min_shape_id != -1);
+                const auto &shape = scene.shapes[min_shape_id];
+                auto w = smoothstep(fabs(d) + shape.stroke_width) -
+                         smoothstep(fabs(d) - shape.stroke_width);
+                if (w > 0) {
+                    auto color_alpha = sample_color(shape_group.stroke_color_type,
+                                                    shape_group.stroke_color,
+                                                    pt);
+                    color_alpha[3] *= w;
+                    PrefilterFragment f;
+                    f.color = Vector3f{color_alpha[0], color_alpha[1], color_alpha[2]};
+                    f.alpha = color_alpha[3];
+                    f.group_id = group_id;
+                    f.shape_id = min_shape_id;
+                    f.distance = d;
+                    f.closest_pt = closest_pt;
+                    f.is_stroke = true;
+                    f.path_info = local_path_info;
+                    f.within_distance = true;
+                    assert(num_fragments < max_hit_shapes);
+                    fragments[num_fragments++] = f;
+                }
+            }
+            if (shape_group.fill_color != nullptr) {
+                auto min_shape_id = -1;
+                auto closest_pt = Vector2f{0, 0};
+                auto local_path_info = ClosestPointPathInfo{-1, -1, 0};
+                auto d = infinity<float>();
+                auto found = compute_distance(scene,
+                                              group_id,
+                                              pt,
+                                              1.f,
+                                              &min_shape_id,
+                                              &closest_pt,
+                                              &local_path_info,
+                                              &d);
+                auto inside = is_inside(scene, group_id, pt, nullptr);
+                if (found || inside) {
+                    if (!inside) {
+                        d = -d;
+                    }
+                    auto w = smoothstep(d);
+                    if (w > 0) {
+                        auto color_alpha = sample_color(shape_group.fill_color_type,
+                                                        shape_group.fill_color,
+                                                        pt);
+                        color_alpha[3] *= w;
+                        PrefilterFragment f;
+                        f.color = Vector3f{color_alpha[0], color_alpha[1], color_alpha[2]};
+                        f.alpha = color_alpha[3];
+                        f.group_id = group_id;
+                        f.shape_id = min_shape_id;
+                        f.distance = d;
+                        f.closest_pt = closest_pt;
+                        f.is_stroke = false;
+                        f.path_info = local_path_info;
+                        f.within_distance = found;
+                        assert(num_fragments < max_hit_shapes);
+                        fragments[num_fragments++] = f;
+                    }
+                }
+            }
+        } else {
+            assert(node.child0 >= 0 && node.child1 >= 0);
+            const AABB &b0 = scene.bvh_nodes[node.child0].box;
+            if (inside(b0, pt, scene.bvh_nodes[node.child0].max_radius)) {
+                bvh_stack[stack_size++] = node.child0;
+            }
+            const AABB &b1 = scene.bvh_nodes[node.child1].box;
+            if (inside(b1, pt, scene.bvh_nodes[node.child1].max_radius)) {
+                bvh_stack[stack_size++] = node.child1;
+            }
+            assert(stack_size <= max_bvh_stack_size);
+        }
+    }
+    if (num_fragments <= 0) {
+        if (background_color != nullptr) {
+            if (d_background_color != nullptr) {
+                *d_background_color = *d_color;
+            }
+            return *background_color;
+        }
+        return Vector4f{0, 0, 0, 0};
+    }
+    // Sort the fragments from back to front (i.e. increasing order of group id)
+    // https://github.com/frigaut/yorick-imutil/blob/master/insort.c#L37
+    for (int i = 1; i < num_fragments; i++) {
+        auto j = i;
+        auto temp = fragments[j];
+        while (j > 0 && fragments[j - 1].group_id > temp.group_id) {
+            fragments[j] = fragments[j - 1];
+            j--;
+        }
+        fragments[j] = temp;
+    }
+    // Blend the color
+    Vector3f accum_color[max_hit_shapes];
+    float accum_alpha[max_hit_shapes];
+    auto first_alpha = 0.f;
+    auto first_color = Vector3f{0, 0, 0};
+    if (background_color != nullptr) {
+        first_alpha = background_color->w;
+        first_color = Vector3f{background_color->x,
+                               background_color->y,
+                               background_color->z};
+    }
+    for (int i = 0; i < num_fragments; i++) {
+        const PrefilterFragment &fragment = fragments[i];
+        auto new_color = fragment.color;
+        auto new_alpha = fragment.alpha;
+        auto prev_alpha = i > 0 ? accum_alpha[i - 1] : first_alpha;
+        auto prev_color = i > 0 ? accum_color[i - 1] : first_color;
+        // prev_color is alpha premultiplied, don't need to multiply with
+        // prev_alpha
+        accum_color[i] = prev_color * (1 - new_alpha) + new_alpha * new_color;
+        accum_alpha[i] = prev_alpha * (1 - new_alpha) + new_alpha;
+    }
+    auto final_color = accum_color[num_fragments - 1];
+    auto final_alpha = accum_alpha[num_fragments - 1];
+    if (final_alpha > 1e-6f) {
+        final_color /= final_alpha;
+    }
+    assert(isfinite(final_color));
+    assert(isfinite(final_alpha));
+    if (d_color != nullptr) {
+        // Backward pass
+        auto d_final_color = Vector3f{(*d_color)[0], (*d_color)[1], (*d_color)[2]};
+        auto d_final_alpha = (*d_color)[3];
+        auto d_curr_color = d_final_color;
+        auto d_curr_alpha = d_final_alpha;
+        if (final_alpha > 1e-6f) {
+            // final_color = curr_color / final_alpha
+            d_curr_color = d_final_color / final_alpha;
+            d_curr_alpha -= sum(d_final_color * final_color) / final_alpha;
+        }
+        assert(isfinite(*d_color));
+        assert(isfinite(d_curr_color));
+        assert(isfinite(d_curr_alpha));
+        for (int i = num_fragments - 1; i >= 0; i--) {
+            // color[n] = prev_color * (1 - new_alpha) + new_alpha * new_color;
+            // alpha[n] = prev_alpha * (1 - new_alpha) + new_alpha;
+            auto prev_alpha = i > 0 ? accum_alpha[i - 1] : first_alpha;
+            auto prev_color = i > 0 ? accum_color[i - 1] : first_color;
+            auto d_prev_alpha = d_curr_alpha * (1.f - fragments[i].alpha);
+            auto d_alpha_i = d_curr_alpha * (1.f - prev_alpha);
+            d_alpha_i += sum(d_curr_color * (fragments[i].color - prev_color));
+            auto d_prev_color = d_curr_color * (1 - fragments[i].alpha);
+            auto d_color_i = d_curr_color * fragments[i].alpha;
+            auto group_id = fragments[i].group_id;
+            if (fragments[i].is_stroke) {
+                const auto &shape = scene.shapes[fragments[i].shape_id];
+                auto d = fragments[i].distance;
+                auto abs_d_plus_width = fabs(d) + shape.stroke_width;
+                auto abs_d_minus_width = fabs(d) - shape.stroke_width;
+                auto w = smoothstep(abs_d_plus_width) -
+                         smoothstep(abs_d_minus_width);
+                if (w != 0) {
+                    auto d_w = w > 0 ? (fragments[i].alpha / w) * d_alpha_i : 0.f;
+                    d_alpha_i *= w;
+                    // Backprop to color
+                    d_sample_color(scene.shape_groups[group_id].stroke_color_type,
+                                   scene.shape_groups[group_id].stroke_color,
+                                   pt,
+                                   Vector4f{d_color_i[0], d_color_i[1], d_color_i[2], d_alpha_i},
+                                   scene.d_shape_groups[group_id].stroke_color,
+                                   d_translation);
+                    auto d_abs_d_plus_width = d_smoothstep(abs_d_plus_width, d_w);
+                    auto d_abs_d_minus_width = -d_smoothstep(abs_d_minus_width, d_w);
+                    auto d_d = d_abs_d_plus_width + d_abs_d_minus_width;
+                    if (d < 0) {
+                        d_d = -d_d;
+                    }
+                    auto d_stroke_width = d_abs_d_plus_width - d_abs_d_minus_width;
+                    const auto &shape_group = scene.shape_groups[group_id];
+                    ShapeGroup &d_shape_group = scene.d_shape_groups[group_id];
+                    Shape &d_shape = scene.d_shapes[fragments[i].shape_id];
+                    if (fabs(d_d) > 1e-10f) {
+                        d_compute_distance(shape_group.canvas_to_shape,
+                                           shape_group.shape_to_canvas,
+                                           shape,
+                                           pt,
+                                           fragments[i].closest_pt,
+                                           fragments[i].path_info,
+                                           d_d,
+                                           d_shape_group.shape_to_canvas,
+                                           d_shape,
+                                           d_translation);
+                    }
+                    atomic_add(&d_shape.stroke_width, d_stroke_width);
+                }
+            } else {
+                const auto &shape = scene.shapes[fragments[i].shape_id];
+                auto d = fragments[i].distance;
+                auto w = smoothstep(d);
+                if (w != 0) {
+                    // color_alpha[3] = color_alpha[3] * w;
+                    auto d_w = w > 0 ? (fragments[i].alpha / w) * d_alpha_i : 0.f;
+                    d_alpha_i *= w;
+                    d_sample_color(scene.shape_groups[group_id].fill_color_type,
+                                   scene.shape_groups[group_id].fill_color,
+                                   pt,
+                                   Vector4f{d_color_i[0], d_color_i[1], d_color_i[2], d_alpha_i},
+                                   scene.d_shape_groups[group_id].fill_color,
+                                   d_translation);
+                    // w = smoothstep(d)
+                    auto d_d = d_smoothstep(d, d_w);
+                    if (d < 0) {
+                        d_d = -d_d;
+                    }
+                    const auto &shape_group = scene.shape_groups[group_id];
+                    ShapeGroup &d_shape_group = scene.d_shape_groups[group_id];
+                    Shape &d_shape = scene.d_shapes[fragments[i].shape_id];
+                    if (fabs(d_d) > 1e-10f && fragments[i].within_distance) {
+                        d_compute_distance(shape_group.canvas_to_shape,
+                                           shape_group.shape_to_canvas,
+                                           shape,
+                                           pt,
+                                           fragments[i].closest_pt,
+                                           fragments[i].path_info,
+                                           d_d,
+                                           d_shape_group.shape_to_canvas,
+                                           d_shape,
+                                           d_translation);
+                    }
+                }
+            }
+            d_curr_color = d_prev_color;
+            d_curr_alpha = d_prev_alpha;
+        }
+        if (d_background_color != nullptr) {
+            d_background_color->x += d_curr_color.x;
+            d_background_color->y += d_curr_color.y;
+            d_background_color->z += d_curr_color.z;
+            d_background_color->w += d_curr_alpha;
+        }
+    }
+    return Vector4f{final_color[0], final_color[1], final_color[2], final_alpha};
+}
+// Builds the per-pixel normalization image used when splatting samples.
+// Each invocation regenerates one sample position (same index decomposition
+// and RNG stream as the rendering pass) and adds its reconstruction-filter
+// weight to every pixel whose center lies within ceil(filter radius) pixels.
+struct weight_kernel {
+    DEVICE void operator()(int idx) {
+        // idx enumerates height * width * num_samples_y * num_samples_x
+        const auto sub_x = idx % num_samples_x;
+        const auto sub_y = (idx / num_samples_x) % num_samples_y;
+        const auto px = (idx / (num_samples_x * num_samples_y)) % width;
+        const auto py = (idx / (num_samples_x * num_samples_y * width));
+        assert(py < height);
+        // The jitter is always drawn; prefiltering then pins the sample to
+        // the center of its sub-pixel cell.
+        auto rng = init_pcg32(idx, seed);
+        auto jitter_x = next_pcg32_float(&rng);
+        auto jitter_y = next_pcg32_float(&rng);
+        if (use_prefiltering) {
+            jitter_x = 0.5f;
+            jitter_y = 0.5f;
+        }
+        const auto sample_pt = Vector2f{px + ((float)sub_x + jitter_x) / num_samples_x,
+                                        py + ((float)sub_y + jitter_y) / num_samples_y};
+        const auto filter_radius = scene.filter->radius;
+        assert(filter_radius >= 0);
+        const auto reach = (int)ceil(filter_radius);
+        // Visit the (2 * reach + 1)^2 neighborhood, skipping pixels that fall
+        // outside the image.
+        for (int yy = py - reach; yy <= py + reach; yy++) {
+            if (yy < 0 || yy >= height) {
+                continue;
+            }
+            for (int xx = px - reach; xx <= px + reach; xx++) {
+                if (xx < 0 || xx >= width) {
+                    continue;
+                }
+                // Offset from the sample to the pixel center
+                const auto center_x = xx + 0.5f;
+                const auto center_y = yy + 0.5f;
+                const auto w = compute_filter_weight(*scene.filter,
+                                                     center_x - sample_pt.x,
+                                                     center_y - sample_pt.y);
+                atomic_add(weight_image[yy * width + xx], w);
+            }
+        }
+    }
+    SceneData scene;
+    float *weight_image;   // width * height accumulator of filter weights
+    int width;
+    int height;
+    int num_samples_x;
+    int num_samples_y;
+    uint64_t seed;
+    bool use_prefiltering; // true: samples sit at sub-pixel cell centers
+};
+// We use a "mega kernel" for rendering.
+//
+// Each invocation handles one sample. Without eval_positions the sample is
+// derived from idx (pixel x/y plus a sub-pixel cell, jittered unless
+// use_prefiltering is set); with eval_positions the sample location is read
+// from eval_positions[2 * idx .. 2 * idx + 1].
+//
+// For that sample the kernel optionally:
+//   * evaluates the color (and, when d_render_image is set, backpropagates
+//     into the scene), then splats the color into render_image using the
+//     reconstruction filter normalized by weight_image;
+//   * evaluates the distance via sample_distance and accumulates it into
+//     sdf_image (indexed per pixel, or per idx when eval_positions is set).
+//
+// The splat loop reads weight_image, so weight_image must be non-null
+// whenever render_image or d_render_image is non-null.
+struct render_kernel {
+    DEVICE void operator()(int idx) {
+        // height * width * num_samples_y * num_samples_x
+        auto pt = Vector2f{0, 0};
+        auto x = 0;
+        auto y = 0;
+        if (eval_positions == nullptr) {
+            auto rng_state = init_pcg32(idx, seed);
+            auto sx = idx % num_samples_x;
+            auto sy = (idx / num_samples_x) % num_samples_y;
+            x = (idx / (num_samples_x * num_samples_y)) % width;
+            y = (idx / (num_samples_x * num_samples_y * width));
+            assert(x < width && y < height);
+            auto rx = next_pcg32_float(&rng_state);
+            auto ry = next_pcg32_float(&rng_state);
+            if (use_prefiltering) {
+                rx = ry = 0.5f;
+            }
+            pt = Vector2f{x + ((float)sx + rx) / num_samples_x,
+                          y + ((float)sy + ry) / num_samples_y};
+        } else {
+            pt = Vector2f{eval_positions[2 * idx],
+                          eval_positions[2 * idx + 1]};
+            x = int(pt.x);
+            y = int(pt.y);
+        }
+        // Caller-supplied evaluation positions may lie outside the canvas.
+        // Per-pixel buffers (background, its gradient, translation gradient,
+        // per-pixel SDF) are therefore addressed through a clamped pixel
+        // index so they are never read or written out of bounds. For samples
+        // generated from idx the clamp is a no-op. The unclamped x/y are still
+        // used for splatting below, which does its own bounds check.
+        auto px = x >= width ? width - 1 : x;
+        px = px < 0 ? 0 : px;
+        auto py = y >= height ? height - 1 : y;
+        py = py < 0 ? 0 : py;
+        auto pixel_id = py * width + px;
+        float *d_pixel_translation =
+            d_translation != nullptr ? &d_translation[2 * pixel_id] : nullptr;
+        // normalize pt to [0, 1]
+        auto npt = pt;
+        npt.x /= width;
+        npt.y /= height;
+        auto num_samples = num_samples_x * num_samples_y;
+        if (render_image != nullptr || d_render_image != nullptr) {
+            Vector4f d_color = Vector4f{0, 0, 0, 0};
+            if (d_render_image != nullptr) {
+                // Gather d_color from d_render_image inside the filter kernel
+                // normalize using weight_image
+                d_color = gather_d_color(*scene.filter,
+                                         d_render_image,
+                                         weight_image,
+                                         width,
+                                         height,
+                                         pt);
+            }
+            // Background color / gradient slots: 4 floats per pixel.
+            const Vector4f *background_color = background_image != nullptr ?
+                (const Vector4f*)&background_image[4 * pixel_id] : nullptr;
+            Vector4f *d_background_color = d_background_image != nullptr ?
+                (Vector4f*)&d_background_image[4 * pixel_id] : nullptr;
+            auto color = Vector4f{0, 0, 0, 0};
+            if (use_prefiltering) {
+                color = sample_color_prefiltered(scene,
+                    background_color,
+                    npt,
+                    d_render_image != nullptr ? &d_color : nullptr,
+                    d_background_color,
+                    d_pixel_translation);
+            } else {
+                color = sample_color(scene,
+                    background_color,
+                    npt,
+                    d_render_image != nullptr ? &d_color : nullptr,
+                    nullptr,
+                    d_background_color,
+                    d_pixel_translation);
+            }
+            assert(isfinite(color));
+            // Splat color onto render_image
+            auto radius = scene.filter->radius;
+            assert(radius >= 0);
+            auto ri = (int)ceil(radius);
+            for (int dy = -ri; dy <= ri; dy++) {
+                for (int dx = -ri; dx <= ri; dx++) {
+                    auto xx = x + dx;
+                    auto yy = y + dy;
+                    // Pixels that received no filter weight are skipped to
+                    // avoid dividing by zero.
+                    if (xx >= 0 && xx < width && yy >= 0 && yy < height &&
+                            weight_image[yy * width + xx] > 0) {
+                        auto weight_sum = weight_image[yy * width + xx];
+                        auto xc = xx + 0.5f;
+                        auto yc = yy + 0.5f;
+                        auto filter_weight = compute_filter_weight(*scene.filter,
+                                                                   xc - pt.x,
+                                                                   yc - pt.y);
+                        auto weighted_color = filter_weight * color / weight_sum;
+                        if (render_image != nullptr) {
+                            atomic_add(render_image[4 * (yy * width + xx) + 0],
+                                       weighted_color[0]);
+                            atomic_add(render_image[4 * (yy * width + xx) + 1],
+                                       weighted_color[1]);
+                            atomic_add(render_image[4 * (yy * width + xx) + 2],
+                                       weighted_color[2]);
+                            atomic_add(render_image[4 * (yy * width + xx) + 3],
+                                       weighted_color[3]);
+                        }
+                        if (d_render_image != nullptr) {
+                            // Backprop to filter_weight
+                            // pixel = \sum weight * color / \sum weight
+                            auto d_pixel = Vector4f{
+                                d_render_image[4 * (yy * width + xx) + 0],
+                                d_render_image[4 * (yy * width + xx) + 1],
+                                d_render_image[4 * (yy * width + xx) + 2],
+                                d_render_image[4 * (yy * width + xx) + 3],
+                            };
+                            auto d_weight =
+                                (dot(d_pixel, color) * weight_sum -
+                                 filter_weight * dot(d_pixel, color) * (weight_sum - filter_weight)) /
+                                square(weight_sum);
+                            d_compute_filter_weight(*scene.filter,
+                                                    xc - pt.x,
+                                                    yc - pt.y,
+                                                    d_weight,
+                                                    scene.d_filter);
+                        }
+                    }
+                }
+            }
+        }
+        if (sdf_image != nullptr || d_sdf_image != nullptr) {
+            float d_dist = 0.f;
+            if (d_sdf_image != nullptr) {
+                // SDF buffers are per pixel for generated samples and per
+                // evaluation position otherwise.
+                if (eval_positions == nullptr) {
+                    d_dist = d_sdf_image[pixel_id];
+                } else {
+                    d_dist = d_sdf_image[idx];
+                }
+            }
+            // Generated samples are averaged over the samples of a pixel;
+            // explicit evaluation positions are taken as-is.
+            auto weight = eval_positions == nullptr ? 1.f / num_samples : 1.f;
+            auto dist = sample_distance(scene, npt, weight,
+                d_sdf_image != nullptr ? &d_dist : nullptr,
+                d_pixel_translation);
+            if (sdf_image != nullptr) {
+                if (eval_positions == nullptr) {
+                    atomic_add(sdf_image[pixel_id], dist);
+                } else {
+                    atomic_add(sdf_image[idx], dist);
+                }
+            }
+        }
+    }
+    SceneData scene;
+    float *background_image;   // optional, 4 floats per pixel
+    float *render_image;       // optional output, 4 floats per pixel
+    float *weight_image;       // per-pixel sum of filter weights
+    float *sdf_image;          // optional output, 1 float per pixel / position
+    float *d_background_image; // optional gradient, 4 floats per pixel
+    float *d_render_image;     // optional incoming gradient, 4 floats per pixel
+    float *d_sdf_image;        // optional incoming gradient of sdf_image
+    float *d_translation;      // optional gradient, 2 floats per pixel
+    int width;
+    int height;
+    int num_samples_x;
+    int num_samples_y;
+    uint64_t seed;
+    bool use_prefiltering;
+    float *eval_positions;     // optional, 2 floats (x, y) per invocation
+};
+struct BoundarySample { // One boundary sample: written by sample_boundary_kernel, read by render_edge_kernel.
+    Vector2f pt; // canvas-space point divided by (canvas_width, canvas_height)
+    Vector2f local_pt; // the same point in the shape's local space
+    Vector2f normal; // boundary normal after xform_normal with canvas_to_shape
+    int shape_group_id; // index into scene.shape_groups
+    int shape_id; // index into scene.shapes; -1 marks a rejected sample
+    float t; // random number passed to sample_boundary to pick the point
+    BoundaryData data; // extra per-sample data filled in by sample_boundary
+    float pdf; // shape selection probability times boundary_pdf
+};
+// Draws one boundary sample per invocation.
+//
+// A shape is chosen by sampling scene.sample_shapes_cdf, then a point is
+// chosen on that shape's boundary with sample_boundary. The result is stored
+// in boundary_samples[idx]; a Morton code of the (normalized) canvas position
+// is stored in morton_codes[idx] so the samples can be sorted spatially.
+// Rejected samples (zero probability) keep shape_id == -1 and Morton code 0.
+struct sample_boundary_kernel {
+    DEVICE void operator()(int idx) {
+        // Start from a "rejected" record so every early return leaves the
+        // slot in a well-defined state.
+        boundary_samples[idx].pt = Vector2f{0, 0};
+        boundary_samples[idx].shape_id = -1;
+        boundary_ids[idx] = idx;
+        morton_codes[idx] = 0;
+        auto rng_state = init_pcg32(idx, seed);
+        auto u = next_pcg32_float(&rng_state);
+        // Sample a shape
+        auto sample_id = sample(scene.sample_shapes_cdf,
+                                scene.num_total_shapes,
+                                u);
+        assert(sample_id >= 0 && sample_id < scene.num_total_shapes);
+        auto shape_id = scene.sample_shape_id[sample_id];
+        assert(shape_id >= 0 && shape_id < scene.num_shapes);
+        auto shape_group_id = scene.sample_group_id[sample_id];
+        assert(shape_group_id >= 0 && shape_group_id < scene.num_shape_groups);
+        // The PMF is the companion of sample_shapes_cdf, so it is looked up
+        // with the same index the CDF was sampled with (sample_id), not with
+        // the shape index it maps to.
+        auto shape_pmf = scene.sample_shapes_pmf[sample_id];
+        if (shape_pmf <= 0) {
+            return;
+        }
+        // Sample a point on the boundary of the shape
+        auto boundary_pdf = 0.f;
+        auto normal = Vector2f{0, 0};
+        auto t = next_pcg32_float(&rng_state);
+        BoundaryData boundary_data;
+        const ShapeGroup &shape_group = scene.shape_groups[shape_group_id];
+        auto local_boundary_pt = sample_boundary(
+            scene, shape_group_id, shape_id,
+            t, normal, boundary_pdf, boundary_data);
+        if (boundary_pdf <= 0) {
+            return;
+        }
+        // local_boundary_pt & normal are in shape's local space,
+        // transform them to canvas space
+        auto boundary_pt = xform_pt(shape_group.shape_to_canvas, local_boundary_pt);
+        normal = xform_normal(shape_group.canvas_to_shape, normal);
+        // Normalize boundary_pt by the canvas size; points on the canvas
+        // end up in [0, 1), points of shapes extending past it do not.
+        boundary_pt.x /= scene.canvas_width;
+        boundary_pt.y /= scene.canvas_height;
+        boundary_samples[idx].pt = boundary_pt;
+        boundary_samples[idx].local_pt = local_boundary_pt;
+        boundary_samples[idx].normal = normal;
+        boundary_samples[idx].shape_group_id = shape_group_id;
+        boundary_samples[idx].shape_id = shape_id;
+        boundary_samples[idx].t = t;
+        boundary_samples[idx].data = boundary_data;
+        boundary_samples[idx].pdf = shape_pmf * boundary_pdf;
+        // Quantize the position for the Morton code. Clamp first: converting
+        // a negative or too-large float to uint32_t is undefined, and the
+        // code is only a sort key, so off-canvas points can share the
+        // border cells. The comparisons also send NaN to 0.
+        auto mx = boundary_pt.x * 1023;
+        auto my = boundary_pt.y * 1023;
+        mx = mx > 0.f ? (mx < 1023.f ? mx : 1023.f) : 0.f;
+        my = my > 0.f ? (my < 1023.f ? my : 1023.f) : 0.f;
+        TVector2<uint32_t> p_i{mx, my};
+        morton_codes[idx] = (expand_bits(p_i.x) << 1u) |
+                            (expand_bits(p_i.y) << 0u);
+    }
+    SceneData scene;
+    uint64_t seed;
+    BoundarySample *boundary_samples; // one output record per invocation
+    int *boundary_ids;                // initialized to the identity mapping
+    uint32_t *morton_codes;           // spatial sort key per sample
+};
+// Turns one boundary sample into gradient contributions.
+// The color is evaluated slightly on either side of the boundary; the color
+// difference, weighted by the gathered image gradient and divided by the
+// sample pdf, is handed to accumulate_boundary_gradient and, optionally,
+// added to the per-pixel translation gradient.
+struct render_edge_kernel {
+    DEVICE void operator()(int idx) {
+        const BoundarySample &record = boundary_samples[boundary_ids[idx]];
+        // shape_id == -1 marks a sample that was rejected when it was drawn
+        if (record.shape_id == -1) {
+            return;
+        }
+        auto boundary_pt = record.pt;
+        auto local_boundary_pt = record.local_pt;
+        auto normal = record.normal;
+        auto shape_group_id = record.shape_group_id;
+        auto shape_id = record.shape_id;
+        auto t = record.t;
+        auto boundary_data = record.data;
+        auto pdf = record.pdf;
+        const ShapeGroup &shape_group = scene.shape_groups[shape_group_id];
+        // Pixel containing the boundary point; samples off the image are dropped
+        auto bx = int(boundary_pt.x * width);
+        auto by = int(boundary_pt.y * height);
+        if (!(bx >= 0 && bx < width && by >= 0 && by < height)) {
+            return;
+        }
+        const Vector4f *background_color = background_image != nullptr ?
+            (const Vector4f *)&background_image[4 * ((by * width) + bx)] : nullptr;
+        // Sample the two sides of the boundary
+        auto inside_query = EdgeQuery{shape_group_id, shape_id, false};
+        auto outside_query = EdgeQuery{shape_group_id, shape_id, false};
+        auto color_inside = sample_color(scene,
+            background_color,
+            boundary_pt - 1e-4f * normal,
+            nullptr, &inside_query);
+        auto color_outside = sample_color(scene,
+            background_color,
+            boundary_pt + 1e-4f * normal,
+            nullptr, &outside_query);
+        if (!inside_query.hit) {
+            if (!outside_query.hit) {
+                // occluded
+                return;
+            }
+            // Only the "outside" probe hit the shape: flip the orientation
+            normal = -normal;
+            swap_(inside_query, outside_query);
+            swap_(color_inside, color_outside);
+        }
+        // Boundary point in screen space
+        auto sboundary_pt = boundary_pt;
+        sboundary_pt.x *= width;
+        sboundary_pt.y *= height;
+        auto d_color = gather_d_color(*scene.filter,
+                                      d_render_image,
+                                      weight_image,
+                                      width,
+                                      height,
+                                      sboundary_pt);
+        // Normalization factor
+        d_color /= float(scene.canvas_width * scene.canvas_height);
+        assert(isfinite(d_color));
+        assert(isfinite(pdf) && pdf > 0);
+        auto contrib = dot(color_inside - color_outside, d_color) / pdf;
+        ShapeGroup &d_shape_group = scene.d_shape_groups[shape_group_id];
+        accumulate_boundary_gradient(scene.shapes[shape_id],
+            contrib, t, normal, boundary_data, scene.d_shapes[shape_id],
+            shape_group.shape_to_canvas, local_boundary_pt, d_shape_group.shape_to_canvas);
+        // Don't need to backprop to filter weights:
+        // \int f'(x) g(x) dx doesn't contain discontinuities
+        // if f is continuous, even if g is discontinuous
+        if (d_translation == nullptr) {
+            return;
+        }
+        // According to Reynold transport theorem,
+        // the Jacobian of the boundary integral is dot(velocity, normal)
+        // The velocity of the object translating x is (1, 0)
+        // The velocity of the object translating y is (0, 1)
+        float *d_pixel_translation = &d_translation[2 * (by * width + bx)];
+        atomic_add(&d_pixel_translation[0], normal.x * contrib);
+        atomic_add(&d_pixel_translation[1], normal.y * contrib);
+    }
+    SceneData scene;
+    const float *background_image;
+    const BoundarySample *boundary_samples;
+    const int *boundary_ids;
+    float *weight_image;
+    float *d_render_image;
+    float *d_translation;
+    int width;
+    int height;
+    int num_samples_x;
+    int num_samples_y;
+};
+// Renders the scene and/or backpropagates through the rendering.
+//
+// Every ptr<float> argument is optional (its get() may be nullptr):
+//   background_image    - per-pixel background, 4 floats per pixel
+//   render_image        - output color image, 4 floats per pixel
+//   render_sdf          - output signed distance values
+//   d_background_image  - gradient w.r.t. background_image
+//   d_render_image      - incoming gradient of render_image
+//   d_render_sdf        - incoming gradient of render_sdf
+//   d_translation       - per-pixel translation gradient, 2 floats per pixel
+//   eval_positions      - explicit sample positions (2 floats each); when
+//                         given, num_eval_positions samples are evaluated
+//                         instead of width * height * num_samples_x *
+//                         num_samples_y, and no weight image is allocated
+//
+// Steps: (1) accumulate the filter weight image, (2) run render_kernel over
+// all samples, (3) unless prefiltering is used, add the boundary (edge
+// sampling) gradient when d_render_image is given.
+void render(std::shared_ptr<Scene> scene,
+            ptr<float> background_image,
+            ptr<float> render_image,
+            ptr<float> render_sdf,
+            int width,
+            int height,
+            int num_samples_x,
+            int num_samples_y,
+            uint64_t seed,
+            ptr<float> d_background_image,
+            ptr<float> d_render_image,
+            ptr<float> d_render_sdf,
+            ptr<float> d_translation,
+            bool use_prefiltering,
+            ptr<float> eval_positions,
+            int num_eval_positions) {
+#ifdef __NVCC__
+    // Remember the active device so it can be restored before returning
+    int old_device_id = -1;
+    if (scene->use_gpu) {
+        checkCuda(cudaGetDevice(&old_device_id));
+        if (scene->gpu_index != -1) {
+            checkCuda(cudaSetDevice(scene->gpu_index));
+        }
+    }
+#endif
+    parallel_init();
+    float *weight_image = nullptr;
+    // Allocate and zero the weight image
+    if (scene->use_gpu) {
+#ifdef __CUDACC__
+        if (eval_positions.get() == nullptr) {
+            checkCuda(cudaMallocManaged(&weight_image, width * height * sizeof(float)));
+            checkCuda(cudaMemset(weight_image, 0, width * height * sizeof(float)));
+        }
+#else
+        assert(false);
+#endif
+    } else {
+        if (eval_positions.get() == nullptr) {
+            weight_image = (float*)malloc(width * height * sizeof(float));
+            memset(weight_image, 0, width * height * sizeof(float));
+        }
+    }
+    if (render_image.get() != nullptr || d_render_image.get() != nullptr ||
+        render_sdf.get() != nullptr || d_render_sdf.get() != nullptr) {
+        if (weight_image != nullptr) {
+            // use_prefiltering must be forwarded: the weights have to be
+            // accumulated from the same sample positions render_kernel
+            // splats from (cell centers when prefiltering, jittered
+            // otherwise), or the normalization does not match the splat.
+            parallel_for(weight_kernel{
+                get_scene_data(*scene.get()),
+                weight_image,
+                width,
+                height,
+                num_samples_x,
+                num_samples_y,
+                seed,
+                use_prefiltering
+            }, width * height * num_samples_x * num_samples_y, scene->use_gpu);
+        }
+        auto num_samples = eval_positions.get() == nullptr ?
+            width * height * num_samples_x * num_samples_y : num_eval_positions;
+        parallel_for(render_kernel{
+            get_scene_data(*scene.get()),
+            background_image.get(),
+            render_image.get(),
+            weight_image,
+            render_sdf.get(),
+            d_background_image.get(),
+            d_render_image.get(),
+            d_render_sdf.get(),
+            d_translation.get(),
+            width,
+            height,
+            num_samples_x,
+            num_samples_y,
+            seed,
+            use_prefiltering,
+            eval_positions.get()
+        }, num_samples, scene->use_gpu);
+    }
+    // Boundary sampling
+    if (!use_prefiltering && d_render_image.get() != nullptr) {
+        auto num_samples = width * height * num_samples_x * num_samples_y;
+        BoundarySample *boundary_samples = nullptr;
+        int *boundary_ids = nullptr; // for sorting
+        uint32_t *morton_codes = nullptr; // for sorting
+        // Allocate boundary samples
+        if (scene->use_gpu) {
+#ifdef __CUDACC__
+            checkCuda(cudaMallocManaged(&boundary_samples,
+                num_samples * sizeof(BoundarySample)));
+            checkCuda(cudaMallocManaged(&boundary_ids,
+                num_samples * sizeof(int)));
+            checkCuda(cudaMallocManaged(&morton_codes,
+                num_samples * sizeof(uint32_t)));
+#else
+            assert(false);
+#endif
+        } else {
+            boundary_samples = (BoundarySample*)malloc(
+                num_samples * sizeof(BoundarySample));
+            boundary_ids = (int*)malloc(
+                num_samples * sizeof(int));
+            morton_codes = (uint32_t*)malloc(
+                num_samples * sizeof(uint32_t));
+        }
+        // Edge sampling
+        // We sort the boundary samples for better thread coherency
+        parallel_for(sample_boundary_kernel{
+            get_scene_data(*scene.get()),
+            seed,
+            boundary_samples,
+            boundary_ids,
+            morton_codes
+        }, num_samples, scene->use_gpu);
+        if (scene->use_gpu) {
+            thrust::sort_by_key(thrust::device, morton_codes, morton_codes + num_samples, boundary_ids);
+        } else {
+            // Don't need to sort for CPU, we are not using SIMD hardware anyway.
+            // thrust::sort_by_key(thrust::host, morton_codes, morton_codes + num_samples, boundary_ids);
+        }
+        parallel_for(render_edge_kernel{
+            get_scene_data(*scene.get()),
+            background_image.get(),
+            boundary_samples,
+            boundary_ids,
+            weight_image,
+            d_render_image.get(),
+            d_translation.get(),
+            width,
+            height,
+            num_samples_x,
+            num_samples_y
+        }, num_samples, scene->use_gpu);
+        if (scene->use_gpu) {
+#ifdef __CUDACC__
+            checkCuda(cudaFree(boundary_samples));
+            checkCuda(cudaFree(boundary_ids));
+            checkCuda(cudaFree(morton_codes));
+#else
+            assert(false);
+#endif
+        } else {
+            free(boundary_samples);
+            free(boundary_ids);
+            free(morton_codes);
+        }
+    }
+    // Clean up weight image (may be nullptr when eval_positions was given)
+    if (scene->use_gpu) {
+#ifdef __CUDACC__
+        checkCuda(cudaFree(weight_image));
+#else
+        assert(false);
+#endif
+    } else {
+        free(weight_image);
+    }
+    if (scene->use_gpu) {
+        cuda_synchronize();
+    }
+    parallel_cleanup();
+#ifdef __NVCC__
+    if (old_device_id != -1) {
+        checkCuda(cudaSetDevice(old_device_id));
+    }
+#endif
+}
+// Python bindings for the diffvg extension module.
+// Exposes the raw-pointer wrappers, small vector types, shape / color
+// descriptors, the Scene container and the `render` entry point.
+PYBIND11_MODULE(diffvg, m) {
+    m.doc() = "Differential Vector Graphics";
+    // Thin wrappers that let Python hand raw addresses (as size_t) to C++.
+    py::class_<ptr<void>>(m, "void_ptr")
+        .def(py::init<std::size_t>())
+        .def("as_size_t", &ptr<void>::as_size_t);
+    py::class_<ptr<float>>(m, "float_ptr")
+        .def(py::init<std::size_t>());
+    py::class_<ptr<int>>(m, "int_ptr")
+        .def(py::init<std::size_t>());
+    // Fixed-size float vectors.
+    py::class_<Vector2f>(m, "Vector2f")
+        .def(py::init<float, float>())
+        .def_readwrite("x", &Vector2f::x)
+        .def_readwrite("y", &Vector2f::y);
+    py::class_<Vector3f>(m, "Vector3f")
+        .def(py::init<float, float, float>())
+        .def_readwrite("x", &Vector3f::x)
+        .def_readwrite("y", &Vector3f::y)
+        .def_readwrite("z", &Vector3f::z);
+    py::class_<Vector4f>(m, "Vector4f")
+        .def(py::init<float, float, float, float>())
+        .def_readwrite("x", &Vector4f::x)
+        .def_readwrite("y", &Vector4f::y)
+        .def_readwrite("z", &Vector4f::z)
+        .def_readwrite("w", &Vector4f::w);
+    // Shape primitives.
+    py::enum_<ShapeType>(m, "ShapeType")
+        .value("circle", ShapeType::Circle)
+        .value("ellipse", ShapeType::Ellipse)
+        .value("path", ShapeType::Path)
+        .value("rect", ShapeType::Rect);
+    py::class_<Circle>(m, "Circle")
+        .def(py::init<float, Vector2f>())
+        .def("get_ptr", &Circle::get_ptr)
+        .def_readonly("radius", &Circle::radius)
+        .def_readonly("center", &Circle::center);
+    py::class_<Ellipse>(m, "Ellipse")
+        .def(py::init<Vector2f, Vector2f>())
+        .def("get_ptr", &Ellipse::get_ptr)
+        .def_readonly("radius", &Ellipse::radius)
+        .def_readonly("center", &Ellipse::center);
+    py::class_<Path>(m, "Path")
+        .def(py::init<ptr<int>, ptr<float>, ptr<float>, int, int, bool, bool>())
+        .def("get_ptr", &Path::get_ptr)
+        .def("has_thickness", &Path::has_thickness)
+        .def("copy_to", &Path::copy_to)
+        .def_readonly("num_points", &Path::num_points);
+    py::class_<Rect>(m, "Rect")
+        .def(py::init<Vector2f, Vector2f>())
+        .def("get_ptr", &Rect::get_ptr)
+        .def_readonly("p_min", &Rect::p_min)
+        .def_readonly("p_max", &Rect::p_max);
+    // Color descriptors.
+    py::enum_<ColorType>(m, "ColorType")
+        .value("constant", ColorType::Constant)
+        .value("linear_gradient", ColorType::LinearGradient)
+        .value("radial_gradient", ColorType::RadialGradient);
+    py::class_<Constant>(m, "Constant")
+        .def(py::init<Vector4f>())
+        .def("get_ptr", &Constant::get_ptr)
+        .def_readonly("color", &Constant::color);
+    py::class_<LinearGradient>(m, "LinearGradient")
+        .def(py::init<Vector2f, Vector2f, int, ptr<float>, ptr<float>>())
+        .def("get_ptr", &LinearGradient::get_ptr)
+        .def("copy_to", &LinearGradient::copy_to)
+        .def_readonly("begin", &LinearGradient::begin)
+        .def_readonly("end", &LinearGradient::end)
+        .def_readonly("num_stops", &LinearGradient::num_stops);
+    py::class_<RadialGradient>(m, "RadialGradient")
+        .def(py::init<Vector2f, Vector2f, int, ptr<float>, ptr<float>>())
+        .def("get_ptr", &RadialGradient::get_ptr)
+        .def("copy_to", &RadialGradient::copy_to)
+        .def_readonly("center", &RadialGradient::center)
+        .def_readonly("radius", &RadialGradient::radius)
+        .def_readonly("num_stops", &RadialGradient::num_stops);
+    // Type-erased shape plus typed accessors.
+    py::class_<Shape>(m, "Shape")
+        .def(py::init<ShapeType, ptr<void>, float>())
+        .def("as_circle", &Shape::as_circle)
+        .def("as_ellipse", &Shape::as_ellipse)
+        .def("as_path", &Shape::as_path)
+        .def("as_rect", &Shape::as_rect)
+        .def_readonly("type", &Shape::type)
+        .def_readonly("stroke_width", &Shape::stroke_width);
+    py::class_<ShapeGroup>(m, "ShapeGroup")
+        .def(py::init<ptr<int>,
+                      int,
+                      ColorType,
+                      ptr<void>,
+                      ColorType,
+                      ptr<void>,
+                      bool,
+                      ptr<float>>())
+        .def("fill_color_as_constant", &ShapeGroup::fill_color_as_constant)
+        .def("fill_color_as_linear_gradient", &ShapeGroup::fill_color_as_linear_gradient)
+        .def("fill_color_as_radial_gradient", &ShapeGroup::fill_color_as_radial_gradient)
+        .def("stroke_color_as_constant", &ShapeGroup::stroke_color_as_constant)
+        .def("stroke_color_as_linear_gradient", &ShapeGroup::stroke_color_as_linear_gradient)
+        // Previously bound to fill_color_as_radial_gradient (copy-paste slip),
+        // which made Python read the fill gradient when asking for the stroke.
+        .def("stroke_color_as_radial_gradient", &ShapeGroup::stroke_color_as_radial_gradient)
+        .def("has_fill_color", &ShapeGroup::has_fill_color)
+        .def("has_stroke_color", &ShapeGroup::has_stroke_color)
+        .def("copy_to", &ShapeGroup::copy_to)
+        .def_readonly("fill_color_type", &ShapeGroup::fill_color_type)
+        .def_readonly("stroke_color_type", &ShapeGroup::stroke_color_type);
+    // Reconstruction filter.
+    py::enum_<FilterType>(m, "FilterType")
+        .value("box", FilterType::Box)
+        .value("tent", FilterType::Tent)
+        .value("parabolic", FilterType::RadialParabolic)
+        .value("hann", FilterType::Hann);
+    py::class_<Filter>(m, "Filter")
+        .def(py::init<FilterType,
+                      float>());
+    // Scene is held by shared_ptr on the Python side.
+    py::class_<Scene, std::shared_ptr<Scene>>(m, "Scene")
+        .def(py::init<int,
+                      int,
+                      const std::vector<const Shape*> &,
+                      const std::vector<const ShapeGroup*> &,
+                      const Filter &,
+                      bool,
+                      int>())
+        .def("get_d_shape", &Scene::get_d_shape)
+        .def("get_d_shape_group", &Scene::get_d_shape_group)
+        .def("get_d_filter_radius", &Scene::get_d_filter_radius)
+        .def_readonly("num_shapes", &Scene::num_shapes)
+        .def_readonly("num_shape_groups", &Scene::num_shape_groups);
+    m.def("render", &render, "");
+}

diffvg.h ADDED Viewed

	@@ -0,0 +1,156 @@

+#pragma once
+// DEVICE marks functions callable from both host and device when compiled
+// by NVCC and expands to nothing otherwise. On the non-NVCC path we also
+// pull in <cmath>, provide file-local fmodf/fmod forwarding to std::fmod,
+// and make isfinite visible unqualified.
+#if defined(__NVCC__)
+    #define DEVICE __device__ __host__
+#else
+    #define DEVICE
+    #include <cmath>
+    namespace {
+        inline float fmodf(float lhs, float rhs) {
+            return std::fmod(lhs, rhs);
+        }
+        inline double fmod(double lhs, double rhs) {
+            return std::fmod(lhs, rhs);
+        }
+    }
+    using std::isfinite;
+#endif
+// Fallback for toolchains whose math header does not define M_PI.
+#if !defined(M_PI)
+#define M_PI 3.14159265358979323846
+#endif
+#include <cstdint>
+#include <atomic>
+// We use Real for most of the internal computation.
+// However, for PyTorch interfaces, Optix Prime and Embree queries
+// we use float
+using Real = float;
+/// Returns x multiplied by itself.
+template <class T>
+DEVICE
+inline T square(const T &x) {
+    const T squared = x * x;
+    return squared;
+}
+/// Returns the third power of x, evaluated as (x * x) * x.
+template <class T>
+DEVICE
+inline T cubic(const T &x) {
+    const T squared = x * x;
+    return squared * x;
+}
+/// Restricts v to [lo, hi]: values below lo yield lo, values above hi
+/// yield hi, anything else is returned unchanged.
+template <class T>
+DEVICE
+inline T clamp(const T &v, const T &lo, const T &hi) {
+    return (v < lo) ? lo
+                    : ((v > hi) ? hi : v);
+}
+/// Remainder of a / b shifted by b when the raw remainder is negative,
+/// so that for positive b the result lies in [0, b).
+DEVICE
+inline int modulo(int a, int b) {
+    const int rem = a % b;
+    if (rem < 0) {
+        return rem + b;
+    }
+    return rem;
+}
+/// Single-precision overload of modulo, built on ::fmodf.
+DEVICE
+inline float modulo(float a, float b) {
+    const float rem = ::fmodf(a, b);
+    if (rem < 0.0f) {
+        return rem + b;
+    }
+    return rem;
+}
+/// Double-precision overload of modulo, built on ::fmod.
+DEVICE
+inline double modulo(double a, double b) {
+    const double rem = ::fmod(a, b);
+    if (rem < 0.0) {
+        return rem + b;
+    }
+    return rem;
+}
+/// Returns a when a > b, otherwise b.
+template <class T>
+DEVICE
+inline T max(const T &a, const T &b) {
+    if (a > b) {
+        return a;
+    }
+    return b;
+}
+/// Returns a when a < b, otherwise b.
+template <class T>
+DEVICE
+inline T min(const T &a, const T &b) {
+    if (a < b) {
+        return a;
+    }
+    return b;
+}
+/// Integer division rounded up, computed as (x + y - 1) / y.
+/// This equals ceil(x / y) for x >= 0 and y > 0.
+inline int idiv_ceil(int x, int y) {
+    const int biased = x + y-1;
+    return biased / y;
+}
+/// Exchanges the values of a and b through a temporary copy.
+template <class T>
+DEVICE
+inline void swap_(T &a, T &b) {
+    T held = a;
+    a = b;
+    b = held;
+}
+/// Base-2 logarithm of x via the change-of-base identity.
+/// The divisor is written as a double literal: the previous log(Real(2))
+/// evaluated log(2) in single precision wherever a float overload of log
+/// is visible, degrading the accuracy of this double-precision function.
+inline double log2(double x) {
+    return log(x) / log(2.0);
+}
+/// acos that tolerates arguments slightly outside [-1, 1]:
+/// x >= 1 yields 0, x <= -1 yields pi, everything else goes to acos.
+template <class T>
+DEVICE
+inline T safe_acos(const T &x) {
+    if (x >= 1) {
+        return T(0);
+    }
+    if (x <= -1) {
+        return T(M_PI);
+    }
+    return acos(x);
+}
+// For Morton code computation. This can be made faster.
+// Spreads the low 10 bits of x apart so that input bit i lands at output
+// bit 2*i (a zero is interleaved after every bit). Bits 10 and above of x
+// are ignored.
+DEVICE
+inline uint32_t expand_bits(uint32_t x) {
+    uint32_t spread = 0u;
+    // Walk from the least significant bit (bit 0) up to bit 9.
+    for (uint32_t bit = 0u; bit < 10u; ++bit) {
+        const uint32_t isolated = x & (uint32_t(1) << bit);
+        spread |= (isolated << bit);
+    }
+    return spread;
+}
+// DEVICE
+// inline int clz(uint64_t x) {
+// #ifdef __CUDA_ARCH__
+//     return __clzll(x);
+// #else
+//     // TODO: use _BitScanReverse in windows
+//     return x == 0 ? 64 : __builtin_clzll(x);
+// #endif
+// }
+// DEVICE
+// inline int ffs(uint8_t x) {
+// #ifdef __CUDA_ARCH__
+//     return __ffs(x);
+// #else
+//     // TODO: use _BitScanReverse in windows
+//     return __builtin_ffs(x);
+// #endif
+// }
+// DEVICE
+// inline int popc(uint8_t x) {
+// #ifdef __CUDA_ARCH__
+//     return __popc(x);
+// #else
+//     // TODO: use _popcnt in windows
+//     return __builtin_popcount(x);
+// #endif
+// }

edge_query.h ADDED Viewed

	@@ -0,0 +1,7 @@

+#pragma once
+/// Identifies one shape inside one shape group and records whether a
+/// query hit exactly that pair.
+struct EdgeQuery {
+    int shape_group_id; // shape group being looked for
+    int shape_id;       // shape being looked for
+    bool hit;           // Do we hit the specified shape_group_id & shape_id?
+};

examples/1.png ADDED Viewed

examples/2.png ADDED Viewed

examples/3.jpg ADDED Viewed

examples/4.png ADDED Viewed

examples/5.png ADDED Viewed

figures/smile.png ADDED Viewed

filter.h ADDED Viewed

	@@ -0,0 +1,106 @@

+#pragma once
+#include "diffvg.h"
+#include "atomic.h"
+enum class FilterType {
+    Box,
+    Tent,
+    RadialParabolic, // 4/3 (1 - (d/r)^2), evaluated separately for dx and dy and multiplied
+    Hann // https://en.wikipedia.org/wiki/Window_function#Hann_and_Hamming_windows
+};
+struct Filter {
+    FilterType type; // which kernel compute_filter_weight evaluates
+    float radius; // support half-width: the weight is 0 once |dx| or |dy| exceeds it
+};
+struct DFilter {
+    float radius; // gradient w.r.t. Filter::radius, accumulated with atomic_add
+};
+/// Evaluates the reconstruction filter at offset (dx, dy).
+/// Returns 0 when either offset lies outside [-radius, radius]; otherwise
+/// returns the separable kernel value selected by filter.type.
+DEVICE
+inline
+float compute_filter_weight(const Filter &filter,
+                            float dx,
+                            float dy) {
+    const auto radius = filter.radius;
+    // Outside the square support the weight is zero.
+    if (fabs(dx) > radius || fabs(dy) > radius) {
+        return 0;
+    }
+    switch (filter.type) {
+        case FilterType::Box:
+            // Constant over the (2r x 2r) support.
+            return 1.f / square(2 * radius);
+        case FilterType::Tent:
+            // Product of two 1D tents, normalised by r^4.
+            return (radius - fabs(dx)) * (radius - fabs(dy)) /
+                   square(square(radius));
+        case FilterType::RadialParabolic:
+            return (4.f / 3.f) * (1 - square(dx / radius)) *
+                   (4.f / 3.f) * (1 - square(dy / radius));
+        default:
+            break;
+    }
+    // Every remaining type is treated as Hann.
+    assert(filter.type == FilterType::Hann);
+    // normalize dx, dy to [0, 1]
+    auto ndx = (dx / (2*radius)) + 0.5f;
+    auto ndy = (dy / (2*radius)) + 0.5f;
+    // the normalization factor is R^2
+    return 0.5f * (1.f - cos(float(2 * M_PI) * ndx)) *
+           0.5f * (1.f - cos(float(2 * M_PI) * ndy)) /
+           square(radius);
+}
+/// Backward pass of compute_filter_weight with respect to filter.radius.
+///
+/// Given d_return (the gradient flowing into the filter weight evaluated at
+/// offset (dx, dy)), accumulates d_return * d(weight)/d(radius) into
+/// d_filter->radius with atomic_add. Offsets outside the support contribute
+/// nothing, mirroring the early `return 0` of the forward pass.
+///
+/// Fixes relative to the previous version:
+///  * Box: removed a spurious `* radius` factor (d/dr (2r)^-2 = -4 / (2r)^3).
+///  * Tent: the normaliser is 1 / r^4 as in the forward pass, not 1 / r^2.
+///  * RadialParabolic: the gradient now includes d_return and the correct
+///    product-rule terms (it previously ignored d_return entirely).
+///  * Added the out-of-support guard.
+DEVICE
+inline
+void d_compute_filter_weight(const Filter &filter,
+                             float dx,
+                             float dy,
+                             float d_return,
+                             DFilter *d_filter) {
+    // The forward weight is identically 0 here, so its derivative is 0 too.
+    if (fabs(dx) > filter.radius || fabs(dy) > filter.radius) {
+        return;
+    }
+    if (filter.type == FilterType::Box) {
+        // w = 1 / (2r)^2  =>  dw/dr = -2 * 2 / (2r)^3
+        atomic_add(d_filter->radius,
+            d_return * (-2) * 2 / cubic(2 * filter.radius));
+    } else if (filter.type == FilterType::Tent) {
+        // w = fx * fy * norm with fx = r - |dx|, fy = r - |dy|, norm = r^-4
+        auto fx = filter.radius - fabs(dx);
+        auto fy = filter.radius - fabs(dy);
+        auto norm = 1 / square(square(filter.radius));
+        auto d_fx = d_return * fy * norm;
+        auto d_fy = d_return * fx * norm;
+        auto d_norm = d_return * fx * fy;
+        // dfx/dr = dfy/dr = 1, dnorm/dr = -4 r^-5
+        atomic_add(d_filter->radius,
+            d_fx + d_fy + (-4) * d_norm / pow(filter.radius, 5));
+    } else if (filter.type == FilterType::RadialParabolic) {
+        // w = gx * gy with g(t) = (4/3) * (1 - (t/r)^2)
+        // dg/dr = (4/3) * 2 * t^2 / r^3
+        auto r3 = filter.radius * filter.radius * filter.radius;
+        auto gx = (4.f / 3.f) * (1 - square(dx / filter.radius));
+        auto gy = (4.f / 3.f) * (1 - square(dy / filter.radius));
+        auto d_gx = d_return * gy;
+        auto d_gy = d_return * gx;
+        atomic_add(d_filter->radius,
+            d_gx * (4.f / 3.f) * 2 * square(dx) / r3 +
+            d_gy * (4.f / 3.f) * 2 * square(dy) / r3);
+    } else {
+        assert(filter.type == FilterType::Hann);
+        // w = fx * fy * norm with
+        //   nd  = d / (2r) + 0.5            (offset normalised to [0, 1])
+        //   f   = 0.5 * (1 - cos(2 pi nd))
+        //   norm = 1 / r^2
+        auto ndx = (dx / (2*filter.radius)) + 0.5f;
+        auto ndy = (dy / (2*filter.radius)) + 0.5f;
+        auto fx = 0.5f * (1.f - cos(float(2*M_PI) * ndx));
+        auto fy = 0.5f * (1.f - cos(float(2*M_PI) * ndy));
+        auto norm = 1 / square(filter.radius);
+        auto d_fx = d_return * fy * norm;
+        auto d_fy = d_return * fx * norm;
+        auto d_norm = d_return * fx * fy;
+        auto d_ndx = d_fx * 0.5f * sin(float(2*M_PI) * ndx) * float(2*M_PI);
+        auto d_ndy = d_fy * 0.5f * sin(float(2*M_PI) * ndy) * float(2*M_PI);
+        // dnd/dr = -2 d / (2r)^2, dnorm/dr = -2 / r^3
+        atomic_add(d_filter->radius,
+            d_ndx * (-2*dx / square(2*filter.radius)) +
+            d_ndy * (-2*dy / square(2*filter.radius)) +
+            (-2) * d_norm / cubic(filter.radius));
+    }
+}

icon/logo.ico ADDED Viewed

img_example/Millenial-at-work.jpg ADDED Viewed

img_example/bus.jpg ADDED Viewed

img_example/zidane.jpg ADDED Viewed

main.py ADDED Viewed

	@@ -0,0 +1,1040 @@

+"""
+Here are some use cases:
+python main.py --config config/base.yaml --experiment experiment_8x1 --signature demo1 --target data/demo1.png
+"""
+import pydiffvg
+import torch
+import cv2
+import matplotlib.pyplot as plt
+import random
+import argparse
+import math
+import errno
+from tqdm import tqdm
+from torch.optim.lr_scheduler import CosineAnnealingLR, LambdaLR
+from torch.nn.functional import adaptive_avg_pool2d
+import warnings
+warnings.filterwarnings("ignore")
+import PIL
+import PIL.Image
+import os
+import os.path as osp
+import numpy as np
+import numpy.random as npr
+import shutil
+import copy
+# import skfmm
+from xing_loss import xing_loss
+import yaml
+from easydict import EasyDict as edict
+pydiffvg.set_print_timing(False)  # presumably silences pydiffvg's timing output -- confirm against pydiffvg
+gamma = 1.0  # gamma value handed to pydiffvg.imwrite when frames are saved
+##########
+# helper #
+##########
+from utils import \
+    get_experiment_id, \
+    get_path_schedule, \
+    edict_2_dict, \
+    check_and_create_dir
+def get_bezier_circle(radius=1, segments=4, bias=None):
+    points = []
+    if bias is None:
+        bias = (random.random(), random.random())
+    avg_degree = 360 / (segments*3)
+    for i in range(0, segments*3):
+        point = (np.cos(np.deg2rad(i * avg_degree)),
+                    np.sin(np.deg2rad(i * avg_degree)))
+        points.append(point)
+    points = torch.tensor(points)
+    points = (points)*radius + torch.tensor(bias).unsqueeze(dim=0)
+    points = points.type(torch.FloatTensor)
+    return points
+def get_sdf(phi, method='skfmm', **kwargs):
+    if method == 'skfmm':
+        import skfmm
+        phi = (phi-0.5)*2
+        if (phi.max() <= 0) or (phi.min() >= 0):
+            return np.zeros(phi.shape).astype(np.float32)
+        sd = skfmm.distance(phi, dx=1)
+        flip_negative = kwargs.get('flip_negative', True)
+        if flip_negative:
+            sd = np.abs(sd)
+        truncate = kwargs.get('truncate', 10)
+        sd = np.clip(sd, -truncate, truncate)
+        # print(f"max sd value is: {sd.max()}")
+        zero2max = kwargs.get('zero2max', True)
+        if zero2max and flip_negative:
+            sd = sd.max() - sd
+        elif zero2max:
+            raise ValueError
+        normalize = kwargs.get('normalize', 'sum')
+        if normalize == 'sum':
+            sd /= sd.sum()
+        elif normalize == 'to1':
+            sd /= sd.max()
+        return sd
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--debug', action='store_true', default=False)
+    parser.add_argument("--config", type=str)
+    parser.add_argument("--experiment", type=str)
+    parser.add_argument("--seed", type=int)
+    parser.add_argument("--target", type=str, help="target image path")
+    parser.add_argument('--log_dir', metavar='DIR', default="log/debug")
+    parser.add_argument('--initial', type=str, default="random", choices=['random', 'circle'])
+    parser.add_argument('--signature', nargs='+', type=str)
+    parser.add_argument('--seginit', nargs='+', type=str)
+    parser.add_argument("--num_segments", type=int, default=4)
+    # parser.add_argument("--num_paths", type=str, default="1,1,1")
+    # parser.add_argument("--num_iter", type=int, default=500)
+    # parser.add_argument('--free', action='store_true')
+    # Please ensure that image resolution is divisible by pool_size; otherwise the performance would drop a lot.
+    # parser.add_argument('--pool_size', type=int, default=40, help="the pooled image size for next path initialization")
+    # parser.add_argument('--save_loss', action='store_true')
+    # parser.add_argument('--save_init', action='store_true')
+    # parser.add_argument('--save_image', action='store_true')
+    # parser.add_argument('--save_video', action='store_true')
+    # parser.add_argument('--print_weight', action='store_true')
+    # parser.add_argument('--circle_init_radius',  type=float)
+    cfg = edict()
+    args = parser.parse_args()
+    cfg.debug = args.debug
+    cfg.config = args.config
+    cfg.experiment = args.experiment
+    cfg.seed = args.seed
+    cfg.target = args.target
+    cfg.log_dir = args.log_dir
+    cfg.initial = args.initial
+    cfg.signature = args.signature
+    # set cfg num_segments in command
+    cfg.num_segments = args.num_segments
+    if args.seginit is not None:
+        cfg.seginit = edict()
+        cfg.seginit.type = args.seginit[0]
+        if cfg.seginit.type == 'circle':
+            cfg.seginit.radius = float(args.seginit[1])
+    return cfg
+def ycrcb_conversion(im, format='[bs x 3 x 2D]', reverse=False):
+    mat = torch.FloatTensor([
+        [ 65.481/255, 128.553/255,  24.966/255], # ranged_from [0, 219/255]
+        [-37.797/255, -74.203/255, 112.000/255], # ranged_from [-112/255, 112/255]
+        [112.000/255, -93.786/255, -18.214/255], # ranged_from [-112/255, 112/255]
+    ]).to(im.device)
+    if reverse:
+        mat = mat.inverse()
+    if format == '[bs x 3 x 2D]':
+        im = im.permute(0, 2, 3, 1)
+        im = torch.matmul(im, mat.T)
+        im = im.permute(0, 3, 1, 2).contiguous()
+        return im
+    elif format == '[2D x 3]':
+        im = torch.matmul(im, mat.T)
+        return im
+    else:
+        raise ValueError
+class random_coord_init():
+    def __init__(self, canvas_size):
+        self.canvas_size = canvas_size
+    def __call__(self):
+        h, w = self.canvas_size
+        return [npr.uniform(0, 1)*w, npr.uniform(0, 1)*h]
+class naive_coord_init():
+    def __init__(self, pred, gt, format='[bs x c x 2D]', replace_sampling=True):
+        if isinstance(pred, torch.Tensor):
+            pred = pred.detach().cpu().numpy()
+        if isinstance(gt, torch.Tensor):
+            gt = gt.detach().cpu().numpy()
+        if format == '[bs x c x 2D]':
+            self.map = ((pred[0] - gt[0])**2).sum(0)
+        elif format == ['[2D x c]']:
+            self.map = ((pred - gt)**2).sum(-1)
+        else:
+            raise ValueError
+        self.replace_sampling = replace_sampling
+    def __call__(self):
+        coord = np.where(self.map == self.map.max())
+        coord_h, coord_w = coord[0][0], coord[1][0]
+        if self.replace_sampling:
+            self.map[coord_h, coord_w] = -1
+        return [coord_w, coord_h]
+class sparse_coord_init():
+    def __init__(self, pred, gt, format='[bs x c x 2D]', quantile_interval=200, nodiff_thres=0.1):
+        if isinstance(pred, torch.Tensor):
+            pred = pred.detach().cpu().numpy()
+        if isinstance(gt, torch.Tensor):
+            gt = gt.detach().cpu().numpy()
+        if format == '[bs x c x 2D]':
+            self.map = ((pred[0] - gt[0])**2).sum(0)
+            self.reference_gt = copy.deepcopy(
+                np.transpose(gt[0], (1, 2, 0)))
+        elif format == ['[2D x c]']:
+            self.map = (np.abs(pred - gt)).sum(-1)
+            self.reference_gt = copy.deepcopy(gt[0])
+        else:
+            raise ValueError
+        # OptionA: Zero too small errors to avoid the error too small deadloop
+        self.map[self.map < nodiff_thres] = 0
+        quantile_interval = np.linspace(0., 1., quantile_interval)
+        quantized_interval = np.quantile(self.map, quantile_interval)
+        # remove redundant
+        quantized_interval = np.unique(quantized_interval)
+        quantized_interval = sorted(quantized_interval[1:-1])
+        self.map = np.digitize(self.map, quantized_interval, right=False)
+        self.map = np.clip(self.map, 0, 255).astype(np.uint8)
+        self.idcnt = {}
+        for idi in sorted(np.unique(self.map)):
+            self.idcnt[idi] = (self.map==idi).sum()
+        self.idcnt.pop(min(self.idcnt.keys()))
+        # remove smallest one to remove the correct region
+    def __call__(self):
+        if len(self.idcnt) == 0:
+            h, w = self.map.shape
+            return [npr.uniform(0, 1)*w, npr.uniform(0, 1)*h]
+        target_id = max(self.idcnt, key=self.idcnt.get)
+        _, component, cstats, ccenter = cv2.connectedComponentsWithStats(
+            (self.map==target_id).astype(np.uint8), connectivity=4)
+        # remove cid = 0, it is the invalid area
+        csize = [ci[-1] for ci in cstats[1:]]
+        target_cid = csize.index(max(csize))+1
+        center = ccenter[target_cid][::-1]
+        coord = np.stack(np.where(component == target_cid)).T
+        dist = np.linalg.norm(coord-center, axis=1)
+        target_coord_id = np.argmin(dist)
+        coord_h, coord_w = coord[target_coord_id]
+        # replace_sampling
+        self.idcnt[target_id] -= max(csize)
+        if self.idcnt[target_id] == 0:
+            self.idcnt.pop(target_id)
+        self.map[component == target_cid] = 0
+        return [coord_w, coord_h]
+def init_shapes(num_paths,
+                num_segments,
+                canvas_size,
+                seginit_cfg,
+                shape_cnt,
+                pos_init_method=None,
+                trainable_stroke=False,
+                gt=None,
+                **kwargs):
+    shapes = []
+    shape_groups = []
+    h, w = canvas_size
+    # change path init location
+    if pos_init_method is None:
+        pos_init_method = random_coord_init(canvas_size=canvas_size)
+    for i in range(num_paths):
+        num_control_points = [2] * num_segments
+        if seginit_cfg.type=="random":
+            points = []
+            p0 = pos_init_method()
+            color_ref = copy.deepcopy(p0)
+            points.append(p0)
+            for j in range(num_segments):
+                radius = seginit_cfg.radius
+                p1 = (p0[0] + radius * npr.uniform(-0.5, 0.5),
+                      p0[1] + radius * npr.uniform(-0.5, 0.5))
+                p2 = (p1[0] + radius * npr.uniform(-0.5, 0.5),
+                      p1[1] + radius * npr.uniform(-0.5, 0.5))
+                p3 = (p2[0] + radius * npr.uniform(-0.5, 0.5),
+                      p2[1] + radius * npr.uniform(-0.5, 0.5))
+                points.append(p1)
+                points.append(p2)
+                if j < num_segments - 1:
+                    points.append(p3)
+                    p0 = p3
+            points = torch.FloatTensor(points)
+        # circle points initialization
+        elif seginit_cfg.type=="circle":
+            radius = seginit_cfg.radius
+            if radius is None:
+                radius = npr.uniform(0.5, 1)
+            center = pos_init_method()
+            color_ref = copy.deepcopy(center)
+            points = get_bezier_circle(
+                radius=radius, segments=num_segments,
+                bias=center)
+        path = pydiffvg.Path(num_control_points = torch.LongTensor(num_control_points),
+                             points = points,
+                             stroke_width = torch.tensor(0.0),
+                             is_closed = True)
+        shapes.append(path)
+        # !!!!!!problem is here. the shape group shape_ids is wrong
+        if gt is not None:
+            wref, href = color_ref
+            wref = max(0, min(int(wref), w-1))
+            href = max(0, min(int(href), h-1))
+            fill_color_init = list(gt[0, :, href, wref]) + [1.]
+            fill_color_init = torch.FloatTensor(fill_color_init)
+            stroke_color_init = torch.FloatTensor(npr.uniform(size=[4]))
+        else:
+            fill_color_init = torch.FloatTensor(npr.uniform(size=[4]))
+            stroke_color_init = torch.FloatTensor(npr.uniform(size=[4]))
+        path_group = pydiffvg.ShapeGroup(
+            shape_ids = torch.LongTensor([shape_cnt+i]),
+            fill_color = fill_color_init,
+            stroke_color = stroke_color_init,
+        )
+        shape_groups.append(path_group)
+    point_var = []
+    color_var = []
+    for path in shapes:
+        path.points.requires_grad = True
+        point_var.append(path.points)
+    for group in shape_groups:
+        group.fill_color.requires_grad = True
+        color_var.append(group.fill_color)
+    if trainable_stroke:
+        stroke_width_var = []
+        stroke_color_var = []
+        for path in shapes:
+            path.stroke_width.requires_grad = True
+            stroke_width_var.append(path.stroke_width)
+        for group in shape_groups:
+            group.stroke_color.requires_grad = True
+            stroke_color_var.append(group.stroke_color)
+        return shapes, shape_groups, point_var, color_var, stroke_width_var, stroke_color_var
+    else:
+        return shapes, shape_groups, point_var, color_var
+class linear_decay_lrlambda_f(object):
+    def __init__(self, decay_every, decay_ratio):
+        self.decay_every = decay_every
+        self.decay_ratio = decay_ratio
+    def __call__(self, n):
+        decay_time = n//self.decay_every
+        decay_step = n %self.decay_every
+        lr_s = self.decay_ratio**decay_time
+        lr_e = self.decay_ratio**(decay_time+1)
+        r = decay_step/self.decay_every
+        lr = lr_s * (1-r) + lr_e * r
+        return lr
+def main_func(target, experiment, num_iter, cfg_arg):
+    with open(cfg_arg.config, 'r') as f:
+        cfg = yaml.load(f, Loader=yaml.FullLoader)
+    cfg_default = edict(cfg['default'])
+    cfg = edict(cfg[cfg_arg.experiment])
+    cfg.update(cfg_default)
+    cfg.update(cfg_arg)
+    cfg.exid = get_experiment_id(cfg.debug)
+    cfg.experiment_dir = \
+        osp.join(cfg.log_dir, '{}_{}'.format(cfg.exid, '_'.join(cfg.signature)))
+    cfg.target = target
+    cfg.experiment = experiment
+    cfg.num_iter = num_iter
+    configfile = osp.join(cfg.experiment_dir, 'config.yaml')
+    check_and_create_dir(configfile)
+    with open(osp.join(configfile), 'w') as f:
+        yaml.dump(edict_2_dict(cfg), f)
+    # Use GPU if available
+    pydiffvg.set_use_gpu(torch.cuda.is_available())
+    device = pydiffvg.get_device()
+    # gt = np.array(PIL.Image.open(cfg.target))
+    gt = np.array(cfg.target)
+    print(f"Input image shape is: {gt.shape}")
+    if len(gt.shape) == 2:
+        print("Converting the gray-scale image to RGB.")
+        gt = gt.unsqueeze(dim=-1).repeat(1,1,3)
+    if gt.shape[2] == 4:
+        print("Input image includes alpha channel, simply dropout alpha channel.")
+        gt = gt[:, :, :3]
+    gt = (gt/255).astype(np.float32)
+    gt = torch.FloatTensor(gt).permute(2, 0, 1)[None].to(device)
+    if cfg.use_ycrcb:
+        gt = ycrcb_conversion(gt)
+    h, w = gt.shape[2:]
+    path_schedule = get_path_schedule(**cfg.path_schedule)
+    if cfg.seed is not None:
+        random.seed(cfg.seed)
+        npr.seed(cfg.seed)
+        torch.manual_seed(cfg.seed)
+    render = pydiffvg.RenderFunction.apply
+    shapes_record, shape_groups_record = [], []
+    region_loss = None
+    loss_matrix = []
+    para_point, para_color = {}, {}
+    if cfg.trainable.stroke:
+        para_stroke_width, para_stroke_color = {}, {}
+    pathn_record = []
+    # Background
+    if cfg.trainable.bg:
+        # meancolor = gt.mean([2, 3])[0]
+        para_bg = torch.tensor([1., 1., 1.], requires_grad=True, device=device)
+    else:
+        if cfg.use_ycrcb:
+            para_bg = torch.tensor([219/255, 0, 0], requires_grad=False, device=device)
+        else:
+            para_bg = torch.tensor([1., 1., 1.], requires_grad=False, device=device)
+    ##################
+    # start_training #
+    ##################
+    loss_weight = None
+    loss_weight_keep = 0
+    if cfg.coord_init.type == 'naive':
+        pos_init_method = naive_coord_init(
+            para_bg.view(1, -1, 1, 1).repeat(1, 1, h, w), gt)
+    elif cfg.coord_init.type == 'sparse':
+        pos_init_method = sparse_coord_init(
+            para_bg.view(1, -1, 1, 1).repeat(1, 1, h, w), gt)
+    elif cfg.coord_init.type == 'random':
+        pos_init_method = random_coord_init([h, w])
+    else:
+        raise ValueError
+    lrlambda_f = linear_decay_lrlambda_f(cfg.num_iter, 0.4)
+    optim_schedular_dict = {}
+    for path_idx, pathn in enumerate(path_schedule):
+        loss_list = []
+        print("=> Adding [{}] paths, [{}] ...".format(pathn, cfg.seginit.type))
+        pathn_record.append(pathn)
+        pathn_record_str = '-'.join([str(i) for i in pathn_record])
+        # initialize new shapes related stuffs.
+        if cfg.trainable.stroke:
+            shapes, shape_groups, point_var, color_var, stroke_width_var, stroke_color_var = init_shapes(
+                pathn, cfg.num_segments, (h, w),
+                cfg.seginit, len(shapes_record),
+                pos_init_method,
+                trainable_stroke=True,
+                gt=gt, )
+            para_stroke_width[path_idx] = stroke_width_var
+            para_stroke_color[path_idx] = stroke_color_var
+        else:
+            shapes, shape_groups, point_var, color_var = init_shapes(
+                pathn, cfg.num_segments, (h, w),
+                cfg.seginit, len(shapes_record),
+                pos_init_method,
+                trainable_stroke=False,
+                gt=gt, )
+        shapes_record += shapes
+        shape_groups_record += shape_groups
+        if cfg.save.init:
+            filename = os.path.join(
+                cfg.experiment_dir, "svg-init",
+                "{}-init.svg".format(pathn_record_str))
+            check_and_create_dir(filename)
+            pydiffvg.save_svg(
+                filename, w, h,
+                shapes_record, shape_groups_record)
+        para = {}
+        if (cfg.trainable.bg) and (path_idx == 0):
+            para['bg'] = [para_bg]
+        para['point'] = point_var
+        para['color'] = color_var
+        if cfg.trainable.stroke:
+            para['stroke_width'] = stroke_width_var
+            para['stroke_color'] = stroke_color_var
+        pg = [{'params' : para[ki], 'lr' : cfg.lr_base[ki]} for ki in sorted(para.keys())]
+        optim = torch.optim.Adam(pg)
+        if cfg.trainable.record:
+            scheduler = LambdaLR(
+                optim, lr_lambda=lrlambda_f, last_epoch=-1)
+        else:
+            scheduler = LambdaLR(
+                optim, lr_lambda=lrlambda_f, last_epoch=cfg.num_iter)
+        optim_schedular_dict[path_idx] = (optim, scheduler)
+        # Inner loop training
+        t_range = tqdm(range(cfg.num_iter))
+        for t in t_range:
+            for _, (optim, _) in optim_schedular_dict.items():
+                optim.zero_grad()
+            # Forward pass: render the image.
+            scene_args = pydiffvg.RenderFunction.serialize_scene(
+                w, h, shapes_record, shape_groups_record)
+            img = render(w, h, 2, 2, t, None, *scene_args)
+            # Compose img with white background
+            img = img[:, :, 3:4] * img[:, :, :3] + \
+                para_bg * (1 - img[:, :, 3:4])
+            if cfg.save.video:
+                filename = os.path.join(
+                    cfg.experiment_dir, "video-png",
+                    "{}-iter{}.png".format(pathn_record_str, t))
+                check_and_create_dir(filename)
+                if cfg.use_ycrcb:
+                    imshow = ycrcb_conversion(
+                        img, format='[2D x 3]', reverse=True).detach().cpu()
+                else:
+                    imshow = img.detach().cpu()
+                pydiffvg.imwrite(imshow, filename, gamma=gamma)
+            # ### added for app
+            # if t%30==0 and t !=0 :
+            #     # print(f"debug: {t}, {filename} {img.size()}")
+            #     return img.detach().cpu().numpy(), t
+            x = img.unsqueeze(0).permute(0, 3, 1, 2) # HWC -> NCHW
+            if cfg.use_ycrcb:
+                color_reweight = torch.FloatTensor([255/219, 255/224, 255/255]).to(device)
+                loss = ((x-gt)*(color_reweight.view(1, -1, 1, 1)))**2
+            else:
+                loss = ((x-gt)**2)
+            if cfg.loss.use_l1_loss:
+                loss = abs(x-gt)
+            if cfg.loss.use_distance_weighted_loss:
+                if cfg.use_ycrcb:
+                    raise ValueError
+                shapes_forsdf = copy.deepcopy(shapes)
+                shape_groups_forsdf = copy.deepcopy(shape_groups)
+                for si in shapes_forsdf:
+                    si.stroke_width = torch.FloatTensor([0]).to(device)
+                for sg_idx, sgi in enumerate(shape_groups_forsdf):
+                    sgi.fill_color = torch.FloatTensor([1, 1, 1, 1]).to(device)
+                    sgi.shape_ids = torch.LongTensor([sg_idx]).to(device)
+                sargs_forsdf = pydiffvg.RenderFunction.serialize_scene(
+                    w, h, shapes_forsdf, shape_groups_forsdf)
+                with torch.no_grad():
+                    im_forsdf = render(w, h, 2, 2, 0, None, *sargs_forsdf)
+                # use alpha channel is a trick to get 0-1 image
+                im_forsdf = (im_forsdf[:, :, 3]).detach().cpu().numpy()
+                loss_weight = get_sdf(im_forsdf, normalize='to1')
+                loss_weight += loss_weight_keep
+                loss_weight = np.clip(loss_weight, 0, 1)
+                loss_weight = torch.FloatTensor(loss_weight).to(device)
+            if cfg.save.loss:
+                save_loss = loss.squeeze(dim=0).mean(dim=0,keepdim=False).cpu().detach().numpy()
+                save_weight = loss_weight.cpu().detach().numpy()
+                save_weighted_loss = save_loss*save_weight
+                # normalize to [0,1]
+                save_loss = (save_loss - np.min(save_loss))/np.ptp(save_loss)
+                save_weight = (save_weight - np.min(save_weight))/np.ptp(save_weight)
+                save_weighted_loss = (save_weighted_loss - np.min(save_weighted_loss))/np.ptp(save_weighted_loss)
+                # save
+                plt.imshow(save_loss, cmap='Reds')
+                plt.axis('off')
+                # plt.colorbar()
+                filename = os.path.join(cfg.experiment_dir, "loss", "{}-iter{}-mseloss.png".format(pathn_record_str, t))
+                check_and_create_dir(filename)
+                plt.savefig(filename, dpi=800)
+                plt.close()
+                plt.imshow(save_weight, cmap='Greys')
+                plt.axis('off')
+                # plt.colorbar()
+                filename = os.path.join(cfg.experiment_dir, "loss", "{}-iter{}-sdfweight.png".format(pathn_record_str, t))
+                plt.savefig(filename, dpi=800)
+                plt.close()
+                plt.imshow(save_weighted_loss, cmap='Reds')
+                plt.axis('off')
+                # plt.colorbar()
+                filename = os.path.join(cfg.experiment_dir, "loss", "{}-iter{}-weightedloss.png".format(pathn_record_str, t))
+                plt.savefig(filename, dpi=800)
+                plt.close()
+            if loss_weight is None:
+                loss = loss.sum(1).mean()
+            else:
+                loss = (loss.sum(1)*loss_weight).mean()
+            # if (cfg.loss.bis_loss_weight is not None)  and (cfg.loss.bis_loss_weight > 0):
+            #     loss_bis = bezier_intersection_loss(point_var[0]) * cfg.loss.bis_loss_weight
+            #     loss = loss + loss_bis
+            if (cfg.loss.xing_loss_weight is not None) \
+                    and (cfg.loss.xing_loss_weight > 0):
+                loss_xing = xing_loss(point_var) * cfg.loss.xing_loss_weight
+                loss = loss + loss_xing
+            loss_list.append(loss.item())
+            t_range.set_postfix({'loss': loss.item()})
+            loss.backward()
+            # step
+            for _, (optim, scheduler) in optim_schedular_dict.items():
+                optim.step()
+                scheduler.step()
+            for group in shape_groups_record:
+                group.fill_color.data.clamp_(0.0, 1.0)
+        if cfg.loss.use_distance_weighted_loss:
+            loss_weight_keep = loss_weight.detach().cpu().numpy() * 1
+        if not cfg.trainable.record:
+            for _, pi in pg.items():
+                for ppi in pi:
+                    pi.require_grad = False
+            optim_schedular_dict = {}
+        if cfg.save.image:
+            filename = os.path.join(
+                cfg.experiment_dir, "demo-png", "{}.png".format(pathn_record_str))
+            check_and_create_dir(filename)
+            if cfg.use_ycrcb:
+                imshow = ycrcb_conversion(
+                    img, format='[2D x 3]', reverse=True).detach().cpu()
+            else:
+                imshow = img.detach().cpu()
+            pydiffvg.imwrite(imshow, filename, gamma=gamma)
+        svg_app_file_name = ""
+        if cfg.save.output:
+            filename = os.path.join(
+                cfg.experiment_dir, "output-svg", "{}.svg".format(pathn_record_str))
+            check_and_create_dir(filename)
+            pydiffvg.save_svg(filename, w, h, shapes_record, shape_groups_record)
+            svg_app_file_name = filename
+        loss_matrix.append(loss_list)
+        # calculate the pixel loss
+        # pixel_loss = ((x-gt)**2).sum(dim=1, keepdim=True).sqrt_() # [N,1,H, W]
+        # region_loss = adaptive_avg_pool2d(pixel_loss, cfg.region_loss_pool_size)
+        # loss_weight = torch.softmax(region_loss.reshape(1, 1, -1), dim=-1)\
+        #     .reshape_as(region_loss)
+        pos_init_method = naive_coord_init(x, gt)
+        if cfg.coord_init.type == 'naive':
+            pos_init_method = naive_coord_init(x, gt)
+        elif cfg.coord_init.type == 'sparse':
+            pos_init_method = sparse_coord_init(x, gt)
+        elif cfg.coord_init.type == 'random':
+            pos_init_method = random_coord_init([h, w])
+        else:
+            raise ValueError
+        if cfg.save.video:
+            print("saving iteration video...")
+            img_array = []
+            for ii in range(0, cfg.num_iter):
+                filename = os.path.join(
+                    cfg.experiment_dir, "video-png",
+                    "{}-iter{}.png".format(pathn_record_str, ii))
+                img = cv2.imread(filename)
+                # cv2.putText(
+                #     img, "Path:{} \nIteration:{}".format(pathn_record_str, ii),
+                #     (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
+                img_array.append(img)
+            videoname = os.path.join(
+                cfg.experiment_dir, "video-avi",
+                "{}.avi".format(pathn_record_str))
+            check_and_create_dir(videoname)
+            out = cv2.VideoWriter(
+                videoname,
+                # cv2.VideoWriter_fourcc(*'mp4v'),
+                cv2.VideoWriter_fourcc(*'FFV1'),
+                20.0, (w, h))
+            for iii in range(len(img_array)):
+                out.write(img_array[iii])
+            out.release()
+            # shutil.rmtree(os.path.join(cfg.experiment_dir, "video-png"))
+    print("The last loss is: {}".format(loss.item()))
+    return img.detach().cpu().numpy(), svg_app_file_name
+if __name__ == "__main__":
+    ###############
+    # make config #
+    ###############
+    cfg_arg = parse_args()
+    with open(cfg_arg.config, 'r') as f:
+        cfg = yaml.load(f, Loader=yaml.FullLoader)
+    cfg_default = edict(cfg['default'])
+    cfg = edict(cfg[cfg_arg.experiment])
+    cfg.update(cfg_default)
+    cfg.update(cfg_arg)
+    cfg.exid = get_experiment_id(cfg.debug)
+    cfg.experiment_dir = \
+        osp.join(cfg.log_dir, '{}_{}'.format(cfg.exid, '_'.join(cfg.signature)))
+    configfile = osp.join(cfg.experiment_dir, 'config.yaml')
+    check_and_create_dir(configfile)
+    with open(osp.join(configfile), 'w') as f:
+        yaml.dump(edict_2_dict(cfg), f)
+    # Use GPU if available
+    pydiffvg.set_use_gpu(torch.cuda.is_available())
+    device = pydiffvg.get_device()
+    gt = np.array(PIL.Image.open(cfg.target))
+    print(f"Input image shape is: {gt.shape}")
+    if len(gt.shape) == 2:
+        print("Converting the gray-scale image to RGB.")
+        gt = gt.unsqueeze(dim=-1).repeat(1,1,3)
+    if gt.shape[2] == 4:
+        print("Input image includes alpha channel, simply dropout alpha channel.")
+        gt = gt[:, :, :3]
+    gt = (gt/255).astype(np.float32)
+    gt = torch.FloatTensor(gt).permute(2, 0, 1)[None].to(device)
+    if cfg.use_ycrcb:
+        gt = ycrcb_conversion(gt)
+    h, w = gt.shape[2:]
+    path_schedule = get_path_schedule(**cfg.path_schedule)
+    if cfg.seed is not None:
+        random.seed(cfg.seed)
+        npr.seed(cfg.seed)
+        torch.manual_seed(cfg.seed)
+    render = pydiffvg.RenderFunction.apply
+    shapes_record, shape_groups_record = [], []
+    region_loss = None
+    loss_matrix = []
+    para_point, para_color = {}, {}
+    if cfg.trainable.stroke:
+        para_stroke_width, para_stroke_color = {}, {}
+    pathn_record = []
+    # Background
+    if cfg.trainable.bg:
+        # meancolor = gt.mean([2, 3])[0]
+        para_bg = torch.tensor([1., 1., 1.], requires_grad=True, device=device)
+    else:
+        if cfg.use_ycrcb:
+            para_bg = torch.tensor([219/255, 0, 0], requires_grad=False, device=device)
+        else:
+            para_bg = torch.tensor([1., 1., 1.], requires_grad=False, device=device)
+    ##################
+    # start_training #
+    ##################
+    loss_weight = None
+    loss_weight_keep = 0
+    if cfg.coord_init.type == 'naive':
+        pos_init_method = naive_coord_init(
+            para_bg.view(1, -1, 1, 1).repeat(1, 1, h, w), gt)
+    elif cfg.coord_init.type == 'sparse':
+        pos_init_method = sparse_coord_init(
+            para_bg.view(1, -1, 1, 1).repeat(1, 1, h, w), gt)
+    elif cfg.coord_init.type == 'random':
+        pos_init_method = random_coord_init([h, w])
+    else:
+        raise ValueError
+    lrlambda_f = linear_decay_lrlambda_f(cfg.num_iter, 0.4)
+    optim_schedular_dict = {}
+    for path_idx, pathn in enumerate(path_schedule):
+        loss_list = []
+        print("=> Adding [{}] paths, [{}] ...".format(pathn, cfg.seginit.type))
+        pathn_record.append(pathn)
+        pathn_record_str = '-'.join([str(i) for i in pathn_record])
+        # initialize new shapes related stuffs.
+        if cfg.trainable.stroke:
+            shapes, shape_groups, point_var, color_var, stroke_width_var, stroke_color_var = init_shapes(
+                pathn, cfg.num_segments, (h, w),
+                cfg.seginit, len(shapes_record),
+                pos_init_method,
+                trainable_stroke=True,
+                gt=gt, )
+            para_stroke_width[path_idx] = stroke_width_var
+            para_stroke_color[path_idx] = stroke_color_var
+        else:
+            shapes, shape_groups, point_var, color_var = init_shapes(
+                pathn, cfg.num_segments, (h, w),
+                cfg.seginit, len(shapes_record),
+                pos_init_method,
+                trainable_stroke=False,
+                gt=gt, )
+        shapes_record += shapes
+        shape_groups_record += shape_groups
+        if cfg.save.init:
+            filename = os.path.join(
+                cfg.experiment_dir, "svg-init",
+                "{}-init.svg".format(pathn_record_str))
+            check_and_create_dir(filename)
+            pydiffvg.save_svg(
+                filename, w, h,
+                shapes_record, shape_groups_record)
+        para = {}
+        if (cfg.trainable.bg) and (path_idx == 0):
+            para['bg'] = [para_bg]
+        para['point'] = point_var
+        para['color'] = color_var
+        if cfg.trainable.stroke:
+            para['stroke_width'] = stroke_width_var
+            para['stroke_color'] = stroke_color_var
+        pg = [{'params' : para[ki], 'lr' : cfg.lr_base[ki]} for ki in sorted(para.keys())]
+        optim = torch.optim.Adam(pg)
+        if cfg.trainable.record:
+            scheduler = LambdaLR(
+                optim, lr_lambda=lrlambda_f, last_epoch=-1)
+        else:
+            scheduler = LambdaLR(
+                optim, lr_lambda=lrlambda_f, last_epoch=cfg.num_iter)
+        optim_schedular_dict[path_idx] = (optim, scheduler)
+        # Inner loop training
+        t_range = tqdm(range(cfg.num_iter))
+        for t in t_range:
+            for _, (optim, _) in optim_schedular_dict.items():
+                optim.zero_grad()
+            # Forward pass: render the image.
+            scene_args = pydiffvg.RenderFunction.serialize_scene(
+                w, h, shapes_record, shape_groups_record)
+            img = render(w, h, 2, 2, t, None, *scene_args)
+            # Compose img with white background
+            img = img[:, :, 3:4] * img[:, :, :3] + \
+                para_bg * (1 - img[:, :, 3:4])
+            if cfg.save.video:
+                filename = os.path.join(
+                    cfg.experiment_dir, "video-png",
+                    "{}-iter{}.png".format(pathn_record_str, t))
+                check_and_create_dir(filename)
+                if cfg.use_ycrcb:
+                    imshow = ycrcb_conversion(
+                        img, format='[2D x 3]', reverse=True).detach().cpu()
+                else:
+                    imshow = img.detach().cpu()
+                pydiffvg.imwrite(imshow, filename, gamma=gamma)
+            x = img.unsqueeze(0).permute(0, 3, 1, 2) # HWC -> NCHW
+            if cfg.use_ycrcb:
+                color_reweight = torch.FloatTensor([255/219, 255/224, 255/255]).to(device)
+                loss = ((x-gt)*(color_reweight.view(1, -1, 1, 1)))**2
+            else:
+                loss = ((x-gt)**2)
+            if cfg.loss.use_l1_loss:
+                loss = abs(x-gt)
+            if cfg.loss.use_distance_weighted_loss:
+                if cfg.use_ycrcb:
+                    raise ValueError
+                shapes_forsdf = copy.deepcopy(shapes)
+                shape_groups_forsdf = copy.deepcopy(shape_groups)
+                for si in shapes_forsdf:
+                    si.stroke_width = torch.FloatTensor([0]).to(device)
+                for sg_idx, sgi in enumerate(shape_groups_forsdf):
+                    sgi.fill_color = torch.FloatTensor([1, 1, 1, 1]).to(device)
+                    sgi.shape_ids = torch.LongTensor([sg_idx]).to(device)
+                sargs_forsdf = pydiffvg.RenderFunction.serialize_scene(
+                    w, h, shapes_forsdf, shape_groups_forsdf)
+                with torch.no_grad():
+                    im_forsdf = render(w, h, 2, 2, 0, None, *sargs_forsdf)
+                # use alpha channel is a trick to get 0-1 image
+                im_forsdf = (im_forsdf[:, :, 3]).detach().cpu().numpy()
+                loss_weight = get_sdf(im_forsdf, normalize='to1')
+                loss_weight += loss_weight_keep
+                loss_weight = np.clip(loss_weight, 0, 1)
+                loss_weight = torch.FloatTensor(loss_weight).to(device)
+            if cfg.save.loss:
+                save_loss = loss.squeeze(dim=0).mean(dim=0,keepdim=False).cpu().detach().numpy()
+                save_weight = loss_weight.cpu().detach().numpy()
+                save_weighted_loss = save_loss*save_weight
+                # normalize to [0,1]
+                save_loss = (save_loss - np.min(save_loss))/np.ptp(save_loss)
+                save_weight = (save_weight - np.min(save_weight))/np.ptp(save_weight)
+                save_weighted_loss = (save_weighted_loss - np.min(save_weighted_loss))/np.ptp(save_weighted_loss)
+                # save
+                plt.imshow(save_loss, cmap='Reds')
+                plt.axis('off')
+                # plt.colorbar()
+                filename = os.path.join(cfg.experiment_dir, "loss", "{}-iter{}-mseloss.png".format(pathn_record_str, t))
+                check_and_create_dir(filename)
+                plt.savefig(filename, dpi=800)
+                plt.close()
+                plt.imshow(save_weight, cmap='Greys')
+                plt.axis('off')
+                # plt.colorbar()
+                filename = os.path.join(cfg.experiment_dir, "loss", "{}-iter{}-sdfweight.png".format(pathn_record_str, t))
+                plt.savefig(filename, dpi=800)
+                plt.close()
+                plt.imshow(save_weighted_loss, cmap='Reds')
+                plt.axis('off')
+                # plt.colorbar()
+                filename = os.path.join(cfg.experiment_dir, "loss", "{}-iter{}-weightedloss.png".format(pathn_record_str, t))
+                plt.savefig(filename, dpi=800)
+                plt.close()
+            if loss_weight is None:
+                loss = loss.sum(1).mean()
+            else:
+                loss = (loss.sum(1)*loss_weight).mean()
+            # if (cfg.loss.bis_loss_weight is not None)  and (cfg.loss.bis_loss_weight > 0):
+            #     loss_bis = bezier_intersection_loss(point_var[0]) * cfg.loss.bis_loss_weight
+            #     loss = loss + loss_bis
+            if (cfg.loss.xing_loss_weight is not None) \
+                    and (cfg.loss.xing_loss_weight > 0):
+                loss_xing = xing_loss(point_var) * cfg.loss.xing_loss_weight
+                loss = loss + loss_xing
+            loss_list.append(loss.item())
+            t_range.set_postfix({'loss': loss.item()})
+            loss.backward()
+            # step
+            for _, (optim, scheduler) in optim_schedular_dict.items():
+                optim.step()
+                scheduler.step()
+            for group in shape_groups_record:
+                group.fill_color.data.clamp_(0.0, 1.0)
+        if cfg.loss.use_distance_weighted_loss:
+            loss_weight_keep = loss_weight.detach().cpu().numpy() * 1
+        if not cfg.trainable.record:
+            for _, pi in pg.items():
+                for ppi in pi:
+                    pi.require_grad = False
+            optim_schedular_dict = {}
+        if cfg.save.image:
+            filename = os.path.join(
+                cfg.experiment_dir, "demo-png", "{}.png".format(pathn_record_str))
+            check_and_create_dir(filename)
+            if cfg.use_ycrcb:
+                imshow = ycrcb_conversion(
+                    img, format='[2D x 3]', reverse=True).detach().cpu()
+            else:
+                imshow = img.detach().cpu()
+            pydiffvg.imwrite(imshow, filename, gamma=gamma)
+        if cfg.save.output:
+            filename = os.path.join(
+                cfg.experiment_dir, "output-svg", "{}.svg".format(pathn_record_str))
+            check_and_create_dir(filename)
+            pydiffvg.save_svg(filename, w, h, shapes_record, shape_groups_record)
+        loss_matrix.append(loss_list)
+        # calculate the pixel loss
+        # pixel_loss = ((x-gt)**2).sum(dim=1, keepdim=True).sqrt_() # [N,1,H, W]
+        # region_loss = adaptive_avg_pool2d(pixel_loss, cfg.region_loss_pool_size)
+        # loss_weight = torch.softmax(region_loss.reshape(1, 1, -1), dim=-1)\
+        #     .reshape_as(region_loss)
+        pos_init_method = naive_coord_init(x, gt)
+        if cfg.coord_init.type == 'naive':
+            pos_init_method = naive_coord_init(x, gt)
+        elif cfg.coord_init.type == 'sparse':
+            pos_init_method = sparse_coord_init(x, gt)
+        elif cfg.coord_init.type == 'random':
+            pos_init_method = random_coord_init([h, w])
+        else:
+            raise ValueError
+        if cfg.save.video:
+            print("saving iteration video...")
+            img_array = []
+            for ii in range(0, cfg.num_iter):
+                filename = os.path.join(
+                    cfg.experiment_dir, "video-png",
+                    "{}-iter{}.png".format(pathn_record_str, ii))
+                img = cv2.imread(filename)
+                # cv2.putText(
+                #     img, "Path:{} \nIteration:{}".format(pathn_record_str, ii),
+                #     (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
+                img_array.append(img)
+            videoname = os.path.join(
+                cfg.experiment_dir, "video-avi",
+                "{}.avi".format(pathn_record_str))
+            check_and_create_dir(videoname)
+            out = cv2.VideoWriter(
+                videoname,
+                # cv2.VideoWriter_fourcc(*'mp4v'),
+                cv2.VideoWriter_fourcc(*'FFV1'),
+                20.0, (w, h))
+            for iii in range(len(img_array)):
+                out.write(img_array[iii])
+            out.release()
+            # shutil.rmtree(os.path.join(cfg.experiment_dir, "video-png"))
+    print("The last loss is: {}".format(loss.item()))

matrix.h ADDED Viewed

	@@ -0,0 +1,544 @@

+#pragma once
+#include "diffvg.h"
+#include "vector.h"
+#include <iostream>
+// 3x3 matrix of scalars of type T, stored row by row in `data`.
+template <typename T>
+struct TMatrix3x3 {
+    // Default constructor: every entry is set to T(0).
+    DEVICE
+    TMatrix3x3() {
+        for (int r = 0; r < 3; r++) {
+            for (int c = 0; c < 3; c++) {
+                data[r][c] = T(0);
+            }
+        }
+    }
+    // Builds the matrix from 9 consecutive values read in row-major order.
+    template <typename T2>
+    DEVICE
+    TMatrix3x3(T2 *arr) {
+        for (int r = 0; r < 3; r++) {
+            for (int c = 0; c < 3; c++) {
+                data[r][c] = arr[3 * r + c];
+            }
+        }
+    }
+    // Builds the matrix from its nine entries; vRC is row R, column C.
+    DEVICE
+    TMatrix3x3(T v00, T v01, T v02,
+               T v10, T v11, T v12,
+               T v20, T v21, T v22) {
+        data[0][0] = v00; data[0][1] = v01; data[0][2] = v02;
+        data[1][0] = v10; data[1][1] = v11; data[1][2] = v12;
+        data[2][0] = v20; data[2][1] = v21; data[2][2] = v22;
+    }
+    // Read-only access to the entry at row i, column j (no bounds check).
+    DEVICE
+    const T& operator()(int i, int j) const {
+        return data[i][j];
+    }
+    // Mutable access to the entry at row i, column j (no bounds check).
+    DEVICE
+    T& operator()(int i, int j) {
+        return data[i][j];
+    }
+    // Returns the 3x3 identity matrix.
+    DEVICE
+    static TMatrix3x3<T> identity() {
+        return TMatrix3x3<T>(1, 0, 0,
+                             0, 1, 0,
+                             0, 0, 1);
+    }
+    T data[3][3];
+};
+// Convenience aliases for the 3x3 matrix. `Real` is not declared in this
+// file (presumably it comes from diffvg.h -- TODO confirm).
+using Matrix3x3 = TMatrix3x3<Real>;
+using Matrix3x3f = TMatrix3x3<float>;
+// 4x4 matrix of scalars of type T, stored row by row in `data`.
+template <typename T>
+struct TMatrix4x4 {
+    // Default constructor: fills all 16 entries with T(0).
+    DEVICE TMatrix4x4() {
+        for (int r = 0; r < 4; r++) {
+            for (int c = 0; c < 4; c++) {
+                data[r][c] = T(0);
+            }
+        }
+    }
+    // Reads 16 consecutive values in row-major order, casting each to T.
+    template <typename T2>
+    DEVICE TMatrix4x4(const T2 *arr) {
+        for (int k = 0; k < 16; k++) {
+            data[k / 4][k % 4] = (T)arr[k];
+        }
+    }
+    // Converting copy from a 4x4 matrix with a different scalar type.
+    template <typename T2>
+    DEVICE TMatrix4x4(const TMatrix4x4<T2> &m) {
+        for (int r = 0; r < 4; r++) {
+            for (int c = 0; c < 4; c++) {
+                data[r][c] = T(m.data[r][c]);
+            }
+        }
+    }
+    // Builds the matrix from its sixteen entries (vRC is row R, column C),
+    // casting each argument to T.
+    template <typename T2>
+    DEVICE TMatrix4x4(T2 v00, T2 v01, T2 v02, T2 v03,
+                      T2 v10, T2 v11, T2 v12, T2 v13,
+                      T2 v20, T2 v21, T2 v22, T2 v23,
+                      T2 v30, T2 v31, T2 v32, T2 v33) {
+        data[0][0] = (T)v00; data[0][1] = (T)v01; data[0][2] = (T)v02; data[0][3] = (T)v03;
+        data[1][0] = (T)v10; data[1][1] = (T)v11; data[1][2] = (T)v12; data[1][3] = (T)v13;
+        data[2][0] = (T)v20; data[2][1] = (T)v21; data[2][2] = (T)v22; data[2][3] = (T)v23;
+        data[3][0] = (T)v30; data[3][1] = (T)v31; data[3][2] = (T)v32; data[3][3] = (T)v33;
+    }
+    // Read-only access to the entry at row i, column j (no bounds check).
+    DEVICE
+    const T& operator()(int i, int j) const {
+        return data[i][j];
+    }
+    // Mutable access to the entry at row i, column j (no bounds check).
+    DEVICE
+    T& operator()(int i, int j) {
+        return data[i][j];
+    }
+    // Returns the 4x4 identity matrix.
+    DEVICE
+    static TMatrix4x4<T> identity() {
+        return TMatrix4x4<T>(1, 0, 0, 0,
+                             0, 1, 0, 0,
+                             0, 0, 1, 0,
+                             0, 0, 0, 1);
+    }
+    T data[4][4];
+};
+// Convenience aliases for the 4x4 matrix. `Real` is not declared in this
+// file (presumably it comes from diffvg.h -- TODO confirm).
+using Matrix4x4 = TMatrix4x4<Real>;
+using Matrix4x4f = TMatrix4x4<float>;
+// Element-wise sum of two 3x3 matrices; the result's scalar type is
+// whatever adding a T0 entry to a T1 entry yields.
+template <typename T0, typename T1>
+DEVICE
+inline auto operator+(const TMatrix3x3<T0> &m0, const TMatrix3x3<T1> &m1) -> TMatrix3x3<decltype(m0(0, 0) + m1(0, 0))> {
+    using Sum = decltype(m0(0, 0) + m1(0, 0));
+    TMatrix3x3<Sum> result;
+    for (int r = 0; r < 3; r++) {
+        for (int c = 0; c < 3; c++) {
+            result(r, c) = m0(r, c) + m1(r, c);
+        }
+    }
+    return result;
+}
+// Element-wise difference of two 3x3 matrices; the result's scalar type is
+// whatever subtracting a T1 entry from a T0 entry yields.
+template <typename T0, typename T1>
+DEVICE
+inline auto operator-(const TMatrix3x3<T0> &m0, const TMatrix3x3<T1> &m1) -> TMatrix3x3<decltype(m0(0, 0) - m1(0, 0))> {
+    using Diff = decltype(m0(0, 0) - m1(0, 0));
+    TMatrix3x3<Diff> result;
+    for (int r = 0; r < 3; r++) {
+        for (int c = 0; c < 3; c++) {
+            result(r, c) = m0(r, c) - m1(r, c);
+        }
+    }
+    return result;
+}
+// Matrix product m0 * m1 of two 3x3 matrices.
+template <typename T>
+DEVICE
+inline auto operator*(const TMatrix3x3<T> &m0, const TMatrix3x3<T> &m1) -> TMatrix3x3<T> {
+    TMatrix3x3<T> product;
+    for (int row = 0; row < 3; row++) {
+        for (int col = 0; col < 3; col++) {
+            // Dot product of row `row` of m0 with column `col` of m1.
+            T acc = T(0);
+            for (int k = 0; k < 3; k++) {
+                acc += m0(row, k) * m1(k, col);
+            }
+            product(row, col) = acc;
+        }
+    }
+    return product;
+}
+// Row-vector times matrix: result[i] = sum_j v[j] * m(j, i).
+template <typename T>
+DEVICE
+inline auto operator*(const TVector3<T> &v, const TMatrix3x3<T> &m) -> TVector3<T> {
+    TVector3<T> out;
+    for (int col = 0; col < 3; col++) {
+        T acc = T(0);
+        for (int k = 0; k < 3; k++) {
+            acc += v[k] * m(k, col);
+        }
+        out[col] = acc;
+    }
+    return out;
+}
+// Matrix times column-vector: result[i] = sum_j m(i, j) * v[j].
+template <typename T>
+DEVICE
+inline auto operator*(const TMatrix3x3<T> &m, const TVector3<T> &v) -> TVector3<T> {
+    TVector3<T> ret;
+    for (int i = 0; i < 3; i++) {
+        // Zero-initialize with T(0), like the other products in this file,
+        // instead of a float literal.
+        ret[i] = T(0);
+        for (int j = 0; j < 3; j++) {
+            ret[i] += m(i, j) * v[j];
+        }
+    }
+    return ret;
+}
+// Returns the inverse of the 3x3 matrix m, computed in closed form from its
+// cofactors and determinant. There is no check for a zero determinant.
+template <typename T>
+DEVICE
+inline auto inverse(const TMatrix3x3<T> &m) -> TMatrix3x3<T> {
+    // computes the inverse of a matrix m
+    // Determinant by cofactor expansion along the first row.
+    auto det = m(0, 0) * (m(1, 1) * m(2, 2) - m(2, 1) * m(1, 2)) -
+               m(0, 1) * (m(1, 0) * m(2, 2) - m(1, 2) * m(2, 0)) +
+               m(0, 2) * (m(1, 0) * m(2, 1) - m(1, 1) * m(2, 0));
+    auto invdet = 1 / det;
+    auto m_inv = TMatrix3x3<T>{};
+    // Each entry is a signed 2x2 minor of m scaled by 1/det.
+    m_inv(0, 0) = (m(1, 1) * m(2, 2) - m(2, 1) * m(1, 2)) * invdet;
+    m_inv(0, 1) = (m(0, 2) * m(2, 1) - m(0, 1) * m(2, 2)) * invdet;
+    m_inv(0, 2) = (m(0, 1) * m(1, 2) - m(0, 2) * m(1, 1)) * invdet;
+    m_inv(1, 0) = (m(1, 2) * m(2, 0) - m(1, 0) * m(2, 2)) * invdet;
+    m_inv(1, 1) = (m(0, 0) * m(2, 2) - m(0, 2) * m(2, 0)) * invdet;
+    m_inv(1, 2) = (m(1, 0) * m(0, 2) - m(0, 0) * m(1, 2)) * invdet;
+    m_inv(2, 0) = (m(1, 0) * m(2, 1) - m(2, 0) * m(1, 1)) * invdet;
+    m_inv(2, 1) = (m(2, 0) * m(0, 1) - m(0, 0) * m(2, 1)) * invdet;
+    m_inv(2, 2) = (m(0, 0) * m(1, 1) - m(1, 0) * m(0, 1)) * invdet;
+    return m_inv;
+}
+// Element-wise sum of two 4x4 matrices; the result's scalar type is
+// whatever adding a T0 entry to a T1 entry yields.
+template <typename T0, typename T1>
+DEVICE
+inline auto operator+(const TMatrix4x4<T0> &m0, const TMatrix4x4<T1> &m1) -> TMatrix4x4<decltype(m0(0, 0) + m1(0, 0))> {
+    using Sum = decltype(m0(0, 0) + m1(0, 0));
+    TMatrix4x4<Sum> result;
+    for (int r = 0; r < 4; r++) {
+        for (int c = 0; c < 4; c++) {
+            result(r, c) = m0(r, c) + m1(r, c);
+        }
+    }
+    return result;
+}
+// Returns the transpose of a 3x3 matrix: result(c, r) = m(r, c).
+template <typename T>
+DEVICE
+TMatrix3x3<T> transpose(const TMatrix3x3<T> &m) {
+    TMatrix3x3<T> flipped;
+    for (int r = 0; r < 3; r++) {
+        for (int c = 0; c < 3; c++) {
+            flipped(c, r) = m(r, c);
+        }
+    }
+    return flipped;
+}
+// Returns the transpose of a 4x4 matrix: result(c, r) = m(r, c).
+template <typename T>
+DEVICE
+TMatrix4x4<T> transpose(const TMatrix4x4<T> &m) {
+    TMatrix4x4<T> flipped;
+    for (int r = 0; r < 4; r++) {
+        for (int c = 0; c < 4; c++) {
+            flipped(c, r) = m(r, c);
+        }
+    }
+    return flipped;
+}
+// Unary minus: returns a 3x3 matrix with every entry of m0 negated.
+template <typename T>
+DEVICE
+inline TMatrix3x3<T> operator-(const TMatrix3x3<T> &m0) {
+    TMatrix3x3<T> negated;
+    for (int r = 0; r < 3; r++) {
+        for (int c = 0; c < 3; c++) {
+            negated(r, c) = -m0(r, c);
+        }
+    }
+    return negated;
+}
+// Unary minus: returns a 4x4 matrix with every entry of m0 negated.
+template <typename T>
+DEVICE
+inline TMatrix4x4<T> operator-(const TMatrix4x4<T> &m0) {
+    TMatrix4x4<T> negated;
+    for (int r = 0; r < 4; r++) {
+        for (int c = 0; c < 4; c++) {
+            negated(r, c) = -m0(r, c);
+        }
+    }
+    return negated;
+}
+// Element-wise difference m0 - m1 of two 4x4 matrices of the same type.
+template <typename T>
+DEVICE
+inline TMatrix4x4<T> operator-(const TMatrix4x4<T> &m0, const TMatrix4x4<T> &m1) {
+    TMatrix4x4<T> diff;
+    for (int r = 0; r < 4; r++) {
+        for (int c = 0; c < 4; c++) {
+            diff(r, c) = m0(r, c) - m1(r, c);
+        }
+    }
+    return diff;
+}
+// In-place element-wise addition: adds m1 into m0 and returns m0.
+template <typename T>
+DEVICE
+inline TMatrix3x3<T>& operator+=(TMatrix3x3<T> &m0, const TMatrix3x3<T> &m1) {
+    for (int r = 0; r < 3; r++) {
+        for (int c = 0; c < 3; c++) {
+            m0.data[r][c] += m1.data[r][c];
+        }
+    }
+    return m0;
+}
+// In-place element-wise addition: adds m1 into m0 and returns m0.
+template <typename T>
+DEVICE
+inline TMatrix4x4<T>& operator+=(TMatrix4x4<T> &m0, const TMatrix4x4<T> &m1) {
+    for (int r = 0; r < 4; r++) {
+        for (int c = 0; c < 4; c++) {
+            m0.data[r][c] += m1.data[r][c];
+        }
+    }
+    return m0;
+}
+// In-place element-wise subtraction: subtracts m1 from m0 and returns m0.
+template <typename T>
+DEVICE
+inline TMatrix4x4<T>& operator-=(TMatrix4x4<T> &m0, const TMatrix4x4<T> &m1) {
+    for (int r = 0; r < 4; r++) {
+        for (int c = 0; c < 4; c++) {
+            m0.data[r][c] -= m1.data[r][c];
+        }
+    }
+    return m0;
+}
+// Matrix product m0 * m1 of two 4x4 matrices.
+template <typename T>
+DEVICE
+inline TMatrix4x4<T> operator*(const TMatrix4x4<T> &m0, const TMatrix4x4<T> &m1) {
+    // The default constructor zero-fills `product`, so entries can be
+    // accumulated into directly.
+    TMatrix4x4<T> product;
+    for (int row = 0; row < 4; row++) {
+        for (int col = 0; col < 4; col++) {
+            T &cell = product(row, col);
+            for (int k = 0; k < 4; k++) {
+                cell += m0(row, k) * m1(k, col);
+            }
+        }
+    }
+    return product;
+}
+// Inverts a 4x4 matrix with an explicit closed-form expansion.
+//
+// Steps, in order:
+//   1. Every inv(i, j) is filled with an unscaled signed sum of six triple
+//      products of entries of m (the formulas follow the linked answer).
+//   2. det is formed from row 0 of m and the unscaled inv(0..3, 0).
+//   3. Every entry of inv is multiplied by 1 / det.
+//
+// If det compares exactly equal to 0, a value-initialized TMatrix4x4<T>{} is
+// returned instead of an inverse (presumably the all-zero matrix -- confirm
+// against the TMatrix4x4 constructor). No tolerance is applied, so a nearly
+// singular m is still inverted.
+template <typename T>
+DEVICE
+TMatrix4x4<T> inverse(const TMatrix4x4<T> &m) {
+    // https://stackoverflow.com/questions/1148309/inverting-a-4x4-matrix
+    TMatrix4x4<T> inv;
+    // Unscaled entries inv(0..3, 0): built only from rows 1, 2, 3 of m.
+    inv(0, 0) = m(1, 1) * m(2, 2) * m(3, 3) -
+                m(1, 1) * m(2, 3) * m(3, 2) -
+                m(2, 1) * m(1, 2) * m(3, 3) +
+                m(2, 1) * m(1, 3) * m(3, 2) +
+                m(3, 1) * m(1, 2) * m(2, 3) -
+                m(3, 1) * m(1, 3) * m(2, 2);
+    inv(1, 0) = -m(1, 0) * m(2, 2) * m(3, 3) +
+                 m(1, 0) * m(2, 3) * m(3, 2) +
+                 m(2, 0) * m(1, 2) * m(3, 3) -
+                 m(2, 0) * m(1, 3) * m(3, 2) -
+                 m(3, 0) * m(1, 2) * m(2, 3) +
+                 m(3, 0) * m(1, 3) * m(2, 2);
+    inv(2, 0) = m(1, 0) * m(2, 1) * m(3, 3) -
+                m(1, 0) * m(2, 3) * m(3, 1) -
+                m(2, 0) * m(1, 1) * m(3, 3) +
+                m(2, 0) * m(1, 3) * m(3, 1) +
+                m(3, 0) * m(1, 1) * m(2, 3) -
+                m(3, 0) * m(1, 3) * m(2, 1);
+    inv(3, 0) = -m(1, 0) * m(2, 1) * m(3, 2) +
+                 m(1, 0) * m(2, 2) * m(3, 1) +
+                 m(2, 0) * m(1, 1) * m(3, 2) -
+                 m(2, 0) * m(1, 2) * m(3, 1) -
+                 m(3, 0) * m(1, 1) * m(2, 2) +
+                 m(3, 0) * m(1, 2) * m(2, 1);
+    // Unscaled entries inv(0..3, 1): built only from rows 0, 2, 3 of m.
+    inv(0, 1) = -m(0, 1) * m(2, 2) * m(3, 3) +
+                 m(0, 1) * m(2, 3) * m(3, 2) +
+                 m(2, 1) * m(0, 2) * m(3, 3) -
+                 m(2, 1) * m(0, 3) * m(3, 2) -
+                 m(3, 1) * m(0, 2) * m(2, 3) +
+                 m(3, 1) * m(0, 3) * m(2, 2);
+    inv(1, 1) = m(0, 0) * m(2, 2) * m(3, 3) -
+                m(0, 0) * m(2, 3) * m(3, 2) -
+                m(2, 0) * m(0, 2) * m(3, 3) +
+                m(2, 0) * m(0, 3) * m(3, 2) +
+                m(3, 0) * m(0, 2) * m(2, 3) -
+                m(3, 0) * m(0, 3) * m(2, 2);
+    inv(2, 1) = -m(0, 0) * m(2, 1) * m(3, 3) +
+                 m(0, 0) * m(2, 3) * m(3, 1) +
+                 m(2, 0) * m(0, 1) * m(3, 3) -
+                 m(2, 0) * m(0, 3) * m(3, 1) -
+                 m(3, 0) * m(0, 1) * m(2, 3) +
+                 m(3, 0) * m(0, 3) * m(2, 1);
+    inv(3, 1) = m(0, 0) * m(2, 1) * m(3, 2) -
+                m(0, 0) * m(2, 2) * m(3, 1) -
+                m(2, 0) * m(0, 1) * m(3, 2) +
+                m(2, 0) * m(0, 2) * m(3, 1) +
+                m(3, 0) * m(0, 1) * m(2, 2) -
+                m(3, 0) * m(0, 2) * m(2, 1);
+    // Unscaled entries inv(0..3, 2): built only from rows 0, 1, 3 of m.
+    inv(0, 2) = m(0, 1) * m(1, 2) * m(3, 3) -
+                m(0, 1) * m(1, 3) * m(3, 2) -
+                m(1, 1) * m(0, 2) * m(3, 3) +
+                m(1, 1) * m(0, 3) * m(3, 2) +
+                m(3, 1) * m(0, 2) * m(1, 3) -
+                m(3, 1) * m(0, 3) * m(1, 2);
+    inv(1, 2) = -m(0, 0) * m(1, 2) * m(3, 3) +
+                 m(0, 0) * m(1, 3) * m(3, 2) +
+                 m(1, 0) * m(0, 2) * m(3, 3) -
+                 m(1, 0) * m(0, 3) * m(3, 2) -
+                 m(3, 0) * m(0, 2) * m(1, 3) +
+                 m(3, 0) * m(0, 3) * m(1, 2);
+    inv(2, 2) = m(0, 0) * m(1, 1) * m(3, 3) -
+                m(0, 0) * m(1, 3) * m(3, 1) -
+                m(1, 0) * m(0, 1) * m(3, 3) +
+                m(1, 0) * m(0, 3) * m(3, 1) +
+                m(3, 0) * m(0, 1) * m(1, 3) -
+                m(3, 0) * m(0, 3) * m(1, 1);
+    inv(3, 2) = -m(0, 0) * m(1, 1) * m(3, 2) +
+                 m(0, 0) * m(1, 2) * m(3, 1) +
+                 m(1, 0) * m(0, 1) * m(3, 2) -
+                 m(1, 0) * m(0, 2) * m(3, 1) -
+                 m(3, 0) * m(0, 1) * m(1, 2) +
+                 m(3, 0) * m(0, 2) * m(1, 1);
+    // Unscaled entries inv(0..3, 3): built only from rows 0, 1, 2 of m.
+    inv(0, 3) = -m(0, 1) * m(1, 2) * m(2, 3) +
+                 m(0, 1) * m(1, 3) * m(2, 2) +
+                 m(1, 1) * m(0, 2) * m(2, 3) -
+                 m(1, 1) * m(0, 3) * m(2, 2) -
+                 m(2, 1) * m(0, 2) * m(1, 3) +
+                 m(2, 1) * m(0, 3) * m(1, 2);
+    inv(1, 3) = m(0, 0) * m(1, 2) * m(2, 3) -
+                m(0, 0) * m(1, 3) * m(2, 2) -
+                m(1, 0) * m(0, 2) * m(2, 3) +
+                m(1, 0) * m(0, 3) * m(2, 2) +
+                m(2, 0) * m(0, 2) * m(1, 3) -
+                m(2, 0) * m(0, 3) * m(1, 2);
+    inv(2, 3) = -m(0, 0) * m(1, 1) * m(2, 3) +
+                 m(0, 0) * m(1, 3) * m(2, 1) +
+                 m(1, 0) * m(0, 1) * m(2, 3) -
+                 m(1, 0) * m(0, 3) * m(2, 1) -
+                 m(2, 0) * m(0, 1) * m(1, 3) +
+                 m(2, 0) * m(0, 3) * m(1, 1);
+    inv(3, 3) = m(0, 0) * m(1, 1) * m(2, 2) -
+                m(0, 0) * m(1, 2) * m(2, 1) -
+                m(1, 0) * m(0, 1) * m(2, 2) +
+                m(1, 0) * m(0, 2) * m(2, 1) +
+                m(2, 0) * m(0, 1) * m(1, 2) -
+                m(2, 0) * m(0, 2) * m(1, 1);
+    // Determinant: row 0 of m dotted with the still-unscaled inv(0..3, 0).
+    auto det = m(0, 0) * inv(0, 0) +
+               m(0, 1) * inv(1, 0) +
+               m(0, 2) * inv(2, 0) +
+               m(0, 3) * inv(3, 0);
+    // Exact comparison: only a determinant of exactly zero takes this path.
+    if (det == 0) {
+        return TMatrix4x4<T>{};
+    }
+    // NOTE(review): 1.0 is a double literal, so for T = float this reciprocal
+    // (and the scaling below) is presumably evaluated in double before being
+    // stored back -- confirm that is intended for device code.
+    auto inv_det = 1.0 / det;
+    // Scale every unscaled entry by 1 / det to obtain the inverse.
+    for (int i = 0; i < 4; i++) {
+        for (int j = 0; j < 4; j++) {
+            inv(i, j) *= inv_det;
+        }
+    }
+    return inv;
+}
+// Streams a 3x3 matrix as three text lines.
+// Each entry is followed by a single space, and each row is terminated with
+// std::endl (which also flushes the stream). Returns the stream for chaining.
+template <typename T>
+inline std::ostream& operator<<(std::ostream &os, const TMatrix3x3<T> &m) {
+    for (int row = 0; row < 3; ++row) {
+        for (int col = 0; col < 3; ++col) {
+            os << m(row, col);
+            os << " ";
+        }
+        os << std::endl;
+    }
+    return os;
+}
+// Streams a 4x4 matrix as four text lines.
+// Each entry is followed by a single space, and each row is terminated with
+// std::endl (which also flushes the stream). Returns the stream for chaining.
+template <typename T>
+inline std::ostream& operator<<(std::ostream &os, const TMatrix4x4<T> &m) {
+    for (int row = 0; row < 4; ++row) {
+        for (int col = 0; col < 4; ++col) {
+            os << m(row, col);
+            os << " ";
+        }
+        os << std::endl;
+    }
+    return os;
+}
+// Applies the 3x3 matrix m to the 2D point pt.
+// The point is extended with an implicit third coordinate of 1, multiplied
+// by m, and the first two components of the result are divided by the third.
+// No check is made for a zero third component.
+template <typename T>
+DEVICE
+TVector2<T> xform_pt(const TMatrix3x3<T> &m, const TVector2<T> &pt) {
+    const auto px = pt[0];
+    const auto py = pt[1];
+    // m * (px, py, 1), one row of m per component.
+    TVector3<T> homog{m(0, 0) * px + m(0, 1) * py + m(0, 2),
+                      m(1, 0) * px + m(1, 1) * py + m(1, 2),
+                      m(2, 0) * px + m(2, 1) * py + m(2, 2)};
+    // Divide through by the third component.
+    const auto w = homog[2];
+    return TVector2<T>{homog[0] / w, homog[1] / w};
+}
+// Backward pass of xform_pt.
+// Given d_out, the derivative with respect to the transformed point, this
+// accumulates (with +=) the corresponding derivatives into d_m and d_pt.
+// Neither output is cleared first, so callers own their initialization.
+template <typename T>
+DEVICE
+void d_xform_pt(const TMatrix3x3<T> &m, const TVector2<T> &pt,
+                const TVector2<T> &d_out,
+                TMatrix3x3<T> &d_m,
+                TVector2<T> &d_pt) {
+    const auto px = pt[0];
+    const auto py = pt[1];
+    // Recompute the forward pass: homog = m * (px, py, 1).
+    TVector3<T> homog{m(0, 0) * px + m(0, 1) * py + m(0, 2),
+                      m(1, 0) * px + m(1, 1) * py + m(1, 2),
+                      m(2, 0) * px + m(2, 1) * py + m(2, 2)};
+    TVector2<T> projected{homog[0] / homog[2], homog[1] / homog[2]};
+    // Derivative with respect to homog. The third component collects the
+    // contribution of the division by homog[2] in both outputs.
+    TVector3<T> d_homog{d_out[0] / homog[2],
+                        d_out[1] / homog[2],
+                        -(d_out[0] * projected[0] + d_out[1] * projected[1]) / homog[2]};
+    // Row r of d_m receives d_homog[r] * (px, py, 1).
+    for (int row = 0; row < 3; ++row) {
+        d_m(row, 0) += d_homog[row] * px;
+        d_m(row, 1) += d_homog[row] * py;
+        d_m(row, 2) += d_homog[row];
+    }
+    // d_pt receives d_homog dotted with the first two columns of m.
+    d_pt[0] += d_homog[0] * m(0, 0) + d_homog[1] * m(1, 0) + d_homog[2] * m(2, 0);
+    d_pt[1] += d_homog[0] * m(0, 1) + d_homog[1] * m(1, 1) + d_homog[2] * m(2, 1);
+}
+// Transforms the 2D normal n using the matrix passed as m_inv.
+// Only the upper-left 2x2 block of m_inv is read, and it is applied
+// transposed (entry (r, c) multiplies n[r] and contributes to output c).
+// The result is passed through normalize() before being returned.
+template <typename T>
+DEVICE
+TVector2<T> xform_normal(const TMatrix3x3<T> &m_inv, const TVector2<T> &n) {
+    const auto nx = m_inv(0, 0) * n[0] + m_inv(1, 0) * n[1];
+    const auto ny = m_inv(0, 1) * n[0] + m_inv(1, 1) * n[1];
+    return normalize(TVector2<T>{nx, ny});
+}

model_config/model_name_p5_all.csv ADDED Viewed

	@@ -0,0 +1,5 @@

+yolov5n
+yolov5s
+yolov5m
+yolov5l
+yolov5x

model_config/model_name_p5_all.yaml ADDED Viewed

	@@ -0,0 +1 @@


1	+ model_names: ["yolov5n", "yolov5s", "yolov5m", "yolov5l", "yolov5x"]

model_config/model_name_p5_n.csv ADDED Viewed

	@@ -0,0 +1 @@


1	+ yolov5n

model_config/model_name_p5_n.yaml ADDED Viewed

	@@ -0,0 +1 @@


1	+ model_names: ["yolov5n"]

model_config/model_name_p6_all.csv ADDED Viewed

	@@ -0,0 +1,5 @@

+yolov5n6
+yolov5s6
+yolov5m6
+yolov5l6
+yolov5x6

model_config/model_name_p6_all.yaml ADDED Viewed

	@@ -0,0 +1 @@


1	+ model_names: ["yolov5n6", "yolov5s6", "yolov5m6", "yolov5l6", "yolov5x6"]