Commit 10dcc2e (parent: d21720c), committed by Realcat

add: COTR (https://github.com/ubc-vision/COTR)

This view is limited to 50 files because it contains too many changes. See raw diff.
- README.md +1 -0
- common/app_class.py +3 -1
- common/config.yaml +10 -0
- common/utils.py +33 -5
- env-docker.txt +2 -2
- hloc/match_dense.py +38 -0
- hloc/matchers/cotr.py +77 -0
- third_party/COTR/.gitignore +1 -0
- third_party/COTR/COTR/cameras/camera_pose.py +164 -0
- third_party/COTR/COTR/cameras/capture.py +432 -0
- third_party/COTR/COTR/cameras/pinhole_camera.py +73 -0
- third_party/COTR/COTR/datasets/colmap_helper.py +312 -0
- third_party/COTR/COTR/datasets/cotr_dataset.py +243 -0
- third_party/COTR/COTR/datasets/megadepth_dataset.py +140 -0
- third_party/COTR/COTR/global_configs/__init__.py +10 -0
- third_party/COTR/COTR/global_configs/commons.json +1 -0
- third_party/COTR/COTR/global_configs/dataset_config.json +41 -0
- third_party/COTR/COTR/inference/inference_helper.py +311 -0
- third_party/COTR/COTR/inference/refinement_task.py +191 -0
- third_party/COTR/COTR/inference/sparse_engine.py +427 -0
- third_party/COTR/COTR/models/__init__.py +10 -0
- third_party/COTR/COTR/models/backbone.py +135 -0
- third_party/COTR/COTR/models/cotr_model.py +51 -0
- third_party/COTR/COTR/models/misc.py +112 -0
- third_party/COTR/COTR/models/position_encoding.py +83 -0
- third_party/COTR/COTR/models/transformer.py +228 -0
- third_party/COTR/COTR/options/options.py +52 -0
- third_party/COTR/COTR/options/options_utils.py +108 -0
- third_party/COTR/COTR/projector/pcd_projector.py +210 -0
- third_party/COTR/COTR/sfm_scenes/knn_search.py +56 -0
- third_party/COTR/COTR/sfm_scenes/sfm_scenes.py +87 -0
- third_party/COTR/COTR/trainers/base_trainer.py +111 -0
- third_party/COTR/COTR/trainers/cotr_trainer.py +200 -0
- third_party/COTR/COTR/trainers/tensorboard_helper.py +97 -0
- third_party/COTR/COTR/transformations/transform_basics.py +114 -0
- third_party/COTR/COTR/transformations/transformations.py +1951 -0
- third_party/COTR/COTR/utils/constants.py +3 -0
- third_party/COTR/COTR/utils/debug_utils.py +15 -0
- third_party/COTR/COTR/utils/utils.py +271 -0
- third_party/COTR/LICENSE +201 -0
- third_party/COTR/demo_face.py +69 -0
- third_party/COTR/demo_guided_matching.py +85 -0
- third_party/COTR/demo_homography.py +84 -0
- third_party/COTR/demo_reconstruction.py +92 -0
- third_party/COTR/demo_single_pair.py +66 -0
- third_party/COTR/demo_wbs.py +71 -0
- third_party/COTR/environment.yml +104 -0
- third_party/COTR/out/.DS_Store +0 -0
- third_party/COTR/out/.placeholder +0 -0
- third_party/COTR/out/default/checkpoint.pth.tar +3 -0
README.md
CHANGED
@@ -56,6 +56,7 @@ The tool currently supports various popular image matching algorithms, namely:
 - [x] [LANet](https://github.com/wangch-g/lanet), ACCV 2022
 - [ ] [LISRD](https://github.com/rpautrat/LISRD), ECCV 2022
 - [ ] [REKD](https://github.com/bluedream1121/REKD), CVPR 2022
+- [x] [CoTR](https://github.com/ubc-vision/COTR), ICCV 2021
 - [x] [ALIKE](https://github.com/Shiaoming/ALIKE), TMM 2022
 - [x] [RoRD](https://github.com/UditSinghParihar/RoRD), IROS 2021
 - [x] [SGMNet](https://github.com/vdvchen/SGMNet), ICCV 2021
common/app_class.py
CHANGED
@@ -300,6 +300,7 @@ class ImageMatchingApp:
             fn=run_ransac,
             inputs=[
                 state_cache,
+                choice_geometry_type,
                 ransac_method,
                 ransac_reproj_threshold,
                 ransac_confidence,
@@ -308,6 +309,7 @@
             outputs=[
                 output_matches_ransac,
                 matches_result_info,
+                output_wrapped,
             ],
         )
 
@@ -457,7 +459,7 @@
             return gr.Markdown(markdown_table)
         elif style == "tab":
             for k, v in cfg.items():
-                if not v["info"]
+                if not v["info"].get("display", True):
                     continue
                 data.append(
                     [
common/config.yaml
CHANGED
@@ -46,6 +46,16 @@ matcher_zoo:
     paper: https://arxiv.org/pdf/2104.00680
     project: https://zju3dv.github.io/loftr
     display: true
+  cotr:
+    matcher: cotr
+    dense: true
+    info:
+      name: CoTR  # display name
+      source: "ICCV 2021"
+      github: https://github.com/ubc-vision/COTR
+      paper: https://arxiv.org/abs/2103.14167
+      project: null
+      display: true
   topicfm:
     matcher: topicfm
     dense: true
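For reference, a minimal sketch of reading the new `matcher_zoo` entry back, assuming PyYAML is installed and the script runs from the repository root; the path and key names come from the hunk above, everything else is illustrative:

```python
import yaml

# Load the webui configuration and inspect the CoTR entry added above.
with open("common/config.yaml") as f:
    cfg = yaml.safe_load(f)

cotr_cfg = cfg["matcher_zoo"]["cotr"]
print(cotr_cfg["matcher"])         # "cotr"
print(cotr_cfg["dense"])           # True
print(cotr_cfg["info"]["source"])  # "ICCV 2021"
```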
common/utils.py
CHANGED
@@ -443,6 +443,7 @@ def generate_warp_images(
 
 def run_ransac(
     state_cache: Dict[str, Any],
+    choice_geometry_type: str,
     ransac_method: str = DEFAULT_RANSAC_METHOD,
     ransac_reproj_threshold: int = DEFAULT_RANSAC_REPROJ_THRESHOLD,
     ransac_confidence: float = DEFAULT_RANSAC_CONFIDENCE,
@@ -493,11 +494,32 @@ def run_ransac(
     )
     logger.info(f"Display matches done using: {time.time()-t1:.3f}s")
     t1 = time.time()
+
+    # compute warp images
+    geom_info = compute_geometry(
+        state_cache,
+        ransac_method=ransac_method,
+        ransac_reproj_threshold=ransac_reproj_threshold,
+        ransac_confidence=ransac_confidence,
+        ransac_max_iter=ransac_max_iter,
+    )
+    output_wrapped, _ = generate_warp_images(
+        state_cache["image0_orig"],
+        state_cache["image1_orig"],
+        {"geom_info": geom_info},
+        choice_geometry_type,
+    )
+    plt.close("all")
+
     num_matches_raw = state_cache["num_matches_raw"]
-    return
-
-
-
+    return (
+        output_matches_ransac,
+        {
+            "num_matches_raw": num_matches_raw,
+            "num_matches_ransac": num_matches_ransac,
+        },
+        output_wrapped,
+    )
 
 
 def run_matching(
@@ -666,7 +688,13 @@ def run_matching(
 
     t1 = time.time()
     # plot wrapped images
-    geom_info = compute_geometry(
+    geom_info = compute_geometry(
+        pred,
+        ransac_method=ransac_method,
+        ransac_reproj_threshold=ransac_reproj_threshold,
+        ransac_confidence=ransac_confidence,
+        ransac_max_iter=ransac_max_iter,
+    )
     output_wrapped, _ = generate_warp_images(
         pred["image0_orig"],
         pred["image1_orig"],
env-docker.txt
CHANGED
@@ -1,8 +1,8 @@
 e2cnn==0.2.3
 einops==0.6.1
 gdown==4.7.1
-gradio==
-gradio_client==0.
+gradio==4.28.3
+gradio_client==0.16.0
 h5py==3.9.0
 imageio==2.31.1
 Jinja2==3.1.2
hloc/match_dense.py
CHANGED
@@ -28,6 +28,44 @@ confs = {
         "max_error": 1,  # max error for assigned keypoints (in px)
         "cell_size": 1,  # size of quantization patch (max 1 kp/patch)
     },
+    # "loftr_quadtree": {
+    #     "output": "matches-loftr-quadtree",
+    #     "model": {
+    #         "name": "quadtree",
+    #         "weights": "outdoor",
+    #         "max_keypoints": 2000,
+    #         "match_threshold": 0.2,
+    #     },
+    #     "preprocessing": {
+    #         "grayscale": True,
+    #         "resize_max": 1024,
+    #         "dfactor": 8,
+    #         "width": 640,
+    #         "height": 480,
+    #         "force_resize": True,
+    #     },
+    #     "max_error": 1,  # max error for assigned keypoints (in px)
+    #     "cell_size": 1,  # size of quantization patch (max 1 kp/patch)
+    # },
+    "cotr": {
+        "output": "matches-cotr",
+        "model": {
+            "name": "cotr",
+            "weights": "out/default",
+            "max_keypoints": 2000,
+            "match_threshold": 0.2,
+        },
+        "preprocessing": {
+            "grayscale": False,
+            "resize_max": 1024,
+            "dfactor": 8,
+            "width": 640,
+            "height": 480,
+            "force_resize": True,
+        },
+        "max_error": 1,  # max error for assigned keypoints (in px)
+        "cell_size": 1,  # size of quantization patch (max 1 kp/patch)
+    },
     # Semi-scalable loftr which limits detected keypoints
     "loftr_aachen": {
         "output": "matches-loftr_aachen",
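A minimal sketch of looking up the new dense-matcher preset, assuming `hloc` is importable from the repository root; only the `confs` dictionary and its keys are taken from the hunk above:

```python
from hloc import match_dense

# "cotr" is the dense-matcher preset registered in the hunk above.
conf = match_dense.confs["cotr"]
print(conf["model"]["name"])                  # "cotr"
print(conf["model"]["weights"])               # "out/default"
print(conf["preprocessing"]["force_resize"])  # True
```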
hloc/matchers/cotr.py
ADDED
@@ -0,0 +1,77 @@
+import sys
+import argparse
+import torch
+import warnings
+import numpy as np
+from pathlib import Path
+from torchvision.transforms import ToPILImage
+from ..utils.base_model import BaseModel
+
+sys.path.append(str(Path(__file__).parent / "../../third_party/COTR"))
+from COTR.utils import utils as utils_cotr
+from COTR.models import build_model
+from COTR.options.options import *
+from COTR.options.options_utils import *
+from COTR.inference.inference_helper import triangulate_corr
+from COTR.inference.sparse_engine import SparseEngine
+
+utils_cotr.fix_randomness(0)
+torch.set_grad_enabled(False)
+
+cotr_path = Path(__file__).parent / "../../third_party/COTR"
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+class COTR(BaseModel):
+    default_conf = {
+        "weights": "out/default",
+        "match_threshold": 0.2,
+        "max_keypoints": -1,
+    }
+    required_inputs = ["image0", "image1"]
+
+    def _init(self, conf):
+        parser = argparse.ArgumentParser()
+        set_COTR_arguments(parser)
+        opt = parser.parse_args()
+        opt.command = " ".join(sys.argv)
+        opt.load_weights_path = str(
+            cotr_path / conf["weights"] / "checkpoint.pth.tar"
+        )
+
+        layer_2_channels = {
+            "layer1": 256,
+            "layer2": 512,
+            "layer3": 1024,
+            "layer4": 2048,
+        }
+        opt.dim_feedforward = layer_2_channels[opt.layer]
+
+        model = build_model(opt)
+        model = model.to(device)
+        weights = torch.load(opt.load_weights_path, map_location="cpu")[
+            "model_state_dict"
+        ]
+        utils_cotr.safe_load_weights(model, weights)
+        self.net = model.eval()
+        self.to_pil_func = ToPILImage(mode="RGB")
+
+    def _forward(self, data):
+        img_a = np.array(self.to_pil_func(data["image0"][0].cpu()))
+        img_b = np.array(self.to_pil_func(data["image1"][0].cpu()))
+        corrs = SparseEngine(
+            self.net, 32, mode="tile"
+        ).cotr_corr_multiscale_with_cycle_consistency(
+            img_a,
+            img_b,
+            np.linspace(0.5, 0.0625, 4),
+            1,
+            max_corrs=self.conf["max_keypoints"],
+            queries_a=None,
+        )
+        pred = {
+            "keypoints0": torch.from_numpy(corrs[:, :2]),
+            "keypoints1": torch.from_numpy(corrs[:, 2:]),
+        }
+        return pred
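A rough usage sketch for the new matcher class. It assumes hloc's `BaseModel` follows its usual convention (the constructor merges the passed dict into `default_conf` and calls `_init`, and calling the module dispatches to `_forward`), and that the bundled checkpoint at `third_party/COTR/out/default/checkpoint.pth.tar` is present; the input tensors below are random placeholders:

```python
import torch
from hloc.matchers.cotr import COTR

# Assumption: BaseModel(conf) merges conf into default_conf and routes
# model(data) -> _forward(data); the checkpoint path resolves relative to
# third_party/COTR, as set up in _init above.
model = COTR({"weights": "out/default", "max_keypoints": 1000})

data = {
    "image0": torch.rand(1, 3, 480, 640),  # placeholder RGB batch in [0, 1]
    "image1": torch.rand(1, 3, 480, 640),
}
pred = model(data)
print(pred["keypoints0"].shape, pred["keypoints1"].shape)  # (N, 2) each
```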
third_party/COTR/.gitignore
ADDED
@@ -0,0 +1 @@
+*.pyc
third_party/COTR/COTR/cameras/camera_pose.py
ADDED
@@ -0,0 +1,164 @@
+'''
+Extrinsic camera pose
+'''
+import math
+import copy
+
+import numpy as np
+
+from COTR.transformations import transformations
+from COTR.transformations.transform_basics import Translation, Rotation, UnstableRotation
+
+
+class CameraPose():
+    def __init__(self, t: Translation, r: Rotation):
+        '''
+        WARN: World 2 cam
+        Translation and rotation are world to camera
+        translation_vector is not the coordinate of the camera in world space.
+        '''
+        assert isinstance(t, Translation)
+        assert isinstance(r, Rotation) or isinstance(r, UnstableRotation)
+        self.t = t
+        self.r = r
+
+    def __str__(self):
+        string = f'center in world: {self.camera_center_in_world}, translation(w2c): {self.t}, rotation(w2c): {self.r}'
+        return string
+
+    @classmethod
+    def from_world_to_camera(cls, world_to_camera, unstable=False):
+        assert isinstance(world_to_camera, np.ndarray)
+        assert world_to_camera.shape == (4, 4)
+        vec = transformations.translation_from_matrix(world_to_camera).astype(np.float32)
+        t = Translation(vec)
+        if unstable:
+            r = UnstableRotation(world_to_camera)
+        else:
+            quat = transformations.quaternion_from_matrix(world_to_camera).astype(np.float32)
+            r = Rotation(quat)
+        return cls(t, r)
+
+    @classmethod
+    def from_camera_to_world(cls, camera_to_world, unstable=False):
+        assert isinstance(camera_to_world, np.ndarray)
+        assert camera_to_world.shape == (4, 4)
+        world_to_camera = np.linalg.inv(camera_to_world)
+        world_to_camera /= world_to_camera[3, 3]
+        return cls.from_world_to_camera(world_to_camera, unstable)
+
+    @classmethod
+    def from_pose_vector(cls, pose_vector):
+        t = Translation(pose_vector[:3])
+        r = Rotation(pose_vector[3:])
+        return cls(t, r)
+
+    @property
+    def translation_vector(self):
+        return self.t.translation_vector
+
+    @property
+    def translation_matrix(self):
+        return self.t.translation_matrix
+
+    @property
+    def quaternion(self):
+        '''
+        quaternion format (w, x, y, z)
+        '''
+        return self.r.quaternion
+
+    @property
+    def rotation_matrix(self):
+        return self.r.rotation_matrix
+
+    @property
+    def pose_vector(self):
+        '''
+        Pose vector is a concat of translation vector and quaternion vector
+        (X, Y, Z, w, x, y, z)
+        w2c
+        '''
+        return np.concatenate([self.translation_vector, self.quaternion])
+
+    @property
+    def inv_pose_vector(self):
+        inv_quat = transformations.quaternion_inverse(self.quaternion)
+        return np.concatenate([self.camera_center_in_world, inv_quat])
+
+    @property
+    def pose_vector_6_dof(self):
+        '''
+        Here we assuming the quaternion is normalized and we remove the W component
+        (X, Y, Z, x, y, z)
+        '''
+        return np.concatenate([self.translation_vector, self.quaternion[1:]])
+
+    @property
+    def world_to_camera(self):
+        M = np.matmul(self.translation_matrix, self.rotation_matrix)
+        M /= M[3, 3]
+        return M
+
+    @property
+    def world_to_camera_3x4(self):
+        M = self.world_to_camera
+        M = M[0:3, 0:4]
+        return M
+
+    @property
+    def extrinsic_mat(self):
+        return self.world_to_camera_3x4
+
+    @property
+    def camera_to_world(self):
+        M = np.linalg.inv(self.world_to_camera)
+        M /= M[3, 3]
+        return M
+
+    @property
+    def camera_to_world_3x4(self):
+        M = self.camera_to_world
+        M = M[0:3, 0:4]
+        return M
+
+    @property
+    def camera_center_in_world(self):
+        return self.camera_to_world[:3, 3]
+
+    @property
+    def forward(self):
+        return self.camera_to_world[:3, 2]
+
+    @property
+    def up(self):
+        return self.camera_to_world[:3, 1]
+
+    @property
+    def right(self):
+        return self.camera_to_world[:3, 0]
+
+    @property
+    def essential_matrix(self):
+        E = np.cross(self.rotation_matrix[:3, :3], self.camera_center_in_world)
+        return E / np.linalg.norm(E)
+
+
+def inverse_camera_pose(cam_pose: CameraPose):
+    return CameraPose.from_world_to_camera(np.linalg.inv(cam_pose.world_to_camera))
+
+
+def rotate_camera_pose(cam_pose, rot):
+    if rot == 0:
+        return copy.deepcopy(cam_pose)
+    else:
+        rot = rot / 180 * np.pi
+        sin_rot = np.sin(rot)
+        cos_rot = np.cos(rot)
+
+        rot_mat = np.stack([np.stack([cos_rot, -sin_rot, 0, 0], axis=-1),
+                            np.stack([sin_rot, cos_rot, 0, 0], axis=-1),
+                            np.stack([0, 0, 1, 0], axis=-1),
+                            np.stack([0, 0, 0, 1], axis=-1)], axis=1)
+        new_world2cam = np.matmul(rot_mat, cam_pose.world_to_camera)
+        return CameraPose.from_world_to_camera(new_world2cam)
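A small sketch of the world-to-camera convention documented in `CameraPose`, assuming the vendored `COTR` package is on `sys.path`:

```python
import numpy as np
from COTR.cameras.camera_pose import CameraPose

# A world-to-camera transform that translates world points by +2 along z,
# i.e. the camera sits at (0, 0, -2) in world coordinates.
w2c = np.eye(4, dtype=np.float32)
w2c[:3, 3] = [0.0, 0.0, 2.0]

pose = CameraPose.from_world_to_camera(w2c)
print(pose.pose_vector)             # [0. 0. 2. 1. 0. 0. 0.]  -> (X, Y, Z, w, x, y, z)
print(pose.camera_center_in_world)  # [ 0.  0. -2.]
```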
third_party/COTR/COTR/cameras/capture.py
ADDED
@@ -0,0 +1,432 @@
+'''
+Capture from a pinhole camera
+Separate the captured content and the camera...
+'''
+
+import os
+import time
+import abc
+import copy
+
+import cv2
+import torch
+import numpy as np
+import imageio
+import PIL
+from PIL import Image
+
+from COTR.cameras.camera_pose import CameraPose, rotate_camera_pose
+from COTR.cameras.pinhole_camera import PinholeCamera, rotate_pinhole_camera, crop_pinhole_camera
+from COTR.utils import debug_utils, utils, constants
+from COTR.utils.utils import Point2D
+from COTR.projector import pcd_projector
+from COTR.utils.constants import MAX_SIZE
+from COTR.utils.utils import CropCamConfig
+
+
+def crop_center_max_xy(p2d, shape):
+    h, w = shape
+    crop_x = min(h, w)
+    crop_y = crop_x
+    start_x = w // 2 - crop_x // 2
+    start_y = h // 2 - crop_y // 2
+    mask = (p2d.xy[:, 0] > start_x) & (p2d.xy[:, 0] < start_x + crop_x) & (p2d.xy[:, 1] > start_y) & (p2d.xy[:, 1] < start_y + crop_y)
+    out_xy = (p2d.xy - [start_x, start_y])[mask]
+    out = Point2D(p2d.id_3d[mask], out_xy)
+    return out
+
+
+def crop_center_max(img):
+    if isinstance(img, torch.Tensor):
+        return crop_center_max_torch(img)
+    elif isinstance(img, np.ndarray):
+        return crop_center_max_np(img)
+    else:
+        raise ValueError
+
+
+def crop_center_max_torch(img):
+    if len(img.shape) == 2:
+        h, w = img.shape
+    elif len(img.shape) == 3:
+        c, h, w = img.shape
+    elif len(img.shape) == 4:
+        b, c, h, w = img.shape
+    else:
+        raise ValueError
+    crop_x = min(h, w)
+    crop_y = crop_x
+    start_x = w // 2 - crop_x // 2
+    start_y = h // 2 - crop_y // 2
+    if len(img.shape) == 2:
+        return img[start_y:start_y + crop_y, start_x:start_x + crop_x]
+    elif len(img.shape) in [3, 4]:
+        return img[..., start_y:start_y + crop_y, start_x:start_x + crop_x]
+
+
+def crop_center_max_np(img, return_starts=False):
+    if len(img.shape) == 2:
+        h, w = img.shape
+    elif len(img.shape) == 3:
+        h, w, c = img.shape
+    elif len(img.shape) == 4:
+        b, h, w, c = img.shape
+    else:
+        raise ValueError
+    crop_x = min(h, w)
+    crop_y = crop_x
+    start_x = w // 2 - crop_x // 2
+    start_y = h // 2 - crop_y // 2
+    if len(img.shape) == 2:
+        canvas = img[start_y:start_y + crop_y, start_x:start_x + crop_x]
+    elif len(img.shape) == 3:
+        canvas = img[start_y:start_y + crop_y, start_x:start_x + crop_x, :]
+    elif len(img.shape) == 4:
+        canvas = img[:, start_y:start_y + crop_y, start_x:start_x + crop_x, :]
+    if return_starts:
+        return canvas, -start_x, -start_y
+    else:
+        return canvas
+
+
+def pad_to_square_np(img, till_divisible_by=1, return_starts=False):
+    if len(img.shape) == 2:
+        h, w = img.shape
+    elif len(img.shape) == 3:
+        h, w, c = img.shape
+    elif len(img.shape) == 4:
+        b, h, w, c = img.shape
+    else:
+        raise ValueError
+    if till_divisible_by == 1:
+        size = max(h, w)
+    else:
+        size = (max(h, w) + till_divisible_by) - (max(h, w) % till_divisible_by)
+    start_x = size // 2 - w // 2
+    start_y = size // 2 - h // 2
+    if len(img.shape) == 2:
+        canvas = np.zeros([size, size], dtype=img.dtype)
+        canvas[start_y:start_y + h, start_x:start_x + w] = img
+    elif len(img.shape) == 3:
+        canvas = np.zeros([size, size, c], dtype=img.dtype)
+        canvas[start_y:start_y + h, start_x:start_x + w, :] = img
+    elif len(img.shape) == 4:
+        canvas = np.zeros([b, size, size, c], dtype=img.dtype)
+        canvas[:, start_y:start_y + h, start_x:start_x + w, :] = img
+    if return_starts:
+        return canvas, start_x, start_y
+    else:
+        return canvas
+
+
+def stretch_to_square_np(img):
+    size = max(*img.shape[:2])
+    return np.array(PIL.Image.fromarray(img).resize((size, size), resample=PIL.Image.BILINEAR))
+
+
+def rotate_image(image, angle, interpolation=cv2.INTER_LINEAR):
+    image_center = tuple(np.array(image.shape[1::-1]) / 2)
+    rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
+    result = cv2.warpAffine(image, rot_mat, image.shape[1::-1], flags=interpolation)
+    return result
+
+
+def read_array(path):
+    '''
+    https://github.com/colmap/colmap/blob/dev/scripts/python/read_dense.py
+    '''
+    with open(path, "rb") as fid:
+        width, height, channels = np.genfromtxt(fid, delimiter="&", max_rows=1,
+                                                usecols=(0, 1, 2), dtype=int)
+        fid.seek(0)
+        num_delimiter = 0
+        byte = fid.read(1)
+        while True:
+            if byte == b"&":
+                num_delimiter += 1
+                if num_delimiter >= 3:
+                    break
+            byte = fid.read(1)
+        array = np.fromfile(fid, np.float32)
+    array = array.reshape((width, height, channels), order="F")
+    return np.transpose(array, (1, 0, 2)).squeeze()
+
+
+################ Content ################
+
+
+class CapturedContent(abc.ABC):
+    def __init__(self):
+        self._rotation = 0
+
+    @property
+    def rotation(self):
+        return self._rotation
+
+    @rotation.setter
+    def rotation(self, rot):
+        self._rotation = rot
+
+
+class CapturedImage(CapturedContent):
+    def __init__(self, img_path, crop_cam, pinhole_cam_before=None):
+        super(CapturedImage, self).__init__()
+        assert os.path.isfile(img_path), 'file does not exist: {0}'.format(img_path)
+        self.crop_cam = crop_cam
+        self._image = None
+        self.img_path = img_path
+        self.pinhole_cam_before = pinhole_cam_before
+        self._p2d = None
+
+    def read_image_to_ram(self) -> int:
+        # raise NotImplementedError
+        assert self._image is None
+        _image = self.image
+        self._image = _image
+        return self._image.nbytes
+
+    @property
+    def image(self):
+        if self._image is not None:
+            _image = self._image
+        else:
+            _image = imageio.imread(self.img_path, pilmode='RGB')
+        if self.rotation != 0:
+            _image = rotate_image(_image, self.rotation)
+        if _image.shape[:2] != self.pinhole_cam_before.shape:
+            _image = np.array(PIL.Image.fromarray(_image).resize(self.pinhole_cam_before.shape[::-1], resample=PIL.Image.BILINEAR))
+        assert _image.shape[:2] == self.pinhole_cam_before.shape
+        if self.crop_cam == 'no_crop':
+            pass
+        elif self.crop_cam == 'crop_center':
+            _image = crop_center_max(_image)
+        elif self.crop_cam == 'crop_center_and_resize':
+            _image = crop_center_max(_image)
+            _image = np.array(PIL.Image.fromarray(_image).resize((MAX_SIZE, MAX_SIZE), resample=PIL.Image.BILINEAR))
+        elif isinstance(self.crop_cam, CropCamConfig):
+            assert _image.shape[0] == self.crop_cam.orig_h
+            assert _image.shape[1] == self.crop_cam.orig_w
+            _image = _image[self.crop_cam.y:self.crop_cam.y + self.crop_cam.h,
+                            self.crop_cam.x:self.crop_cam.x + self.crop_cam.w, ]
+            _image = np.array(PIL.Image.fromarray(_image).resize((self.crop_cam.out_w, self.crop_cam.out_h), resample=PIL.Image.BILINEAR))
+            assert _image.shape[:2] == (self.crop_cam.out_h, self.crop_cam.out_w)
+        else:
+            raise ValueError()
+        return _image
+
+    @property
+    def p2d(self):
+        if self._p2d is None:
+            return self._p2d
+        else:
+            _p2d = self._p2d
+        if self.crop_cam == 'no_crop':
+            pass
+        elif self.crop_cam == 'crop_center':
+            _p2d = crop_center_max_xy(_p2d, self.pinhole_cam_before.shape)
+        else:
+            raise ValueError()
+        return _p2d
+
+    @p2d.setter
+    def p2d(self, value):
+        if value is not None:
+            assert isinstance(value, Point2D)
+        self._p2d = value
+
+
+class CapturedDepth(CapturedContent):
+    def __init__(self, depth_path, crop_cam, pinhole_cam_before=None):
+        super(CapturedDepth, self).__init__()
+        if not depth_path.endswith('dummy'):
+            assert os.path.isfile(depth_path), 'file does not exist: {0}'.format(depth_path)
+        self.crop_cam = crop_cam
+        self._depth = None
+        self.depth_path = depth_path
+        self.pinhole_cam_before = pinhole_cam_before
+
+    def read_depth(self):
+        import tables
+        if self.depth_path.endswith('dummy'):
+            image_path = self.depth_path[:-5]
+            w, h = Image.open(image_path).size
+            _depth = np.zeros([h, w], dtype=np.float32)
+        elif self.depth_path.endswith('.h5'):
+            depth_h5 = tables.open_file(self.depth_path, mode='r')
+            _depth = np.array(depth_h5.root.depth)
+            depth_h5.close()
+        else:
+            raise ValueError
+        return _depth.astype(np.float32)
+
+    def read_depth_to_ram(self) -> int:
+        # raise NotImplementedError
+        assert self._depth is None
+        _depth = self.depth_map
+        self._depth = _depth
+        return self._depth.nbytes
+
+    @property
+    def depth_map(self):
+        if self._depth is not None:
+            _depth = self._depth
+        else:
+            _depth = self.read_depth()
+        if self.rotation != 0:
+            _depth = rotate_image(_depth, self.rotation, interpolation=cv2.INTER_NEAREST)
+        if _depth.shape != self.pinhole_cam_before.shape:
+            _depth = np.array(PIL.Image.fromarray(_depth).resize(self.pinhole_cam_before.shape[::-1], resample=PIL.Image.NEAREST))
+        assert _depth.shape[:2] == self.pinhole_cam_before.shape
+        if self.crop_cam == 'no_crop':
+            pass
+        elif self.crop_cam == 'crop_center':
+            _depth = crop_center_max(_depth)
+        elif self.crop_cam == 'crop_center_and_resize':
+            _depth = crop_center_max(_depth)
+            _depth = np.array(PIL.Image.fromarray(_depth).resize((MAX_SIZE, MAX_SIZE), resample=PIL.Image.NEAREST))
+        elif isinstance(self.crop_cam, CropCamConfig):
+            assert _depth.shape[0] == self.crop_cam.orig_h
+            assert _depth.shape[1] == self.crop_cam.orig_w
+            _depth = _depth[self.crop_cam.y:self.crop_cam.y + self.crop_cam.h,
+                            self.crop_cam.x:self.crop_cam.x + self.crop_cam.w, ]
+            _depth = np.array(PIL.Image.fromarray(_depth).resize((self.crop_cam.out_w, self.crop_cam.out_h), resample=PIL.Image.NEAREST))
+            assert _depth.shape[:2] == (self.crop_cam.out_h, self.crop_cam.out_w)
+        else:
+            raise ValueError()
+        assert (_depth >= 0).all()
+        return _depth
+
+
+################ Pinhole Capture ################
+class BasePinholeCapture():
+    def __init__(self, pinhole_cam, cam_pose, crop_cam):
+        self.crop_cam = crop_cam
+        self.cam_pose = cam_pose
+        # modify the camera instrinsics
+        self.pinhole_cam = crop_pinhole_camera(pinhole_cam, crop_cam)
+        self.pinhole_cam_before = pinhole_cam
+
+    def __str__(self):
+        string = 'pinhole camera: {0}\ncamera pose: {1}'.format(self.pinhole_cam, self.cam_pose)
+        return string
+
+    @property
+    def intrinsic_mat(self):
+        return self.pinhole_cam.intrinsic_mat
+
+    @property
+    def extrinsic_mat(self):
+        return self.cam_pose.extrinsic_mat
+
+    @property
+    def shape(self):
+        return self.pinhole_cam.shape
+
+    @property
+    def size(self):
+        return self.shape
+
+    @property
+    def mvp_mat(self):
+        '''
+        model-view-projection matrix (naming from opengl)
+        '''
+        return np.matmul(self.pinhole_cam.intrinsic_mat, self.cam_pose.world_to_camera_3x4)
+
+
+class RGBPinholeCapture(BasePinholeCapture):
+    def __init__(self, img_path, pinhole_cam, cam_pose, crop_cam):
+        BasePinholeCapture.__init__(self, pinhole_cam, cam_pose, crop_cam)
+        self.captured_image = CapturedImage(img_path, crop_cam, self.pinhole_cam_before)
+
+    def read_image_to_ram(self) -> int:
+        return self.captured_image.read_image_to_ram()
+
+    @property
+    def img_path(self):
+        return self.captured_image.img_path
+
+    @property
+    def image(self):
+        _image = self.captured_image.image
+        assert _image.shape[0:2] == self.pinhole_cam.shape, 'image shape: {0}, pinhole camera: {1}'.format(_image.shape, self.pinhole_cam)
+        return _image
+
+    @property
+    def seq_id(self):
+        return os.path.dirname(self.captured_image.img_path)
+
+    @property
+    def p2d(self):
+        return self.captured_image.p2d
+
+    @p2d.setter
+    def p2d(self, value):
+        self.captured_image.p2d = value
+
+
+class DepthPinholeCapture(BasePinholeCapture):
+    def __init__(self, depth_path, pinhole_cam, cam_pose, crop_cam):
+        BasePinholeCapture.__init__(self, pinhole_cam, cam_pose, crop_cam)
+        self.captured_depth = CapturedDepth(depth_path, crop_cam, self.pinhole_cam_before)
+
+    def read_depth_to_ram(self) -> int:
+        return self.captured_depth.read_depth_to_ram()
+
+    @property
+    def depth_path(self):
+        return self.captured_depth.depth_path
+
+    @property
+    def depth_map(self):
+        _depth = self.captured_depth.depth_map
+        # if self.pinhole_cam.shape != _depth.shape:
+        #     _depth = misc.imresize(_depth, self.pinhole_cam.shape, interp='nearest', mode='F')
+        assert (_depth >= 0).all()
+        return _depth
+
+    @property
+    def point_cloud_world(self):
+        return self.get_point_cloud_world_from_depth(feat_map=None)
+
+    def get_point_cloud_world_from_depth(self, feat_map=None):
+        _pcd = pcd_projector.PointCloudProjector.img_2d_to_pcd_3d_np(self.depth_map, self.pinhole_cam.intrinsic_mat, img=feat_map, motion=self.cam_pose.camera_to_world).astype(constants.DEFAULT_PRECISION)
+        return _pcd
+
+
+class RGBDPinholeCapture(RGBPinholeCapture, DepthPinholeCapture):
+    def __init__(self, img_path, depth_path, pinhole_cam, cam_pose, crop_cam):
+        RGBPinholeCapture.__init__(self, img_path, pinhole_cam, cam_pose, crop_cam)
+        DepthPinholeCapture.__init__(self, depth_path, pinhole_cam, cam_pose, crop_cam)
+
+    @property
+    def point_cloud_w_rgb_world(self):
+        return self.get_point_cloud_world_from_depth(feat_map=self.image)
+
+
+def rotate_capture(cap, rot):
+    if rot == 0:
+        return copy.deepcopy(cap)
+    else:
+        rot_pose = rotate_camera_pose(cap.cam_pose, rot)
+        rot_cap = copy.deepcopy(cap)
+        rot_cap.cam_pose = rot_pose
+        if hasattr(rot_cap, 'captured_image'):
+            rot_cap.captured_image.rotation = rot
+        if hasattr(rot_cap, 'captured_depth'):
+            rot_cap.captured_depth.rotation = rot
+        return rot_cap
+
+
+def crop_capture(cap, crop_cam):
+    if isinstance(cap, RGBDPinholeCapture):
+        cropped_cap = RGBDPinholeCapture(cap.img_path, cap.depth_path, cap.pinhole_cam, cap.cam_pose, crop_cam)
+    elif isinstance(cap, RGBPinholeCapture):
+        cropped_cap = RGBPinholeCapture(cap.img_path, cap.pinhole_cam, cap.cam_pose, crop_cam)
+    else:
+        raise ValueError
+    if hasattr(cropped_cap, 'captured_image'):
+        cropped_cap.captured_image.rotation = cap.captured_image.rotation
+    if hasattr(cropped_cap, 'captured_depth'):
+        cropped_cap.captured_depth.rotation = cap.captured_depth.rotation
+    return cropped_cap
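A tiny sketch of the square-crop and square-pad helpers defined above, assuming the vendored `COTR` package is importable; the input is a dummy array:

```python
import numpy as np
from COTR.cameras.capture import crop_center_max_np, pad_to_square_np

img = np.zeros((480, 640, 3), dtype=np.uint8)             # dummy landscape image
print(crop_center_max_np(img).shape)                      # (480, 480, 3): square crop of the short side
print(pad_to_square_np(img).shape)                        # (640, 640, 3): zero-padded to the long side
print(pad_to_square_np(img, till_divisible_by=16).shape)  # (656, 656, 3)
```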
third_party/COTR/COTR/cameras/pinhole_camera.py
ADDED
@@ -0,0 +1,73 @@
+"""
+Static pinhole camera
+"""
+
+import copy
+
+import numpy as np
+
+from COTR.utils import constants
+from COTR.utils.constants import MAX_SIZE
+from COTR.utils.utils import CropCamConfig
+
+
+class PinholeCamera():
+    def __init__(self, width, height, fx, fy, cx, cy):
+        self.width = int(width)
+        self.height = int(height)
+        self.fx = fx
+        self.fy = fy
+        self.cx = cx
+        self.cy = cy
+
+    def __str__(self):
+        string = 'width: {0}, height: {1}, fx: {2}, fy: {3}, cx: {4}, cy: {5}'.format(self.width, self.height, self.fx, self.fy, self.cx, self.cy)
+        return string
+
+    @property
+    def shape(self):
+        return (self.height, self.width)
+
+    @property
+    def intrinsic_mat(self):
+        mat = np.array([[self.fx, 0.0, self.cx],
+                        [0.0, self.fy, self.cy],
+                        [0.0, 0.0, 1.0]], dtype=constants.DEFAULT_PRECISION)
+        return mat
+
+
+def rotate_pinhole_camera(cam, rot):
+    assert 0, 'TODO: Camera should stay the same while rotation'
+    assert rot in [0, 90, 180, 270], 'only support 0/90/180/270 degrees rotation'
+    if rot in [0, 180]:
+        return copy.deepcopy(cam)
+    elif rot in [90, 270]:
+        return PinholeCamera(width=cam.height, height=cam.width, fx=cam.fy, fy=cam.fx, cx=cam.cy, cy=cam.cx)
+    else:
+        raise NotImplementedError
+
+
+def crop_pinhole_camera(pinhole_cam, crop_cam):
+    if crop_cam == 'no_crop':
+        cropped_pinhole_cam = pinhole_cam
+    elif crop_cam == 'crop_center':
+        _h = _w = min(*pinhole_cam.shape)
+        _cx = _cy = _h / 2
+        cropped_pinhole_cam = PinholeCamera(_w, _h, pinhole_cam.fx, pinhole_cam.fy, _cx, _cy)
+    elif crop_cam == 'crop_center_and_resize':
+        _h = _w = MAX_SIZE
+        _cx = _cy = MAX_SIZE / 2
+        scale = MAX_SIZE / min(*pinhole_cam.shape)
+        cropped_pinhole_cam = PinholeCamera(_w, _h, pinhole_cam.fx * scale, pinhole_cam.fy * scale, _cx, _cy)
+    elif isinstance(crop_cam, CropCamConfig):
+        scale = crop_cam.out_h / crop_cam.h
+        cropped_pinhole_cam = PinholeCamera(crop_cam.out_w,
+                                            crop_cam.out_h,
+                                            pinhole_cam.fx * scale,
+                                            pinhole_cam.fy * scale,
+                                            (pinhole_cam.cx - crop_cam.x) * scale,
+                                            (pinhole_cam.cy - crop_cam.y) * scale
+                                            )
+    else:
+        raise ValueError
+    return cropped_pinhole_cam
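A short sketch of the intrinsics handling above, again assuming the vendored `COTR` package is importable; the camera parameters are made up:

```python
from COTR.cameras.pinhole_camera import PinholeCamera, crop_pinhole_camera

cam = PinholeCamera(width=640, height=480, fx=500.0, fy=500.0, cx=320.0, cy=240.0)
print(cam.shape)          # (480, 640)
print(cam.intrinsic_mat)  # 3x3 K matrix built from fx, fy, cx, cy

# 'crop_center' keeps the focal lengths and re-centres the principal point on
# a square crop of the shorter side.
cropped = crop_pinhole_camera(cam, "crop_center")
print(cropped.shape)           # (480, 480)
print(cropped.cx, cropped.cy)  # 240.0 240.0
```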
third_party/COTR/COTR/datasets/colmap_helper.py
ADDED
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
assert sys.version_info >= (3, 7), 'ordered dict is required'
|
3 |
+
import os
|
4 |
+
import re
|
5 |
+
from collections import namedtuple
|
6 |
+
import json
|
7 |
+
|
8 |
+
import numpy as np
|
9 |
+
from tqdm import tqdm
|
10 |
+
|
11 |
+
from COTR.utils import debug_utils
|
12 |
+
from COTR.cameras.pinhole_camera import PinholeCamera
|
13 |
+
from COTR.cameras.camera_pose import CameraPose
|
14 |
+
from COTR.cameras.capture import RGBPinholeCapture, RGBDPinholeCapture
|
15 |
+
from COTR.cameras import capture
|
16 |
+
from COTR.transformations import transformations
|
17 |
+
from COTR.transformations.transform_basics import Translation, Rotation
|
18 |
+
from COTR.sfm_scenes import sfm_scenes
|
19 |
+
from COTR.global_configs import dataset_config
|
20 |
+
from COTR.utils.utils import Point2D, Point3D
|
21 |
+
|
22 |
+
ImageMeta = namedtuple('ImageMeta', ['image_id', 'r', 't', 'camera_id', 'image_path', 'point3d_id', 'p2d'])
|
23 |
+
COVISIBILITY_CHECK = False
|
24 |
+
LOAD_PCD = False
|
25 |
+
|
26 |
+
|
27 |
+
class ColmapAsciiReader():
|
28 |
+
def __init__(self):
|
29 |
+
pass
|
30 |
+
|
31 |
+
@classmethod
|
32 |
+
def read_sfm_scene(cls, scene_dir, images_dir, crop_cam):
|
33 |
+
point_cloud_path = os.path.join(scene_dir, 'points3D.txt')
|
34 |
+
cameras_path = os.path.join(scene_dir, 'cameras.txt')
|
35 |
+
images_path = os.path.join(scene_dir, 'images.txt')
|
36 |
+
captures = cls.read_captures(images_path, cameras_path, images_dir, crop_cam)
|
37 |
+
if LOAD_PCD:
|
38 |
+
point_cloud = cls.read_point_cloud(point_cloud_path)
|
39 |
+
else:
|
40 |
+
point_cloud = None
|
41 |
+
sfm_scene = sfm_scenes.SfmScene(captures, point_cloud)
|
42 |
+
return sfm_scene
|
43 |
+
|
44 |
+
@staticmethod
|
45 |
+
def read_point_cloud(points_txt_path):
|
46 |
+
with open(points_txt_path, "r") as fid:
|
47 |
+
line = fid.readline()
|
48 |
+
assert line == '# 3D point list with one line of data per point:\n'
|
49 |
+
line = fid.readline()
|
50 |
+
assert line == '# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n'
|
51 |
+
line = fid.readline()
|
52 |
+
assert re.search('^# Number of points: \d+, mean track length: [-+]?\d*\.\d+|\d+\n$', line)
|
53 |
+
num_points, mean_track_length = re.findall(r"[-+]?\d*\.\d+|\d+", line)
|
54 |
+
num_points = int(num_points)
|
55 |
+
mean_track_length = float(mean_track_length)
|
56 |
+
|
57 |
+
xyz = np.zeros((num_points, 3), dtype=np.float32)
|
58 |
+
rgb = np.zeros((num_points, 3), dtype=np.float32)
|
59 |
+
if COVISIBILITY_CHECK:
|
60 |
+
point_meta = {}
|
61 |
+
|
62 |
+
for i in tqdm(range(num_points), desc='reading point cloud'):
|
63 |
+
elems = fid.readline().split()
|
64 |
+
xyz[i] = list(map(float, elems[1:4]))
|
65 |
+
rgb[i] = list(map(int, elems[4:7]))
|
66 |
+
if COVISIBILITY_CHECK:
|
67 |
+
point_id = int(elems[0])
|
68 |
+
image_ids = np.array(tuple(map(int, elems[8::2])))
|
69 |
+
point_meta[point_id] = Point3D(id=point_id,
|
70 |
+
arr_idx=i,
|
71 |
+
image_ids=image_ids)
|
72 |
+
pcd = np.concatenate([xyz, rgb], axis=1)
|
73 |
+
if COVISIBILITY_CHECK:
|
74 |
+
return pcd, point_meta
|
75 |
+
else:
|
76 |
+
return pcd
|
77 |
+
|
78 |
+
@classmethod
|
79 |
+
def read_captures(cls, images_txt_path, cameras_txt_path, images_dir, crop_cam):
|
80 |
+
captures = []
|
81 |
+
cameras = cls.read_cameras(cameras_txt_path)
|
82 |
+
images_meta = cls.read_images_meta(images_txt_path, images_dir)
|
83 |
+
for key in images_meta.keys():
|
84 |
+
cur_cam_id = images_meta[key].camera_id
|
85 |
+
cur_cam = cameras[cur_cam_id]
|
86 |
+
cur_camera_pose = CameraPose(images_meta[key].t, images_meta[key].r)
|
87 |
+
cur_image_path = images_meta[key].image_path
|
88 |
+
cap = RGBPinholeCapture(cur_image_path, cur_cam, cur_camera_pose, crop_cam)
|
89 |
+
captures.append(cap)
|
90 |
+
return captures
|
91 |
+
|
92 |
+
@classmethod
|
93 |
+
def read_cameras(cls, cameras_txt_path):
|
94 |
+
cameras = {}
|
95 |
+
with open(cameras_txt_path, "r") as fid:
|
96 |
+
line = fid.readline()
|
97 |
+
assert line == '# Camera list with one line of data per camera:\n'
|
98 |
+
line = fid.readline()
|
99 |
+
assert line == '# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n'
|
100 |
+
line = fid.readline()
|
101 |
+
assert re.search('^# Number of cameras: \d+\n$', line)
|
102 |
+
num_cams = int(re.findall(r"[-+]?\d*\.\d+|\d+", line)[0])
|
103 |
+
|
104 |
+
for _ in tqdm(range(num_cams), desc='reading cameras'):
|
105 |
+
elems = fid.readline().split()
|
106 |
+
camera_id = int(elems[0])
|
107 |
+
camera_type = elems[1]
|
108 |
+
if camera_type == "PINHOLE":
|
109 |
+
width, height, focal_length_x, focal_length_y, cx, cy = list(map(float, elems[2:8]))
|
110 |
+
else:
|
111 |
+
raise ValueError('Please rectify the 3D model to pinhole camera.')
|
112 |
+
cur_cam = PinholeCamera(width, height, focal_length_x, focal_length_y, cx, cy)
|
113 |
+
assert camera_id not in cameras
|
114 |
+
cameras[camera_id] = cur_cam
|
115 |
+
return cameras
|
116 |
+
|
117 |
+
@classmethod
|
118 |
+
def read_images_meta(cls, images_txt_path, images_dir):
|
119 |
+
images_meta = {}
|
120 |
+
with open(images_txt_path, "r") as fid:
|
121 |
+
line = fid.readline()
|
122 |
+
assert line == '# Image list with two lines of data per image:\n'
|
123 |
+
line = fid.readline()
|
124 |
+
assert line == '# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n'
|
125 |
+
line = fid.readline()
|
126 |
+
assert line == '# POINTS2D[] as (X, Y, POINT3D_ID)\n'
|
127 |
+
line = fid.readline()
|
128 |
+
assert re.search('^# Number of images: \d+, mean observations per image: [-+]?\d*\.\d+|\d+\n$', line)
|
129 |
+
num_images, mean_ob_per_img = re.findall(r"[-+]?\d*\.\d+|\d+", line)
|
130 |
+
num_images = int(num_images)
|
131 |
+
mean_ob_per_img = float(mean_ob_per_img)
|
132 |
+
|
133 |
+
for _ in tqdm(range(num_images), desc='reading images meta'):
|
134 |
+
elems = fid.readline().split()
|
135 |
+
assert len(elems) == 10
|
136 |
+
|
137 |
+
image_path = os.path.join(images_dir, elems[9])
|
138 |
+
assert os.path.isfile(image_path)
|
139 |
+
image_id = int(elems[0])
|
140 |
+
qw, qx, qy, qz, tx, ty, tz = list(map(float, elems[1:8]))
|
141 |
+
t = Translation(np.array([tx, ty, tz], dtype=np.float32))
|
142 |
+
r = Rotation(np.array([qw, qx, qy, qz], dtype=np.float32))
|
143 |
+
camera_id = int(elems[8])
|
144 |
+
assert image_id not in images_meta
|
145 |
+
|
146 |
+
line = fid.readline()
|
147 |
+
if COVISIBILITY_CHECK:
|
148 |
+
elems = line.split()
|
149 |
+
elems = list(map(float, elems))
|
150 |
+
elems = np.array(elems).reshape(-1, 3)
|
151 |
+
point3d_id = set(elems[elems[:, 2] != -1][:, 2].astype(np.int))
|
152 |
+
point3d_id = np.sort(np.array(list(point3d_id)))
|
153 |
+
xyi = elems[elems[:, 2] != -1]
|
154 |
+
xy = xyi[:, :2]
|
155 |
+
idx = xyi[:, 2].astype(np.int)
|
156 |
+
p2d = Point2D(idx, xy)
|
157 |
+
else:
|
158 |
+
point3d_id = None
|
159 |
+
p2d = None
|
160 |
+
|
161 |
+
images_meta[image_id] = ImageMeta(image_id, r, t, camera_id, image_path, point3d_id, p2d)
|
162 |
+
return images_meta
|
163 |
+
|
164 |
+
|
165 |
+
class ColmapWithDepthAsciiReader(ColmapAsciiReader):
|
166 |
+
'''
|
167 |
+
Not all images have usable depth estimate from colmap.
|
168 |
+
A valid list is needed.
|
169 |
+
'''
|
170 |
+
|
171 |
+
@classmethod
|
172 |
+
def read_sfm_scene(cls, scene_dir, images_dir, depth_dir, crop_cam):
|
173 |
+
point_cloud_path = os.path.join(scene_dir, 'points3D.txt')
|
174 |
+
cameras_path = os.path.join(scene_dir, 'cameras.txt')
|
175 |
+
images_path = os.path.join(scene_dir, 'images.txt')
|
176 |
+
captures = cls.read_captures(images_path, cameras_path, images_dir, depth_dir, crop_cam)
|
177 |
+
if LOAD_PCD:
|
178 |
+
point_cloud = cls.read_point_cloud(point_cloud_path)
|
179 |
+
else:
|
180 |
+
point_cloud = None
|
181 |
+
sfm_scene = sfm_scenes.SfmScene(captures, point_cloud)
|
182 |
+
return sfm_scene
|
183 |
+
|
184 |
+
@classmethod
|
185 |
+
def read_sfm_scene_given_valid_list_path(cls, scene_dir, images_dir, depth_dir, valid_list_json_path, crop_cam):
|
186 |
+
point_cloud_path = os.path.join(scene_dir, 'points3D.txt')
|
187 |
+
cameras_path = os.path.join(scene_dir, 'cameras.txt')
|
188 |
+
images_path = os.path.join(scene_dir, 'images.txt')
|
189 |
+
valid_list = cls.read_valid_list(valid_list_json_path)
|
190 |
+
captures = cls.read_captures_with_depth_given_valid_list(images_path, cameras_path, images_dir, depth_dir, valid_list, crop_cam)
|
191 |
+
if LOAD_PCD:
|
192 |
+
point_cloud = cls.read_point_cloud(point_cloud_path)
|
193 |
+
else:
|
194 |
+
point_cloud = None
|
195 |
+
sfm_scene = sfm_scenes.SfmScene(captures, point_cloud)
|
196 |
+
return sfm_scene
|
197 |
+
|
198 |
+
@classmethod
|
199 |
+
def read_captures(cls, images_txt_path, cameras_txt_path, images_dir, depth_dir, crop_cam):
|
200 |
+
captures = []
|
201 |
+
cameras = cls.read_cameras(cameras_txt_path)
|
202 |
+
images_meta = cls.read_images_meta(images_txt_path, images_dir)
|
203 |
+
for key in images_meta.keys():
|
204 |
+
cur_cam_id = images_meta[key].camera_id
|
205 |
+
cur_cam = cameras[cur_cam_id]
|
206 |
+
cur_camera_pose = CameraPose(images_meta[key].t, images_meta[key].r)
|
207 |
+
cur_image_path = images_meta[key].image_path
|
208 |
+
try:
|
209 |
+
cur_depth_path = cls.image_path_2_depth_path(cur_image_path[len(images_dir) + 1:], depth_dir)
|
210 |
+
except:
|
211 |
+
print('{0} does not have depth at {1}'.format(cur_image_path, depth_dir))
|
212 |
+
# TODO
|
213 |
+
# continue
|
214 |
+
# exec(debug_utils.embed_breakpoint())
|
215 |
+
cur_depth_path = f'{cur_image_path}dummy'
|
216 |
+
|
217 |
+
cap = RGBDPinholeCapture(cur_image_path, cur_depth_path, cur_cam, cur_camera_pose, crop_cam)
|
218 |
+
cap.point3d_id = images_meta[key].point3d_id
|
219 |
+
cap.p2d = images_meta[key].p2d
|
220 |
+
cap.image_id = key
|
221 |
+
captures.append(cap)
|
222 |
+
return captures
|
223 |
+
|
224 |
+
@classmethod
|
225 |
+
def read_captures_with_depth_given_valid_list(cls, images_txt_path, cameras_txt_path, images_dir, depth_dir, valid_list, crop_cam):
|
226 |
+
captures = []
|
227 |
+
cameras = cls.read_cameras(cameras_txt_path)
|
228 |
+
images_meta = cls.read_images_meta_given_valid_list(images_txt_path, images_dir, valid_list)
|
229 |
+
for key in images_meta.keys():
|
230 |
+
cur_cam_id = images_meta[key].camera_id
|
231 |
+
cur_cam = cameras[cur_cam_id]
|
232 |
+
cur_camera_pose = CameraPose(images_meta[key].t, images_meta[key].r)
|
233 |
+
cur_image_path = images_meta[key].image_path
|
234 |
+
try:
|
235 |
+
                cur_depth_path = cls.image_path_2_depth_path(cur_image_path, depth_dir)
            except:
                print('{0} does not have depth at {1}'.format(cur_image_path, depth_dir))
                continue
            cap = RGBDPinholeCapture(cur_image_path, cur_depth_path, cur_cam, cur_camera_pose, crop_cam)
            cap.point3d_id = images_meta[key].point3d_id
            cap.p2d = images_meta[key].p2d
            cap.image_id = key
            captures.append(cap)
        return captures

    @classmethod
    def read_images_meta_given_valid_list(cls, images_txt_path, images_dir, valid_list):
        images_meta = {}
        with open(images_txt_path, "r") as fid:
            line = fid.readline()
            assert line == '# Image list with two lines of data per image:\n'
            line = fid.readline()
            assert line == '# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n'
            line = fid.readline()
            assert line == '# POINTS2D[] as (X, Y, POINT3D_ID)\n'
            line = fid.readline()
            assert re.search('^# Number of images: \d+, mean observations per image:[-+]?\d*\.\d+|\d+\n$', line), line
            num_images, mean_ob_per_img = re.findall(r"[-+]?\d*\.\d+|\d+", line)
            num_images = int(num_images)
            mean_ob_per_img = float(mean_ob_per_img)

            for _ in tqdm(range(num_images), desc='reading images meta'):
                elems = fid.readline().split()
                assert len(elems) == 10
                line = fid.readline()
                image_path = os.path.join(images_dir, elems[9])
                prefix = os.path.abspath(os.path.join(image_path, '../../../../')) + '/'
                rel_image_path = image_path.replace(prefix, '')
                if rel_image_path not in valid_list:
                    continue
                assert os.path.isfile(image_path), '{0} is not existing'.format(image_path)
                image_id = int(elems[0])
                qw, qx, qy, qz, tx, ty, tz = list(map(float, elems[1:8]))
                t = Translation(np.array([tx, ty, tz], dtype=np.float32))
                r = Rotation(np.array([qw, qx, qy, qz], dtype=np.float32))
                camera_id = int(elems[8])
                assert image_id not in images_meta

                if COVISIBILITY_CHECK:
                    elems = line.split()
                    elems = list(map(float, elems))
                    elems = np.array(elems).reshape(-1, 3)
                    point3d_id = set(elems[elems[:, 2] != -1][:, 2].astype(np.int))
                    point3d_id = np.sort(np.array(list(point3d_id)))
                    xyi = elems[elems[:, 2] != -1]
                    xy = xyi[:, :2]
                    idx = xyi[:, 2].astype(np.int)
                    p2d = Point2D(idx, xy)
                else:
                    point3d_id = None
                    p2d = None
                images_meta[image_id] = ImageMeta(image_id, r, t, camera_id, image_path, point3d_id, p2d)
        return images_meta

    @classmethod
    def read_valid_list(cls, valid_list_json_path):
        assert os.path.isfile(valid_list_json_path), valid_list_json_path
        with open(valid_list_json_path, 'r') as f:
            valid_list = json.load(f)
        assert len(valid_list) == len(set(valid_list))
        return set(valid_list)

    @classmethod
    def image_path_2_depth_path(cls, image_path, depth_dir):
        depth_file = os.path.splitext(os.path.basename(image_path))[0] + '.h5'
        depth_path = os.path.join(depth_dir, depth_file)
        if not os.path.isfile(depth_path):
            # depth_file = image_path + '.photometric.bin'
            depth_file = image_path + '.geometric.bin'
            depth_path = os.path.join(depth_dir, depth_file)
            assert os.path.isfile(depth_path), '{0} is not file'.format(depth_path)
        return depth_path
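The reader above is what the MegaDepth dataset layer further down in this commit calls to load one SfM scene with depth. A hedged sketch of a direct call, assuming a local MegaDepth download; the paths are placeholders, only the method name and argument order mirror the call in megadepth_dataset.py:

    from COTR.datasets import colmap_helper

    # Hypothetical local paths (assumption, not part of the commit).
    scene = colmap_helper.ColmapWithDepthAsciiReader.read_sfm_scene_given_valid_list_path(
        '/data/MegaDepth_v1_SfM/0022/sparse/manhattan/0_rectified/sparse',  # scene_dir
        '/data/MegaDepth_v1/phoenix/S6/zl548/MegaDepth_v1/0022/dense0/imgs',    # images_dir
        '/data/MegaDepth_v1/phoenix/S6/zl548/MegaDepth_v1/0022/dense0/depths',  # depth_dir
        '/data/MegaDepth_v1_SfM/megadepth_valid_list.json',                      # valid_list_json
        'no_crop')                                                               # crop_cam
    print(len(scene.captures), 'captures with RGB-D and pose loaded')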
third_party/COTR/COTR/datasets/cotr_dataset.py
ADDED
@@ -0,0 +1,243 @@
'''
COTR dataset
'''

import random

import numpy as np
import torch
from torchvision.transforms import functional as tvtf
from torch.utils import data

from COTR.datasets import megadepth_dataset
from COTR.utils import debug_utils, utils, constants
from COTR.projector import pcd_projector
from COTR.cameras import capture
from COTR.utils.utils import CropCamConfig
from COTR.inference import inference_helper
from COTR.inference.inference_helper import two_images_side_by_side


class COTRDataset(data.Dataset):
    def __init__(self, opt, dataset_type: str):
        assert dataset_type in ['train', 'val', 'test']
        assert len(opt.scenes_name_list) > 0
        self.opt = opt
        self.dataset_type = dataset_type
        self.sfm_dataset = megadepth_dataset.MegadepthDataset(opt, dataset_type)

        self.kp_pool = opt.kp_pool
        self.num_kp = opt.num_kp
        self.bidirectional = opt.bidirectional
        self.need_rotation = opt.need_rotation
        self.max_rotation = opt.max_rotation
        self.rotation_chance = opt.rotation_chance

    def _trim_corrs(self, in_corrs):
        length = in_corrs.shape[0]
        if length >= self.num_kp:
            mask = np.random.choice(length, self.num_kp)
            return in_corrs[mask]
        else:
            mask = np.random.choice(length, self.num_kp - length)
            return np.concatenate([in_corrs, in_corrs[mask]], axis=0)

    def __len__(self):
        if self.dataset_type == 'val':
            return min(1000, self.sfm_dataset.num_queries)
        else:
            return self.sfm_dataset.num_queries

    def augment_with_rotation(self, query_cap, nn_cap):
        if random.random() < self.rotation_chance:
            theta = np.random.uniform(low=-1, high=1) * self.max_rotation
            query_cap = capture.rotate_capture(query_cap, theta)
        if random.random() < self.rotation_chance:
            theta = np.random.uniform(low=-1, high=1) * self.max_rotation
            nn_cap = capture.rotate_capture(nn_cap, theta)
        return query_cap, nn_cap

    def __getitem__(self, index):
        assert self.opt.k_size == 1
        query_cap, nn_caps = self.sfm_dataset.get_query_with_knn(index)
        nn_cap = nn_caps[0]

        if self.need_rotation:
            query_cap, nn_cap = self.augment_with_rotation(query_cap, nn_cap)

        nn_keypoints_y, nn_keypoints_x = np.where(nn_cap.depth_map > 0)
        nn_keypoints_y = nn_keypoints_y[..., None]
        nn_keypoints_x = nn_keypoints_x[..., None]
        nn_keypoints_z = nn_cap.depth_map[np.floor(nn_keypoints_y).astype('int'), np.floor(nn_keypoints_x).astype('int')]
        nn_keypoints_xy = np.concatenate([nn_keypoints_x, nn_keypoints_y], axis=1)
        nn_keypoints_3d_world, valid_index_1 = pcd_projector.PointCloudProjector.pcd_2d_to_pcd_3d_np(nn_keypoints_xy, nn_keypoints_z, nn_cap.pinhole_cam.intrinsic_mat, motion=nn_cap.cam_pose.camera_to_world, return_index=True)

        query_keypoints_xyz, valid_index_2 = pcd_projector.PointCloudProjector.pcd_3d_to_pcd_2d_np(
            nn_keypoints_3d_world,
            query_cap.pinhole_cam.intrinsic_mat,
            query_cap.cam_pose.world_to_camera[0:3, :],
            query_cap.image.shape[:2],
            keep_z=True,
            crop=True,
            filter_neg=True,
            norm_coord=False,
            return_index=True,
        )
        query_keypoints_xy = query_keypoints_xyz[:, 0:2]
        query_keypoints_z_proj = query_keypoints_xyz[:, 2:3]
        query_keypoints_z = query_cap.depth_map[np.floor(query_keypoints_xy[:, 1:2]).astype('int'), np.floor(query_keypoints_xy[:, 0:1]).astype('int')]
        mask = (abs(query_keypoints_z - query_keypoints_z_proj) < 0.5)[:, 0]
        query_keypoints_xy = query_keypoints_xy[mask]

        if query_keypoints_xy.shape[0] < self.num_kp:
            return self.__getitem__(random.randint(0, self.__len__() - 1))

        nn_keypoints_xy = nn_keypoints_xy[valid_index_1][valid_index_2][mask]
        assert nn_keypoints_xy.shape == query_keypoints_xy.shape
        corrs = np.concatenate([query_keypoints_xy, nn_keypoints_xy], axis=1)
        corrs = self._trim_corrs(corrs)
        # flip augmentation
        if np.random.uniform() < 0.5:
            corrs[:, 0] = constants.MAX_SIZE - 1 - corrs[:, 0]
            corrs[:, 2] = constants.MAX_SIZE - 1 - corrs[:, 2]
            sbs_img = two_images_side_by_side(np.fliplr(query_cap.image), np.fliplr(nn_cap.image))
        else:
            sbs_img = two_images_side_by_side(query_cap.image, nn_cap.image)
        corrs[:, 2] += constants.MAX_SIZE
        corrs /= np.array([constants.MAX_SIZE * 2, constants.MAX_SIZE, constants.MAX_SIZE * 2, constants.MAX_SIZE])
        assert (0.0 <= corrs[:, 0]).all() and (corrs[:, 0] <= 0.5).all()
        assert (0.0 <= corrs[:, 1]).all() and (corrs[:, 1] <= 1.0).all()
        assert (0.5 <= corrs[:, 2]).all() and (corrs[:, 2] <= 1.0).all()
        assert (0.0 <= corrs[:, 3]).all() and (corrs[:, 3] <= 1.0).all()
        out = {
            'image': tvtf.normalize(tvtf.to_tensor(sbs_img), (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            'corrs': torch.from_numpy(corrs).float(),
        }
        if self.bidirectional:
            out['queries'] = torch.from_numpy(np.concatenate([corrs[:, :2], corrs[:, 2:]], axis=0)).float()
            out['targets'] = torch.from_numpy(np.concatenate([corrs[:, 2:], corrs[:, :2]], axis=0)).float()
        else:
            out['queries'] = torch.from_numpy(corrs[:, :2]).float()
            out['targets'] = torch.from_numpy(corrs[:, 2:]).float()
        return out


class COTRZoomDataset(COTRDataset):
    def __init__(self, opt, dataset_type: str):
        assert opt.crop_cam in ['no_crop', 'crop_center']
        assert opt.use_ram == False
        super().__init__(opt, dataset_type)
        self.zoom_start = opt.zoom_start
        self.zoom_end = opt.zoom_end
        self.zoom_levels = opt.zoom_levels
        self.zoom_jitter = opt.zoom_jitter
        self.zooms = np.logspace(np.log10(opt.zoom_start),
                                 np.log10(opt.zoom_end),
                                 num=opt.zoom_levels)

    def get_corrs(self, from_cap, to_cap, reduced_size=None):
        from_y, from_x = np.where(from_cap.depth_map > 0)
        from_y, from_x = from_y[..., None], from_x[..., None]
        if reduced_size is not None:
            filter_idx = np.random.choice(from_y.shape[0], reduced_size, replace=False)
            from_y, from_x = from_y[filter_idx], from_x[filter_idx]
        from_z = from_cap.depth_map[np.floor(from_y).astype('int'), np.floor(from_x).astype('int')]
        from_xy = np.concatenate([from_x, from_y], axis=1)
        from_3d_world, valid_index_1 = pcd_projector.PointCloudProjector.pcd_2d_to_pcd_3d_np(from_xy, from_z, from_cap.pinhole_cam.intrinsic_mat, motion=from_cap.cam_pose.camera_to_world, return_index=True)

        to_xyz, valid_index_2 = pcd_projector.PointCloudProjector.pcd_3d_to_pcd_2d_np(
            from_3d_world,
            to_cap.pinhole_cam.intrinsic_mat,
            to_cap.cam_pose.world_to_camera[0:3, :],
            to_cap.image.shape[:2],
            keep_z=True,
            crop=True,
            filter_neg=True,
            norm_coord=False,
            return_index=True,
        )

        to_xy = to_xyz[:, 0:2]
        to_z_proj = to_xyz[:, 2:3]
        to_z = to_cap.depth_map[np.floor(to_xy[:, 1:2]).astype('int'), np.floor(to_xy[:, 0:1]).astype('int')]
        mask = (abs(to_z - to_z_proj) < 0.5)[:, 0]
        if mask.sum() > 0:
            return np.concatenate([from_xy[valid_index_1][valid_index_2][mask], to_xy[mask]], axis=1)
        else:
            return None

    def get_seed_corr(self, from_cap, to_cap, max_try=100):
        seed_corr = self.get_corrs(from_cap, to_cap, reduced_size=max_try)
        if seed_corr is None:
            return None
        shuffle = np.random.permutation(seed_corr.shape[0])
        seed_corr = np.take(seed_corr, shuffle, axis=0)
        return seed_corr[0]

    def get_zoomed_cap(self, cap, pos, scale, jitter):
        patch = inference_helper.get_patch_centered_at(cap.image, pos, scale=scale, return_content=False)
        patch = inference_helper.get_patch_centered_at(cap.image,
                                                       pos + np.array([patch.w, patch.h]) * np.random.uniform(-jitter, jitter, 2),
                                                       scale=scale,
                                                       return_content=False)
        zoom_config = CropCamConfig(x=patch.x,
                                    y=patch.y,
                                    w=patch.w,
                                    h=patch.h,
                                    out_w=constants.MAX_SIZE,
                                    out_h=constants.MAX_SIZE,
                                    orig_w=cap.shape[1],
                                    orig_h=cap.shape[0])
        zoom_cap = capture.crop_capture(cap, zoom_config)
        return zoom_cap

    def __getitem__(self, index):
        assert self.opt.k_size == 1
        query_cap, nn_caps = self.sfm_dataset.get_query_with_knn(index)
        nn_cap = nn_caps[0]
        if self.need_rotation:
            query_cap, nn_cap = self.augment_with_rotation(query_cap, nn_cap)

        # find seed
        seed_corr = self.get_seed_corr(nn_cap, query_cap)
        if seed_corr is None:
            return self.__getitem__(random.randint(0, self.__len__() - 1))

        # crop cap
        s = np.random.choice(self.zooms)
        nn_zoom_cap = self.get_zoomed_cap(nn_cap, seed_corr[:2], s, 0)
        query_zoom_cap = self.get_zoomed_cap(query_cap, seed_corr[2:], s, self.zoom_jitter)
        assert nn_zoom_cap.shape == query_zoom_cap.shape == (constants.MAX_SIZE, constants.MAX_SIZE)
        corrs = self.get_corrs(query_zoom_cap, nn_zoom_cap)
        if corrs is None or corrs.shape[0] < self.num_kp:
            return self.__getitem__(random.randint(0, self.__len__() - 1))
        shuffle = np.random.permutation(corrs.shape[0])
        corrs = np.take(corrs, shuffle, axis=0)
        corrs = self._trim_corrs(corrs)

        # flip augmentation
        if np.random.uniform() < 0.5:
            corrs[:, 0] = constants.MAX_SIZE - 1 - corrs[:, 0]
            corrs[:, 2] = constants.MAX_SIZE - 1 - corrs[:, 2]
            sbs_img = two_images_side_by_side(np.fliplr(query_zoom_cap.image), np.fliplr(nn_zoom_cap.image))
        else:
            sbs_img = two_images_side_by_side(query_zoom_cap.image, nn_zoom_cap.image)

        corrs[:, 2] += constants.MAX_SIZE
        corrs /= np.array([constants.MAX_SIZE * 2, constants.MAX_SIZE, constants.MAX_SIZE * 2, constants.MAX_SIZE])
        assert (0.0 <= corrs[:, 0]).all() and (corrs[:, 0] <= 0.5).all()
        assert (0.0 <= corrs[:, 1]).all() and (corrs[:, 1] <= 1.0).all()
        assert (0.5 <= corrs[:, 2]).all() and (corrs[:, 2] <= 1.0).all()
        assert (0.0 <= corrs[:, 3]).all() and (corrs[:, 3] <= 1.0).all()
        out = {
            'image': tvtf.normalize(tvtf.to_tensor(sbs_img), (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            'corrs': torch.from_numpy(corrs).float(),
        }
        if self.bidirectional:
            out['queries'] = torch.from_numpy(np.concatenate([corrs[:, :2], corrs[:, 2:]], axis=0)).float()
            out['targets'] = torch.from_numpy(np.concatenate([corrs[:, 2:], corrs[:, :2]], axis=0)).float()
        else:
            out['queries'] = torch.from_numpy(corrs[:, :2]).float()
            out['targets'] = torch.from_numpy(corrs[:, 2:]).float()

        return out
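Both __getitem__ methods above pack each correspondence as (x_q, y_q, x_t, y_t), normalized against the side-by-side canvas: query x lands in [0, 0.5] and target x in [0.5, 1]. A small self-contained sketch of the inverse mapping, useful when visualizing a batch; MAX_SIZE = 256 is an assumed value for COTR.utils.constants.MAX_SIZE:

    import numpy as np

    MAX_SIZE = 256  # assumption: value of COTR.utils.constants.MAX_SIZE

    def corrs_to_pixels(corrs):
        # corrs: (N, 4) normalized side-by-side coordinates as produced above.
        scale = np.array([MAX_SIZE * 2, MAX_SIZE, MAX_SIZE * 2, MAX_SIZE], dtype=np.float64)
        pix = corrs * scale
        pix[:, 2] -= MAX_SIZE  # undo the horizontal offset of the right image
        return pix[:, :2], pix[:, 2:]  # query pixels, target pixels

    q_px, t_px = corrs_to_pixels(np.array([[0.25, 0.5, 0.75, 0.5]]))
    print(q_px, t_px)  # [[128. 128.]] [[128. 128.]]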
third_party/COTR/COTR/datasets/megadepth_dataset.py
ADDED
@@ -0,0 +1,140 @@
'''
dataset specific layer for megadepth
'''

import os
import json
import random
from collections import namedtuple

import numpy as np

from COTR.datasets import colmap_helper
from COTR.global_configs import dataset_config
from COTR.sfm_scenes import knn_search
from COTR.utils import debug_utils, utils, constants

SceneCapIndex = namedtuple('SceneCapIndex', ['scene_index', 'capture_index'])


def prefix_of_img_path_for_magedepth(img_path):
    '''
    get the prefix for image of megadepth dataset
    '''
    prefix = os.path.abspath(os.path.join(img_path, '../../../..')) + '/'
    return prefix


class MegadepthSceneDataBase():
    scenes = {}
    knn_engine_dict = {}

    @classmethod
    def _load_scene(cls, opt, scene_dir_dict):
        if scene_dir_dict['scene_dir'] not in cls.scenes:
            if opt.info_level == 'rgb':
                assert 0
            elif opt.info_level == 'rgbd':
                scene_dir = scene_dir_dict['scene_dir']
                images_dir = scene_dir_dict['image_dir']
                depth_dir = scene_dir_dict['depth_dir']
                scene = colmap_helper.ColmapWithDepthAsciiReader.read_sfm_scene_given_valid_list_path(scene_dir, images_dir, depth_dir, dataset_config[opt.dataset_name]['valid_list_json'], opt.crop_cam)
                if opt.use_ram:
                    scene.read_data_to_ram(['image', 'depth'])
            else:
                raise ValueError()
            knn_engine = knn_search.ReprojRatioKnnSearch(scene)
            cls.scenes[scene_dir_dict['scene_dir']] = scene
            cls.knn_engine_dict[scene_dir_dict['scene_dir']] = knn_engine
        else:
            pass


class MegadepthDataset():

    def __init__(self, opt, dataset_type):
        assert dataset_type in ['train', 'val', 'test']
        assert len(opt.scenes_name_list) > 0
        self.opt = opt
        self.dataset_type = dataset_type
        self.use_ram = opt.use_ram
        self.scenes_name_list = opt.scenes_name_list
        self.scenes = None
        self.knn_engine_list = None
        self.total_caps_set = None
        self.query_caps_set = None
        self.db_caps_set = None
        self.img_path_to_scene_cap_index_dict = {}
        self.scene_index_to_db_caps_mask_dict = {}
        self._load_scenes()

    @property
    def num_scenes(self):
        return len(self.scenes)

    @property
    def num_queries(self):
        return len(self.query_caps_set)

    @property
    def num_db(self):
        return len(self.db_caps_set)

    def get_scene_cap_index_by_index(self, index):
        assert index < len(self.query_caps_set)
        img_path = sorted(list(self.query_caps_set))[index]
        scene_cap_index = self.img_path_to_scene_cap_index_dict[img_path]
        return scene_cap_index

    def _get_common_subset_caps_from_json(self, json_path, total_caps):
        prefix = prefix_of_img_path_for_magedepth(list(total_caps)[0])
        with open(json_path, 'r') as f:
            common_caps = [prefix + cap for cap in json.load(f)]
        common_caps = set(total_caps) & set(common_caps)
        return common_caps

    def _extend_img_path_to_scene_cap_index_dict(self, img_path_to_cap_index_dict, scene_id):
        for key in img_path_to_cap_index_dict.keys():
            self.img_path_to_scene_cap_index_dict[key] = SceneCapIndex(scene_id, img_path_to_cap_index_dict[key])

    def _create_scene_index_to_db_caps_mask_dict(self, db_caps_set):
        scene_index_to_db_caps_mask_dict = {}
        for cap in db_caps_set:
            scene_id, cap_id = self.img_path_to_scene_cap_index_dict[cap]
            if scene_id not in scene_index_to_db_caps_mask_dict:
                scene_index_to_db_caps_mask_dict[scene_id] = []
            scene_index_to_db_caps_mask_dict[scene_id].append(cap_id)
        for _k, _v in scene_index_to_db_caps_mask_dict.items():
            scene_index_to_db_caps_mask_dict[_k] = np.array(sorted(_v))
        return scene_index_to_db_caps_mask_dict

    def _load_scenes(self):
        scenes = []
        knn_engine_list = []
        total_caps_set = set()
        for scene_id, scene_dir_dict in enumerate(self.scenes_name_list):
            MegadepthSceneDataBase._load_scene(self.opt, scene_dir_dict)
            scene = MegadepthSceneDataBase.scenes[scene_dir_dict['scene_dir']]
            knn_engine = MegadepthSceneDataBase.knn_engine_dict[scene_dir_dict['scene_dir']]
            total_caps_set = total_caps_set | set(scene.img_path_to_index_dict.keys())
            self._extend_img_path_to_scene_cap_index_dict(scene.img_path_to_index_dict, scene_id)
            scenes.append(scene)
            knn_engine_list.append(knn_engine)
        self.scenes = scenes
        self.knn_engine_list = knn_engine_list
        self.total_caps_set = total_caps_set
        self.query_caps_set = self._get_common_subset_caps_from_json(dataset_config[self.opt.dataset_name][f'{self.dataset_type}_json'], total_caps_set)
        self.db_caps_set = self._get_common_subset_caps_from_json(dataset_config[self.opt.dataset_name]['train_json'], total_caps_set)
        self.scene_index_to_db_caps_mask_dict = self._create_scene_index_to_db_caps_mask_dict(self.db_caps_set)

    def get_query_with_knn(self, index):
        scene_index, cap_index = self.get_scene_cap_index_by_index(index)
        query_cap = self.scenes[scene_index].captures[cap_index]
        knn_engine = self.knn_engine_list[scene_index]
        if scene_index in self.scene_index_to_db_caps_mask_dict:
            db_mask = self.scene_index_to_db_caps_mask_dict[scene_index]
        else:
            db_mask = None
        pool = knn_engine.get_knn(query_cap, self.opt.pool_size, db_mask=db_mask)
        nn_caps = random.sample(pool, min(len(pool), self.opt.k_size))
        return query_cap, nn_caps
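MegadepthSceneDataBase._load_scene above reads three keys from each entry of opt.scenes_name_list. A hedged sketch of the per-scene dictionary shape it expects, with placeholder paths (the keys mirror the code; the values are assumptions to be built from the dataset_config.json templates added below):

    scene_dir_dict = {
        'scene_dir': '/data/MegaDepth_v1_SfM/0022/sparse/manhattan/0_rectified/sparse',
        'image_dir': '/data/MegaDepth_v1/phoenix/S6/zl548/MegaDepth_v1/0022/dense0/imgs',
        'depth_dir': '/data/MegaDepth_v1/phoenix/S6/zl548/MegaDepth_v1/0022/dense0/depths',
    }
    # opt.scenes_name_list is a list of such dicts; opt.dataset_name selects which
    # block of dataset_config.json supplies the valid-list and train/val/test split JSONs.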
third_party/COTR/COTR/global_configs/__init__.py
ADDED
@@ -0,0 +1,10 @@
import os
import json

__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
with open(os.path.join(__location__, 'dataset_config.json'), 'r') as f:
    dataset_config = json.load(f)
with open(os.path.join(__location__, 'commons.json'), 'r') as f:
    general_config = json.load(f)
# assert os.path.isdir(general_config['out']), f'Please create {general_config["out"]}'
# assert os.path.isdir(general_config['tb_out']), f'Please create {general_config["tb_out"]}'
third_party/COTR/COTR/global_configs/commons.json
ADDED
@@ -0,0 +1 @@
{"out": "../../out", "tb_out": "../../tb_out"}
third_party/COTR/COTR/global_configs/dataset_config.json
ADDED
@@ -0,0 +1,41 @@
{
    "megadepth": {
        "valid_list_json": "/media/jiangwei/data_ssd/MegaDepth_v1_SfM/megadepth_valid_list.json",
        "train_json": "/media/jiangwei/data_ssd/MegaDepth_v1_SfM/megadepth_train.json",
        "val_json": "/media/jiangwei/data_ssd/MegaDepth_v1_SfM/megadepth_val.json",
        "test_json": "/media/jiangwei/data_ssd/MegaDepth_v1_SfM/megadepth_test.json",
        "scene_dir": "/media/jiangwei/data_ssd/MegaDepth_v1_SfM/{0}/sparse/manhattan/{1}_rectified/sparse",
        "image_dir": "/media/jiangwei/data_ssd/MegaDepth_v1/phoenix/S6/zl548/MegaDepth_v1/{0}/dense{1}/imgs",
        "depth_dir": "/media/jiangwei/data_ssd/MegaDepth_v1/phoenix/S6/zl548/MegaDepth_v1/{0}/dense{1}/depths"
    },

    "megadepth_sushi": {
        "valid_list_json": "/scratch/dataset/megadepth/MegaDepth_v1_SfM/megadepth_valid_list.json",
        "train_json": "/scratch/programs/COTR/sample_data/megadepth_train.json",
        "val_json": "/scratch/programs/COTR/sample_data/megadepth_val.json",
        "test_json": "/scratch/dataset/megadepth/MegaDepth_v1_SfM/megadepth_test.json",
        "scene_dir": "/scratch/dataset/megadepth/MegaDepth_v1_SfM/{0}/sparse/manhattan/{1}_rectified/sparse",
        "image_dir": "/scratch/dataset/megadepth/MegaDepth_v1/phoenix/S6/zl548/MegaDepth_v1/{0}/dense{1}/imgs",
        "depth_dir": "/scratch/dataset/megadepth/MegaDepth_v1/phoenix/S6/zl548/MegaDepth_v1/{0}/dense{1}/depths"
    },

    "megadepth_sockeye": {
        "valid_list_json": "/project/pr-kmyi-1/jiangwei/datasets/megadepth/MegaDepth_v1_SfM/megadepth_valid_list.json",
        "train_json": "/project/pr-kmyi-1/jiangwei/datasets/megadepth/MegaDepth_v1_SfM/megadepth_train.json",
        "val_json": "/project/pr-kmyi-1/jiangwei/datasets/megadepth/MegaDepth_v1_SfM/megadepth_val.json",
        "test_json": "/project/pr-kmyi-1/jiangwei/datasets/megadepth/MegaDepth_v1_SfM/megadepth_test.json",
        "scene_dir": "/project/pr-kmyi-1/jiangwei/datasets/megadepth/MegaDepth_v1_SfM/{0}/sparse/manhattan/{1}_rectified/sparse",
        "image_dir": "/project/pr-kmyi-1/jiangwei/datasets/megadepth/MegaDepth_v1/phoenix/S6/zl548/MegaDepth_v1/{0}/dense{1}/imgs",
        "depth_dir": "/project/pr-kmyi-1/jiangwei/datasets/megadepth/MegaDepth_v1/phoenix/S6/zl548/MegaDepth_v1/{0}/dense{1}/depths"
    },

    "megadepth_snubfin": {
        "valid_list_json": "/ubc/cs/research/kmyi/datasets/megadepth/MegaDepth_v1_SfM/megadepth_valid_list.json",
        "train_json": "/ubc/cs/research/kmyi/jw221/programs/COTR/sample_data/megadepth_train.json",
        "val_json": "/ubc/cs/research/kmyi/jw221/programs/COTR/sample_data/megadepth_val.json",
        "test_json": "/ubc/cs/research/kmyi/datasets/megadepth/MegaDepth_v1_SfM/megadepth_test.json",
        "scene_dir": "/ubc/cs/research/kmyi/datasets/megadepth/MegaDepth_v1_SfM/{0}/sparse/manhattan/{1}_rectified/sparse",
        "image_dir": "/ubc/cs/research/kmyi/datasets/megadepth/MegaDepth_v1/phoenix/S6/zl548/MegaDepth_v1/{0}/dense{1}/imgs",
        "depth_dir": "/ubc/cs/research/kmyi/datasets/megadepth/MegaDepth_v1/phoenix/S6/zl548/MegaDepth_v1/{0}/dense{1}/depths"
    }
}
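The four blocks above differ only in filesystem prefixes for the authors' machines, and COTR.global_configs loads the file at import time. A quick sketch for checking which entry a given opt.dataset_name resolves to (assumes only that the COTR package is importable); running training elsewhere requires editing these paths to point at a local MegaDepth download:

    from COTR.global_configs import dataset_config

    for name, cfg in dataset_config.items():
        # scene_dir is a template: {0} is the scene id, {1} the dense reconstruction index.
        print(name, '->', cfg['scene_dir'])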
third_party/COTR/COTR/inference/inference_helper.py
ADDED
@@ -0,0 +1,311 @@
import warnings

import cv2
import numpy as np
import torch
from torchvision.transforms import functional as tvtf
from tqdm import tqdm
import PIL

from COTR.utils import utils, debug_utils
from COTR.utils.constants import MAX_SIZE
from COTR.cameras.capture import crop_center_max_np, pad_to_square_np
from COTR.utils.utils import ImagePatch

THRESHOLD_SPARSE = 0.02
THRESHOLD_PIXELS_RELATIVE = 0.02
BASE_ZOOM = 1.0
THRESHOLD_AREA = 0.02
LARGE_GPU = True


def find_prediction_loop(arr):
    '''
    loop ends at last element
    '''
    assert arr.shape[1] == 2, 'requires shape (N, 2)'
    start_index = np.where(np.prod(arr[:-1] == arr[-1], axis=1))[0][0]
    return arr[start_index:-1]


def two_images_side_by_side(img_a, img_b):
    assert img_a.shape == img_b.shape, f'{img_a.shape} vs {img_b.shape}'
    assert img_a.dtype == img_b.dtype
    h, w, c = img_a.shape
    canvas = np.zeros((h, 2 * w, c), dtype=img_a.dtype)
    canvas[:, 0 * w:1 * w, :] = img_a
    canvas[:, 1 * w:2 * w, :] = img_b
    return canvas


def to_square_patches(img):
    patches = []
    h, w, _ = img.shape
    short = size = min(h, w)
    long = max(h, w)
    if long == short:
        patch_0 = ImagePatch(img[:size, :size], 0, 0, size, size, w, h)
        patches = [patch_0]
    elif long <= size * 2:
        warnings.warn('Spatial smoothness in dense optical flow is lost, but sparse matching and triangulation should be fine')
        patch_0 = ImagePatch(img[:size, :size], 0, 0, size, size, w, h)
        patch_1 = ImagePatch(img[-size:, -size:], w - size, h - size, size, size, w, h)
        patches = [patch_0, patch_1]
        # patches += subdivide_patch(patch_0)
        # patches += subdivide_patch(patch_1)
    else:
        raise NotImplementedError
    return patches


def merge_flow_patches(corrs):
    confidence = np.ones([corrs[0].oh, corrs[0].ow]) * 100
    flow = np.zeros([corrs[0].oh, corrs[0].ow, 2])
    cmap = np.ones([corrs[0].oh, corrs[0].ow]) * -1
    for i, c in enumerate(corrs):
        temp = np.ones([c.oh, c.ow]) * 100
        temp[c.y:c.y + c.h, c.x:c.x + c.w] = c.patch[..., 2]
        tempf = np.zeros([c.oh, c.ow, 2])
        tempf[c.y:c.y + c.h, c.x:c.x + c.w] = c.patch[..., :2]
        min_ind = np.stack([temp, confidence], axis=-1).argmin(axis=-1)
        min_ind = min_ind == 0
        confidence[min_ind] = temp[min_ind]
        flow[min_ind] = tempf[min_ind]
        cmap[min_ind] = i
    return flow, confidence, cmap


def get_patch_centered_at(img, pos, scale=1.0, return_content=True, img_shape=None):
    '''
    pos - [x, y]
    '''
    if img_shape is None:
        img_shape = img.shape
    h, w, _ = img_shape
    short = min(h, w)
    scale = np.clip(scale, 0.0, 1.0)
    size = short * scale
    size = int((size // 2) * 2)
    lu_y = int(pos[1] - size // 2)
    lu_x = int(pos[0] - size // 2)
    if lu_y < 0:
        lu_y -= lu_y
    if lu_x < 0:
        lu_x -= lu_x
    if lu_y + size > h:
        lu_y -= (lu_y + size) - (h)
    if lu_x + size > w:
        lu_x -= (lu_x + size) - (w)
    if return_content:
        return ImagePatch(img[lu_y:lu_y + size, lu_x:lu_x + size], lu_x, lu_y, size, size, w, h)
    else:
        return ImagePatch(None, lu_x, lu_y, size, size, w, h)


def cotr_patch_flow_exhaustive(model, patches_a, patches_b):
    def one_pass(model, img_a, img_b):
        device = next(model.parameters()).device
        assert img_a.shape[0] == img_a.shape[1]
        assert img_b.shape[0] == img_b.shape[1]
        img_a = np.array(PIL.Image.fromarray(img_a).resize((MAX_SIZE, MAX_SIZE), resample=PIL.Image.BILINEAR))
        img_b = np.array(PIL.Image.fromarray(img_b).resize((MAX_SIZE, MAX_SIZE), resample=PIL.Image.BILINEAR))
        img = two_images_side_by_side(img_a, img_b)
        img = tvtf.normalize(tvtf.to_tensor(img), (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)).float()[None]
        img = img.to(device)

        q_list = []
        for i in range(MAX_SIZE):
            queries = []
            for j in range(MAX_SIZE * 2):
                queries.append([(j) / (MAX_SIZE * 2), i / MAX_SIZE])
            queries = np.array(queries)
            q_list.append(queries)
        if LARGE_GPU:
            try:
                queries = torch.from_numpy(np.concatenate(q_list))[None].float().to(device)
                out = model.forward(img, queries)['pred_corrs'].detach().cpu().numpy()[0]
                out_list = out.reshape(MAX_SIZE, MAX_SIZE * 2, -1)
            except:
                assert 0, 'set LARGE_GPU to False'
        else:
            out_list = []
            for q in q_list:
                queries = torch.from_numpy(q)[None].float().to(device)
                out = model.forward(img, queries)['pred_corrs'].detach().cpu().numpy()[0]
                out_list.append(out)
            out_list = np.array(out_list)
        in_grid = torch.from_numpy(np.array(q_list)).float()[None] * 2 - 1
        out_grid = torch.from_numpy(out_list).float()[None] * 2 - 1
        cycle_grid = torch.nn.functional.grid_sample(out_grid.permute(0, 3, 1, 2), out_grid).permute(0, 2, 3, 1)
        confidence = torch.norm(cycle_grid[0, ...] - in_grid[0, ...], dim=-1)
        corr = out_grid[0].clone()
        corr[:, :MAX_SIZE, 0] = corr[:, :MAX_SIZE, 0] * 2 - 1
        corr[:, MAX_SIZE:, 0] = corr[:, MAX_SIZE:, 0] * 2 + 1
        corr = torch.cat([corr, confidence[..., None]], dim=-1).numpy()
        return corr[:, :MAX_SIZE, :], corr[:, MAX_SIZE:, :]
    corrs_a = []
    corrs_b = []

    for p_i in patches_a:
        for p_j in patches_b:
            c_i, c_j = one_pass(model, p_i.patch, p_j.patch)
            base_corners = np.array([[-1, -1], [1, -1], [1, 1], [-1, 1]])
            real_corners_j = (np.array([[p_j.x, p_j.y], [p_j.x + p_j.w, p_j.y], [p_j.x + p_j.w, p_j.y + p_j.h], [p_j.x, p_j.y + p_j.h]]) / np.array([p_j.ow, p_j.oh])) * 2 + np.array([-1, -1])
            real_corners_i = (np.array([[p_i.x, p_i.y], [p_i.x + p_i.w, p_i.y], [p_i.x + p_i.w, p_i.y + p_i.h], [p_i.x, p_i.y + p_i.h]]) / np.array([p_i.ow, p_i.oh])) * 2 + np.array([-1, -1])
            T_i = cv2.getAffineTransform(base_corners[:3].astype(np.float32), real_corners_j[:3].astype(np.float32))
            T_j = cv2.getAffineTransform(base_corners[:3].astype(np.float32), real_corners_i[:3].astype(np.float32))
            c_i[..., :2] = c_i[..., :2] @ T_i[:2, :2] + T_i[:, 2]
            c_j[..., :2] = c_j[..., :2] @ T_j[:2, :2] + T_j[:, 2]
            c_i = utils.float_image_resize(c_i, (p_i.h, p_i.w))
            c_j = utils.float_image_resize(c_j, (p_j.h, p_j.w))
            c_i = ImagePatch(c_i, p_i.x, p_i.y, p_i.w, p_i.h, p_i.ow, p_i.oh)
            c_j = ImagePatch(c_j, p_j.x, p_j.y, p_j.w, p_j.h, p_j.ow, p_j.oh)
            corrs_a.append(c_i)
            corrs_b.append(c_j)
    return corrs_a, corrs_b


def cotr_flow(model, img_a, img_b):
    # assert img_a.shape[0] == img_a.shape[1]
    # assert img_b.shape[0] == img_b.shape[1]
    patches_a = to_square_patches(img_a)
    patches_b = to_square_patches(img_b)

    corrs_a, corrs_b = cotr_patch_flow_exhaustive(model, patches_a, patches_b)
    corr_a, con_a, cmap_a = merge_flow_patches(corrs_a)
    corr_b, con_b, cmap_b = merge_flow_patches(corrs_b)

    resample_a = utils.torch_img_to_np_img(torch.nn.functional.grid_sample(utils.np_img_to_torch_img(img_b)[None].float(),
                                                                           torch.from_numpy(corr_a)[None].float())[0])
    resample_b = utils.torch_img_to_np_img(torch.nn.functional.grid_sample(utils.np_img_to_torch_img(img_a)[None].float(),
                                                                           torch.from_numpy(corr_b)[None].float())[0])
    return corr_a, con_a, resample_a, corr_b, con_b, resample_b


def cotr_corr_base(model, img_a, img_b, queries_a):
    def one_pass(model, img_a, img_b, queries):
        device = next(model.parameters()).device
        assert img_a.shape[0] == img_a.shape[1]
        assert img_b.shape[0] == img_b.shape[1]
        img_a = np.array(PIL.Image.fromarray(img_a).resize((MAX_SIZE, MAX_SIZE), resample=PIL.Image.BILINEAR))
        img_b = np.array(PIL.Image.fromarray(img_b).resize((MAX_SIZE, MAX_SIZE), resample=PIL.Image.BILINEAR))
        img = two_images_side_by_side(img_a, img_b)
        img = tvtf.normalize(tvtf.to_tensor(img), (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)).float()[None]
        img = img.to(device)

        queries = torch.from_numpy(queries)[None].float().to(device)
        out = model.forward(img, queries)['pred_corrs'].clone().detach()
        cycle = model.forward(img, out)['pred_corrs'].clone().detach()

        queries = queries.cpu().numpy()[0]
        out = out.cpu().numpy()[0]
        cycle = cycle.cpu().numpy()[0]
        conf = np.linalg.norm(queries - cycle, axis=1, keepdims=True)
        return np.concatenate([out, conf], axis=1)

    patches_a = to_square_patches(img_a)
    patches_b = to_square_patches(img_b)
    pred_list = []

    for p_i in patches_a:
        for p_j in patches_b:
            normalized_queries_a = queries_a.copy()
            mask = (normalized_queries_a[:, 0] >= p_i.x) & (normalized_queries_a[:, 1] >= p_i.y) & (normalized_queries_a[:, 0] <= p_i.x + p_i.w) & (normalized_queries_a[:, 1] <= p_i.y + p_i.h)
            normalized_queries_a[:, 0] -= p_i.x
            normalized_queries_a[:, 1] -= p_i.y
            normalized_queries_a[:, 0] /= 2 * p_i.w
            normalized_queries_a[:, 1] /= p_i.h
            pred = one_pass(model, p_i.patch, p_j.patch, normalized_queries_a)
            pred[~mask, 2] = np.inf
            pred[:, 0] -= 0.5
            pred[:, 0] *= 2 * p_j.w
            pred[:, 0] += p_j.x
            pred[:, 1] *= p_j.h
            pred[:, 1] += p_j.y
            pred_list.append(pred)

    pred_list = np.stack(pred_list).transpose(1, 0, 2)
    out = []
    for item in pred_list:
        out.append(item[np.argmin(item[..., 2], axis=0)])
    out = np.array(out)[..., :2]
    return np.concatenate([queries_a, out], axis=1)


try:
    from vispy import gloo
    from vispy import app
    from vispy.util.ptime import time
    from scipy.spatial import Delaunay
    from vispy.gloo.wrappers import read_pixels

    app.use_app('glfw')


    vertex_shader = """
    attribute vec4 color;
    attribute vec2 position;
    varying vec4 v_color;
    void main()
    {
        gl_Position = vec4(position, 0.0, 1.0);
        v_color = color;
    } """

    fragment_shader = """
    varying vec4 v_color;
    void main()
    {
        gl_FragColor = v_color;
    } """


    class Canvas(app.Canvas):
        def __init__(self, mesh, color, size):
            # We hide the canvas upon creation.
            app.Canvas.__init__(self, show=False, size=size)
            self._t0 = time()
            # Texture where we render the scene.
            self._rendertex = gloo.Texture2D(shape=self.size[::-1] + (4,), internalformat='rgba32f')
            # FBO.
            self._fbo = gloo.FrameBuffer(self._rendertex,
                                         gloo.RenderBuffer(self.size[::-1]))
            # Regular program that will be rendered to the FBO.
            self.program = gloo.Program(vertex_shader, fragment_shader)
            self.program["position"] = mesh
            self.program['color'] = color
            # We manually draw the hidden canvas.
            self.update()

        def on_draw(self, event):
            # Render in the FBO.
            with self._fbo:
                gloo.clear('black')
                gloo.set_viewport(0, 0, *self.size)
                self.program.draw()
                # Retrieve the contents of the FBO texture.
                self.im = read_pixels((0, 0, self.size[0], self.size[1]), True, out_type='float')
            self._time = time() - self._t0
            # Immediately exit the application.
            app.quit()


    def triangulate_corr(corr, from_shape, to_shape):
        corr = corr.copy()
        to_shape = to_shape[:2]
        from_shape = from_shape[:2]
        corr = corr / np.concatenate([from_shape[::-1], to_shape[::-1]])
        tri = Delaunay(corr[:, :2])
        mesh = corr[:, :2][tri.simplices].astype(np.float32) * 2 - 1
        mesh[..., 1] *= -1
        color = corr[:, 2:][tri.simplices].astype(np.float32)
        color = np.concatenate([color, np.ones_like(color[..., 0:2])], axis=-1)
        c = Canvas(mesh.reshape(-1, 2), color.reshape(-1, 4), size=(from_shape[::-1]))
        app.run()
        render = c.im.copy()
        render = render[..., :2]
        render *= np.array(to_shape[::-1])
        return render
except:
    print('cannot use vispy, setting triangulate_corr as None')
    triangulate_corr = None
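two_images_side_by_side above defines the canonical COTR input layout: both images are packed into one H x 2W canvas, so query x-coordinates in [0, 0.5] address the left image and [0.5, 1] the right. A minimal, model-free check of that invariant (pure NumPy, only the helper from this file is imported):

    import numpy as np
    from COTR.inference.inference_helper import two_images_side_by_side

    img_a = np.zeros((256, 256, 3), dtype=np.uint8)          # left: all black
    img_b = np.full((256, 256, 3), 255, dtype=np.uint8)      # right: all white
    canvas = two_images_side_by_side(img_a, img_b)
    assert canvas.shape == (256, 512, 3)
    assert canvas[:, :256].max() == 0 and canvas[:, 256:].min() == 255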
third_party/COTR/COTR/inference/refinement_task.py
ADDED
@@ -0,0 +1,191 @@
import time

import numpy as np
import torch
from torchvision.transforms import functional as tvtf
import imageio
import PIL

from COTR.inference.inference_helper import BASE_ZOOM, THRESHOLD_PIXELS_RELATIVE, get_patch_centered_at, two_images_side_by_side, find_prediction_loop
from COTR.utils import debug_utils, utils
from COTR.utils.constants import MAX_SIZE
from COTR.utils.utils import ImagePatch


class RefinementTask():
    def __init__(self, image_from, image_to, loc_from, loc_to, area_from, area_to, converge_iters, zoom_ins, identifier=None):
        self.identifier = identifier
        self.image_from = image_from
        self.image_to = image_to
        self.loc_from = loc_from
        self.best_loc_to = loc_to
        self.cur_loc_to = loc_to
        self.area_from = area_from
        self.area_to = area_to
        if self.area_from < self.area_to:
            self.s_from = BASE_ZOOM
            self.s_to = BASE_ZOOM * np.sqrt(self.area_to / self.area_from)
        else:
            self.s_to = BASE_ZOOM
            self.s_from = BASE_ZOOM * np.sqrt(self.area_from / self.area_to)

        self.cur_job = {}
        self.status = 'unfinished'
        self.result = 'unknown'

        self.converge_iters = converge_iters
        self.zoom_ins = zoom_ins
        self.cur_zoom_idx = 0
        self.cur_iter = 0
        self.total_iter = 0

        self.loc_to_at_zoom = []
        self.loc_history = [loc_to]
        self.all_loc_to_dict = {}
        self.job_history = []
        self.submitted = False

    @property
    def cur_zoom(self):
        return self.zoom_ins[self.cur_zoom_idx]

    @property
    def confidence_scaling_factor(self):
        if self.cur_zoom_idx > 0:
            conf_scaling = float(self.cur_zoom) / float(self.zoom_ins[0])
        else:
            conf_scaling = 1.0
        return conf_scaling

    def peek(self):
        assert self.status == 'unfinished'
        patch_from = get_patch_centered_at(None, self.loc_from, scale=self.s_from * self.cur_zoom, return_content=False, img_shape=self.image_from.shape)
        patch_to = get_patch_centered_at(None, self.cur_loc_to, scale=self.s_to * self.cur_zoom, return_content=False, img_shape=self.image_to.shape)
        top_job = {'patch_from': patch_from,
                   'patch_to': patch_to,
                   'loc_from': self.loc_from,
                   'loc_to': self.cur_loc_to,
                   }
        return top_job

    def get_task_pilot(self, pilot):
        assert self.status == 'unfinished'
        patch_from = ImagePatch(None, pilot.cur_job['patch_from'].x, pilot.cur_job['patch_from'].y, pilot.cur_job['patch_from'].w, pilot.cur_job['patch_from'].h, pilot.cur_job['patch_from'].ow, pilot.cur_job['patch_from'].oh)
        patch_to = ImagePatch(None, pilot.cur_job['patch_to'].x, pilot.cur_job['patch_to'].y, pilot.cur_job['patch_to'].w, pilot.cur_job['patch_to'].h, pilot.cur_job['patch_to'].ow, pilot.cur_job['patch_to'].oh)
        query = torch.from_numpy((np.array(self.loc_from) - np.array([patch_from.x, patch_from.y])) / np.array([patch_from.w * 2, patch_from.h]))[None].float()
        self.cur_job = {'patch_from': patch_from,
                        'patch_to': patch_to,
                        'loc_from': self.loc_from,
                        'loc_to': self.cur_loc_to,
                        'img': None,
                        }
        self.job_history.append((patch_from.h, patch_from.w, patch_to.h, patch_to.w))
        assert self.submitted == False
        self.submitted = True
        return None, query

    def get_task_fast(self):
        assert self.status == 'unfinished'
        patch_from = get_patch_centered_at(self.image_from, self.loc_from, scale=self.s_from * self.cur_zoom, return_content=False)
        patch_to = get_patch_centered_at(self.image_to, self.cur_loc_to, scale=self.s_to * self.cur_zoom, return_content=False)
        query = torch.from_numpy((np.array(self.loc_from) - np.array([patch_from.x, patch_from.y])) / np.array([patch_from.w * 2, patch_from.h]))[None].float()
        self.cur_job = {'patch_from': patch_from,
                        'patch_to': patch_to,
                        'loc_from': self.loc_from,
                        'loc_to': self.cur_loc_to,
                        'img': None,
                        }

        self.job_history.append((patch_from.h, patch_from.w, patch_to.h, patch_to.w))
        assert self.submitted == False
        self.submitted = True

        return None, query

    def get_task(self):
        assert self.status == 'unfinished'
        patch_from = get_patch_centered_at(self.image_from, self.loc_from, scale=self.s_from * self.cur_zoom)
        patch_to = get_patch_centered_at(self.image_to, self.cur_loc_to, scale=self.s_to * self.cur_zoom)

        query = torch.from_numpy((np.array(self.loc_from) - np.array([patch_from.x, patch_from.y])) / np.array([patch_from.w * 2, patch_from.h]))[None].float()

        img_from = patch_from.patch
        img_to = patch_to.patch
        assert img_from.shape[0] == img_from.shape[1]
        assert img_to.shape[0] == img_to.shape[1]

        img_from = np.array(PIL.Image.fromarray(img_from).resize((MAX_SIZE, MAX_SIZE), resample=PIL.Image.BILINEAR))
        img_to = np.array(PIL.Image.fromarray(img_to).resize((MAX_SIZE, MAX_SIZE), resample=PIL.Image.BILINEAR))
        img = two_images_side_by_side(img_from, img_to)
        img = tvtf.normalize(tvtf.to_tensor(img), (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)).float()

        self.cur_job = {'patch_from': ImagePatch(None, patch_from.x, patch_from.y, patch_from.w, patch_from.h, patch_from.ow, patch_from.oh),
                        'patch_to': ImagePatch(None, patch_to.x, patch_to.y, patch_to.w, patch_to.h, patch_to.ow, patch_to.oh),
                        'loc_from': self.loc_from,
                        'loc_to': self.cur_loc_to,
                        }

        self.job_history.append((patch_from.h, patch_from.w, patch_to.h, patch_to.w))
        assert self.submitted == False
        self.submitted = True

        return img, query

    def next_zoom(self):
        if self.cur_zoom_idx >= len(self.zoom_ins) - 1:
            self.status = 'finished'
            if self.conclude() is None:
                self.result = 'bad'
            else:
                self.result = 'good'
        self.cur_zoom_idx += 1
        self.cur_iter = 0
        self.loc_to_at_zoom = []

    def scale_to_loc(self, raw_to_loc):
        raw_to_loc = raw_to_loc.copy()
        patch_b = self.cur_job['patch_to']
        raw_to_loc[0] = (raw_to_loc[0] - 0.5) * 2
        loc_to = raw_to_loc * np.array([patch_b.w, patch_b.h])
        loc_to = loc_to + np.array([patch_b.x, patch_b.y])
        return loc_to

    def step(self, raw_to_loc):
        assert self.submitted == True
        self.submitted = False
        loc_to = self.scale_to_loc(raw_to_loc)
        self.total_iter += 1
        self.loc_to_at_zoom.append(loc_to)
        self.cur_loc_to = loc_to
        zoom_finished = False
        if self.cur_zoom_idx == len(self.zoom_ins) - 1:
            # converge at the last level
            if len(self.loc_to_at_zoom) >= 2:
                zoom_finished = np.prod(self.loc_to_at_zoom[:-1] == loc_to, axis=1, keepdims=True).any()
            if self.cur_iter >= self.converge_iters - 1:
                zoom_finished = True
            self.cur_iter += 1
        else:
            # finish immediately for other levels
            zoom_finished = True
        if zoom_finished:
            self.all_loc_to_dict[self.cur_zoom] = np.array(self.loc_to_at_zoom).copy()
            last_level_loc_to = self.all_loc_to_dict[self.cur_zoom]
            if len(last_level_loc_to) >= 2:
                has_loop = np.prod(last_level_loc_to[:-1] == last_level_loc_to[-1], axis=1, keepdims=True).any()
                if has_loop:
                    loop = find_prediction_loop(last_level_loc_to)
                    loc_to = loop.mean(axis=0)
            self.loc_history.append(loc_to)
            self.best_loc_to = loc_to
            self.cur_loc_to = self.best_loc_to
            self.next_zoom()

    def conclude(self, force=False):
        loc_history = np.array(self.loc_history)
        if (force == False) and (max(loc_history.std(axis=0)) >= THRESHOLD_PIXELS_RELATIVE * max(*self.image_to.shape)):
            return None
        return np.concatenate([self.loc_from, self.best_loc_to])

    def conclude_intermedia(self):
        return np.concatenate([np.array(self.loc_history), np.array(self.job_history)], axis=1)
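RefinementTask instances are consumed by the SparseEngine added next: the engine batches get_task() crops, runs the model once per batch, and feeds each prediction back through step() until every zoom level has converged. A hedged usage sketch; only the SparseEngine calls mirror the code in this commit, while the trained model and the input images are assumptions:

    import numpy as np
    from COTR.inference.sparse_engine import SparseEngine

    # Assumption: `model` is a trained COTR network already on the GPU
    # (e.g. built from COTR.models and loaded from the shipped checkpoint).
    img_a = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)  # placeholder RGB images
    img_b = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)

    engine = SparseEngine(model, batch_size=32, mode='tile')
    corrs = engine.cotr_corr_multiscale(img_a, img_b,
                                        zoom_ins=np.linspace(0.5, 0.0625, 4),
                                        converge_iters=1,
                                        max_corrs=300)
    # corrs is an (N, 4) array of (x_a, y_a, x_b, y_b) pixel correspondences.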
third_party/COTR/COTR/inference/sparse_engine.py
ADDED
@@ -0,0 +1,427 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''
|
2 |
+
Inference engine for sparse image pair correspondences
|
3 |
+
'''
|
4 |
+
|
5 |
+
import time
|
6 |
+
import random
|
7 |
+
|
8 |
+
import numpy as np
|
9 |
+
import torch
|
10 |
+
|
11 |
+
from COTR.inference.inference_helper import THRESHOLD_SPARSE, THRESHOLD_AREA, cotr_flow, cotr_corr_base
|
12 |
+
from COTR.inference.refinement_task import RefinementTask
|
13 |
+
from COTR.utils import debug_utils, utils
|
14 |
+
from COTR.cameras.capture import stretch_to_square_np
|
15 |
+
|
16 |
+
|
17 |
+
class SparseEngine():
|
18 |
+
def __init__(self, model, batch_size, mode='stretching'):
|
19 |
+
assert mode in ['stretching', 'tile']
|
20 |
+
self.model = model
|
21 |
+
self.batch_size = batch_size
|
22 |
+
self.total_tasks = 0
|
23 |
+
self.mode = mode
|
24 |
+
|
25 |
+
def form_batch(self, tasks, zoom=None):
|
26 |
+
counter = 0
|
27 |
+
task_ref = []
|
28 |
+
img_batch = []
|
29 |
+
query_batch = []
|
30 |
+
for t in tasks:
|
31 |
+
if t.status == 'unfinished' and t.submitted == False:
|
32 |
+
if zoom is not None and t.cur_zoom != zoom:
|
33 |
+
continue
|
34 |
+
task_ref.append(t)
|
35 |
+
img, query = t.get_task()
|
36 |
+
img_batch.append(img)
|
37 |
+
query_batch.append(query)
|
38 |
+
counter += 1
|
39 |
+
if counter >= self.batch_size:
|
40 |
+
break
|
41 |
+
if len(task_ref) == 0:
|
42 |
+
return [], [], []
|
43 |
+
img_batch = torch.stack(img_batch)
|
44 |
+
query_batch = torch.stack(query_batch)
|
45 |
+
return task_ref, img_batch, query_batch
|
46 |
+
|
47 |
+
def infer_batch(self, img_batch, query_batch):
|
48 |
+
self.total_tasks += img_batch.shape[0]
|
49 |
+
device = next(self.model.parameters()).device
|
50 |
+
img_batch = img_batch.to(device)
|
51 |
+
query_batch = query_batch.to(device)
|
52 |
+
out = self.model(img_batch, query_batch)['pred_corrs'].clone().detach()
|
53 |
+
out = out.cpu().numpy()[:, 0, :]
|
54 |
+
if utils.has_nan(out):
|
55 |
+
raise ValueError('NaN in prediction')
|
56 |
+
return out
|
57 |
+
|
58 |
+
def conclude_tasks(self, tasks, return_idx=False, force=False,
|
59 |
+
offset_x_from=0,
|
60 |
+
offset_y_from=0,
|
61 |
+
offset_x_to=0,
|
62 |
+
offset_y_to=0,
|
63 |
+
img_a_shape=None,
|
64 |
+
img_b_shape=None):
|
65 |
+
corrs = []
|
66 |
+
idx = []
|
67 |
+
for t in tasks:
|
68 |
+
if t.status == 'finished':
|
69 |
+
out = t.conclude(force)
|
70 |
+
if out is not None:
|
71 |
+
corrs.append(np.array(out))
|
72 |
+
idx.append(t.identifier)
|
73 |
+
corrs = np.array(corrs)
|
74 |
+
idx = np.array(idx)
|
75 |
+
if corrs.shape[0] > 0:
|
76 |
+
corrs -= np.array([offset_x_from, offset_y_from, offset_x_to, offset_y_to])
|
77 |
+
if img_a_shape is not None and img_b_shape is not None and not force:
|
78 |
+
border_mask = np.prod(corrs < np.concatenate([img_a_shape[::-1], img_b_shape[::-1]]), axis=1)
|
79 |
+
border_mask = (np.prod(corrs > np.array([0, 0, 0, 0]), axis=1) * border_mask).astype(np.bool)
|
80 |
+
corrs = corrs[border_mask]
|
81 |
+
idx = idx[border_mask]
|
82 |
+
if return_idx:
|
83 |
+
return corrs, idx
|
84 |
+
return corrs
|
85 |
+
|
86 |
+
def num_finished_tasks(self, tasks):
|
87 |
+
counter = 0
|
88 |
+
for t in tasks:
|
89 |
+
if t.status == 'finished':
|
90 |
+
counter += 1
|
91 |
+
return counter
|
92 |
+
|
93 |
+
def num_good_tasks(self, tasks):
|
94 |
+
counter = 0
|
95 |
+
for t in tasks:
|
96 |
+
if t.result == 'good':
|
97 |
+
counter += 1
|
98 |
+
return counter
|
99 |
+
|
100 |
+
def gen_tasks_w_known_scale(self, img_a, img_b, queries_a, areas, zoom_ins=[1.0], converge_iters=1, max_corrs=1000):
|
101 |
+
assert self.mode == 'tile'
|
102 |
+
corr_a = cotr_corr_base(self.model, img_a, img_b, queries_a)
|
103 |
+
tasks = []
|
104 |
+
for c in corr_a:
|
105 |
+
tasks.append(RefinementTask(img_a, img_b, c[:2], c[2:], areas[0], areas[1], converge_iters, zoom_ins))
|
106 |
+
return tasks
|
107 |
+
|
108 |
+
def gen_tasks(self, img_a, img_b, zoom_ins=[1.0], converge_iters=1, max_corrs=1000, queries_a=None, force=False, areas=None):
|
109 |
+
if areas is not None:
|
110 |
+
assert queries_a is not None
|
111 |
+
assert force == True
|
112 |
+
assert max_corrs >= queries_a.shape[0]
|
113 |
+
return self.gen_tasks_w_known_scale(img_a, img_b, queries_a, areas, zoom_ins=zoom_ins, converge_iters=converge_iters, max_corrs=max_corrs)
|
114 |
+
if self.mode == 'stretching':
|
115 |
+
if img_a.shape[0] != img_a.shape[1] or img_b.shape[0] != img_b.shape[1]:
|
116 |
+
img_a_shape = img_a.shape
|
117 |
+
img_b_shape = img_b.shape
|
118 |
+
img_a_sq = stretch_to_square_np(img_a.copy())
|
119 |
+
img_b_sq = stretch_to_square_np(img_b.copy())
|
120 |
+
corr_a, con_a, resample_a, corr_b, con_b, resample_b = cotr_flow(self.model,
|
121 |
+
img_a_sq,
|
122 |
+
img_b_sq
|
123 |
+
)
|
124 |
+
corr_a = utils.float_image_resize(corr_a, img_a_shape[:2])
|
125 |
+
con_a = utils.float_image_resize(con_a, img_a_shape[:2])
|
126 |
+
resample_a = utils.float_image_resize(resample_a, img_a_shape[:2])
|
127 |
+
corr_b = utils.float_image_resize(corr_b, img_b_shape[:2])
|
128 |
+
con_b = utils.float_image_resize(con_b, img_b_shape[:2])
|
129 |
+
resample_b = utils.float_image_resize(resample_b, img_b_shape[:2])
|
130 |
+
else:
|
131 |
+
corr_a, con_a, resample_a, corr_b, con_b, resample_b = cotr_flow(self.model,
|
132 |
+
img_a,
|
133 |
+
img_b
|
134 |
+
)
|
135 |
+
elif self.mode == 'tile':
|
136 |
+
corr_a, con_a, resample_a, corr_b, con_b, resample_b = cotr_flow(self.model,
|
137 |
+
img_a,
|
138 |
+
img_b
|
139 |
+
)
|
140 |
+
else:
|
141 |
+
raise ValueError(f'unsupported mode: {self.mode}')
|
142 |
+
mask_a = con_a < THRESHOLD_SPARSE
|
143 |
+
mask_b = con_b < THRESHOLD_SPARSE
|
144 |
+
area_a = (con_a < THRESHOLD_AREA).sum() / mask_a.size
|
145 |
+
area_b = (con_b < THRESHOLD_AREA).sum() / mask_b.size
|
146 |
+
tasks = []
|
147 |
+
|
148 |
+
if queries_a is None:
|
149 |
+
index_a = np.where(mask_a)
|
150 |
+
index_a = np.array(index_a).T
|
151 |
+
index_a = index_a[np.random.choice(len(index_a), min(max_corrs, len(index_a)))]
|
152 |
+
index_b = np.where(mask_b)
|
153 |
+
index_b = np.array(index_b).T
|
154 |
+
index_b = index_b[np.random.choice(len(index_b), min(max_corrs, len(index_b)))]
|
155 |
+
for pos in index_a:
|
156 |
+
loc_from = pos[::-1]
|
157 |
+
loc_to = (corr_a[tuple(np.floor(pos).astype('int'))].copy() * 0.5 + 0.5) * img_b.shape[:2][::-1]
|
158 |
+
tasks.append(RefinementTask(img_a, img_b, loc_from, loc_to, area_a, area_b, converge_iters, zoom_ins))
|
159 |
+
for pos in index_b:
|
160 |
+
'''
|
161 |
+
trick: suppose to fix the query point location(loc_from),
|
162 |
+
but here it fixes the first guess(loc_to).
|
163 |
+
'''
|
164 |
+
loc_from = pos[::-1]
|
165 |
+
loc_to = (corr_b[tuple(np.floor(pos).astype('int'))].copy() * 0.5 + 0.5) * img_a.shape[:2][::-1]
|
166 |
+
tasks.append(RefinementTask(img_a, img_b, loc_to, loc_from, area_a, area_b, converge_iters, zoom_ins))
|
167 |
+
else:
|
168 |
+
if force:
|
169 |
+
for i, loc_from in enumerate(queries_a):
|
170 |
+
pos = loc_from[::-1]
|
171 |
+
pos = np.array([np.clip(pos[0], 0, corr_a.shape[0] - 1), np.clip(pos[1], 0, corr_a.shape[1] - 1)], dtype=int)  # np.int is removed in modern NumPy
|
172 |
+
loc_to = (corr_a[tuple(pos)].copy() * 0.5 + 0.5) * img_b.shape[:2][::-1]
|
173 |
+
tasks.append(RefinementTask(img_a, img_b, loc_from, loc_to, area_a, area_b, converge_iters, zoom_ins, identifier=i))
|
174 |
+
else:
|
175 |
+
for i, loc_from in enumerate(queries_a):
|
176 |
+
pos = loc_from[::-1]
|
177 |
+
if (pos > np.array(img_a.shape[:2]) - 1).any() or (pos < 0).any():
|
178 |
+
continue
|
179 |
+
if mask_a[tuple(np.floor(pos).astype('int'))]:
|
180 |
+
loc_to = (corr_a[tuple(np.floor(pos).astype('int'))].copy() * 0.5 + 0.5) * img_b.shape[:2][::-1]
|
181 |
+
tasks.append(RefinementTask(img_a, img_b, loc_from, loc_to, area_a, area_b, converge_iters, zoom_ins, identifier=i))
|
182 |
+
if len(tasks) < max_corrs:
|
183 |
+
extra = max_corrs - len(tasks)
|
184 |
+
counter = 0
|
185 |
+
for i, loc_from in enumerate(queries_a):
|
186 |
+
if counter >= extra:
|
187 |
+
break
|
188 |
+
pos = loc_from[::-1]
|
189 |
+
if (pos > np.array(img_a.shape[:2]) - 1).any() or (pos < 0).any():
|
190 |
+
continue
|
191 |
+
if mask_a[tuple(np.floor(pos).astype('int'))] == False:
|
192 |
+
loc_to = (corr_a[tuple(np.floor(pos).astype('int'))].copy() * 0.5 + 0.5) * img_b.shape[:2][::-1]
|
193 |
+
tasks.append(RefinementTask(img_a, img_b, loc_from, loc_to, area_a, area_b, converge_iters, zoom_ins, identifier=i))
|
194 |
+
counter += 1
|
195 |
+
return tasks
|
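The loc_to computations above convert entries of the correspondence maps, which are stored in normalized coordinates in roughly [-1, 1], into pixel coordinates of the other image: `* 0.5 + 0.5` maps to [0, 1] and `img.shape[:2][::-1]` supplies the (W, H) scaling. A minimal numpy sketch of that mapping, with made-up values for illustration:

```python
import numpy as np

# Illustrative only: converting one entry of a normalized correspondence map to pixels.
img_b_shape = (480, 640, 3)                 # (H, W, C) of the target image
corr_entry = np.array([0.0, -0.5])          # normalized (x, y), roughly in [-1, 1]
loc_to = (corr_entry * 0.5 + 0.5) * np.array(img_b_shape[:2][::-1])   # scale by (W, H)
print(loc_to)                               # [320. 120.] -> x = 0.5 * 640, y = 0.25 * 480
```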
196 |
+
|
197 |
+
def cotr_corr_multiscale(self, img_a, img_b, zoom_ins=[1.0], converge_iters=1, max_corrs=1000, queries_a=None, return_idx=False, force=False, return_tasks_only=False, areas=None):
|
198 |
+
'''
|
199 |
+
currently only supports fixed queries_a
|
200 |
+
'''
|
201 |
+
img_a = img_a.copy()
|
202 |
+
img_b = img_b.copy()
|
203 |
+
img_a_shape = img_a.shape[:2]
|
204 |
+
img_b_shape = img_b.shape[:2]
|
205 |
+
if queries_a is not None:
|
206 |
+
queries_a = queries_a.copy()
|
207 |
+
tasks = self.gen_tasks(img_a, img_b, zoom_ins, converge_iters, max_corrs, queries_a, force, areas)
|
208 |
+
while True:
|
209 |
+
num_g = self.num_good_tasks(tasks)
|
210 |
+
print(f'{num_g} / {max_corrs} | {self.num_finished_tasks(tasks)} / {len(tasks)}')
|
211 |
+
task_ref, img_batch, query_batch = self.form_batch(tasks)
|
212 |
+
if len(task_ref) == 0:
|
213 |
+
break
|
214 |
+
if num_g >= max_corrs:
|
215 |
+
break
|
216 |
+
out = self.infer_batch(img_batch, query_batch)
|
217 |
+
for t, o in zip(task_ref, out):
|
218 |
+
t.step(o)
|
219 |
+
if return_tasks_only:
|
220 |
+
return tasks
|
221 |
+
if return_idx:
|
222 |
+
corrs, idx = self.conclude_tasks(tasks, return_idx=True, force=force,
|
223 |
+
img_a_shape=img_a_shape,
|
224 |
+
img_b_shape=img_b_shape,)
|
225 |
+
corrs = corrs[:max_corrs]
|
226 |
+
idx = idx[:max_corrs]
|
227 |
+
return corrs, idx
|
228 |
+
else:
|
229 |
+
corrs = self.conclude_tasks(tasks, force=force,
|
230 |
+
img_a_shape=img_a_shape,
|
231 |
+
img_b_shape=img_b_shape,)
|
232 |
+
corrs = corrs[:max_corrs]
|
233 |
+
return corrs
|
234 |
+
|
235 |
+
def cotr_corr_multiscale_with_cycle_consistency(self, img_a, img_b, zoom_ins=[1.0], converge_iters=1, max_corrs=1000, queries_a=None, return_idx=False, return_cycle_error=False):
|
236 |
+
EXTRACTION_RATE = 0.3
|
237 |
+
temp_max_corrs = int(max_corrs / EXTRACTION_RATE)
|
238 |
+
if queries_a is not None:
|
239 |
+
temp_max_corrs = min(temp_max_corrs, queries_a.shape[0])
|
240 |
+
queries_a = queries_a.copy()
|
241 |
+
corr_f, idx_f = self.cotr_corr_multiscale(img_a.copy(), img_b.copy(),
|
242 |
+
zoom_ins=zoom_ins,
|
243 |
+
converge_iters=converge_iters,
|
244 |
+
max_corrs=temp_max_corrs,
|
245 |
+
queries_a=queries_a,
|
246 |
+
return_idx=True)
|
247 |
+
assert corr_f.shape[0] > 0
|
248 |
+
corr_b, idx_b = self.cotr_corr_multiscale(img_b.copy(), img_a.copy(),
|
249 |
+
zoom_ins=zoom_ins,
|
250 |
+
converge_iters=converge_iters,
|
251 |
+
max_corrs=corr_f.shape[0],
|
252 |
+
queries_a=corr_f[:, 2:].copy(),
|
253 |
+
return_idx=True)
|
254 |
+
assert corr_b.shape[0] > 0
|
255 |
+
cycle_errors = np.linalg.norm(corr_f[idx_b][:, :2] - corr_b[:, 2:], axis=1)
|
256 |
+
order = np.argsort(cycle_errors)
|
257 |
+
out = [corr_f[idx_b][order][:max_corrs]]
|
258 |
+
if return_idx:
|
259 |
+
out.append(idx_f[idx_b][order][:max_corrs])
|
260 |
+
if return_cycle_error:
|
261 |
+
out.append(cycle_errors[order][:max_corrs])
|
262 |
+
if len(out) == 1:
|
263 |
+
out = out[0]
|
264 |
+
return out
|
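cotr_corr_multiscale_with_cycle_consistency keeps the matches that survive a forward and a backward pass: the cycle error is the distance between the original query point and where the backward pass maps the forward match back to, and matches are ranked by that error. A small toy sketch of the error computation, with fabricated arrays for illustration:

```python
import numpy as np

# Fabricated arrays, for illustration only.
corr_f = np.array([[10., 20., 110., 120.],    # forward matches: [x_a, y_a, x_b, y_b]
                   [30., 40., 130., 140.]])
idx_b = np.array([0, 1])                      # which forward rows the backward pass answered
corr_b = np.array([[110., 120., 11., 21.],    # backward matches: [x_b, y_b, x_a', y_a']
                   [130., 140., 45., 55.]])

cycle_errors = np.linalg.norm(corr_f[idx_b][:, :2] - corr_b[:, 2:], axis=1)
order = np.argsort(cycle_errors)              # most self-consistent matches first
print(cycle_errors[order])                    # [ 1.414... 21.21...]
```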
265 |
+
|
266 |
+
|
267 |
+
class FasterSparseEngine(SparseEngine):
|
268 |
+
'''
|
269 |
+
search and merge nearby tasks to accelerate inference speed.
|
270 |
+
It will make spatial accuracy slightly worse.
|
271 |
+
'''
|
272 |
+
|
273 |
+
def __init__(self, model, batch_size, mode='stretching', max_load=256):
|
274 |
+
super().__init__(model, batch_size, mode=mode)
|
275 |
+
self.max_load = max_load
|
276 |
+
|
277 |
+
def infer_batch_grouped(self, img_batch, query_batch):
|
278 |
+
device = next(self.model.parameters()).device
|
279 |
+
img_batch = img_batch.to(device)
|
280 |
+
query_batch = query_batch.to(device)
|
281 |
+
out = self.model(img_batch, query_batch)['pred_corrs'].clone().detach().cpu().numpy()
|
282 |
+
return out
|
283 |
+
|
284 |
+
def get_tasks_map(self, zoom, tasks):
|
285 |
+
maps = []
|
286 |
+
ids = []
|
287 |
+
for i, t in enumerate(tasks):
|
288 |
+
if t.status == 'unfinished' and t.submitted == False and t.cur_zoom == zoom:
|
289 |
+
t_info = t.peek()
|
290 |
+
point = np.concatenate([t_info['loc_from'], t_info['loc_to']])
|
291 |
+
maps.append(point)
|
292 |
+
ids.append(i)
|
293 |
+
return np.array(maps), np.array(ids)
|
294 |
+
|
295 |
+
def form_squad(self, zoom, pilot, pilot_id, tasks, tasks_map, task_ids, bookkeeping):
|
296 |
+
assert pilot.status == 'unfinished' and pilot.submitted == False and pilot.cur_zoom == zoom
|
297 |
+
SAFE_AREA = 0.5
|
298 |
+
pilot_info = pilot.peek()
|
299 |
+
pilot_from_center_x = pilot_info['patch_from'].x + pilot_info['patch_from'].w/2
|
300 |
+
pilot_from_center_y = pilot_info['patch_from'].y + pilot_info['patch_from'].h/2
|
301 |
+
pilot_from_left = pilot_from_center_x - pilot_info['patch_from'].w/2 * SAFE_AREA
|
302 |
+
pilot_from_right = pilot_from_center_x + pilot_info['patch_from'].w/2 * SAFE_AREA
|
303 |
+
pilot_from_upper = pilot_from_center_y - pilot_info['patch_from'].h/2 * SAFE_AREA
|
304 |
+
pilot_from_lower = pilot_from_center_y + pilot_info['patch_from'].h/2 * SAFE_AREA
|
305 |
+
|
306 |
+
pilot_to_center_x = pilot_info['patch_to'].x + pilot_info['patch_to'].w/2
|
307 |
+
pilot_to_center_y = pilot_info['patch_to'].y + pilot_info['patch_to'].h/2
|
308 |
+
pilot_to_left = pilot_to_center_x - pilot_info['patch_to'].w/2 * SAFE_AREA
|
309 |
+
pilot_to_right = pilot_to_center_x + pilot_info['patch_to'].w/2 * SAFE_AREA
|
310 |
+
pilot_to_upper = pilot_to_center_y - pilot_info['patch_to'].h/2 * SAFE_AREA
|
311 |
+
pilot_to_lower = pilot_to_center_y + pilot_info['patch_to'].h/2 * SAFE_AREA
|
312 |
+
|
313 |
+
img, query = pilot.get_task()
|
314 |
+
assert pilot.submitted == True
|
315 |
+
members = [pilot]
|
316 |
+
queries = [query]
|
317 |
+
bookkeeping[pilot_id] = False
|
318 |
+
|
319 |
+
loads = np.where(((tasks_map[:, 0] > pilot_from_left) &
|
320 |
+
(tasks_map[:, 0] < pilot_from_right) &
|
321 |
+
(tasks_map[:, 1] > pilot_from_upper) &
|
322 |
+
(tasks_map[:, 1] < pilot_from_lower) &
|
323 |
+
(tasks_map[:, 2] > pilot_to_left) &
|
324 |
+
(tasks_map[:, 2] < pilot_to_right) &
|
325 |
+
(tasks_map[:, 3] > pilot_to_upper) &
|
326 |
+
(tasks_map[:, 3] < pilot_to_lower)) *
|
327 |
+
bookkeeping)[0][: self.max_load]
|
328 |
+
|
329 |
+
for ti in task_ids[loads]:
|
330 |
+
t = tasks[ti]
|
331 |
+
assert t.status == 'unfinished' and t.submitted == False and t.cur_zoom == zoom
|
332 |
+
_, query = t.get_task_pilot(pilot)
|
333 |
+
members.append(t)
|
334 |
+
queries.append(query)
|
335 |
+
queries = torch.stack(queries, axis=1)
|
336 |
+
bookkeeping[loads] = False
|
337 |
+
return members, img, queries, bookkeeping
|
338 |
+
|
339 |
+
def form_grouped_batch(self, zoom, tasks):
|
340 |
+
counter = 0
|
341 |
+
task_ref = []
|
342 |
+
img_batch = []
|
343 |
+
query_batch = []
|
344 |
+
tasks_map, task_ids = self.get_tasks_map(zoom, tasks)
|
345 |
+
shuffle = np.random.permutation(tasks_map.shape[0])
|
346 |
+
tasks_map = np.take(tasks_map, shuffle, axis=0)
|
347 |
+
task_ids = np.take(task_ids, shuffle, axis=0)
|
348 |
+
bookkeeping = np.ones_like(task_ids).astype(bool)
|
349 |
+
|
350 |
+
for i, ti in enumerate(task_ids):
|
351 |
+
t = tasks[ti]
|
352 |
+
if t.status == 'unfinished' and t.submitted == False and t.cur_zoom == zoom:
|
353 |
+
members, img, queries, bookkeeping = self.form_squad(zoom, t, i, tasks, tasks_map, task_ids, bookkeeping)
|
354 |
+
task_ref.append(members)
|
355 |
+
img_batch.append(img)
|
356 |
+
query_batch.append(queries)
|
357 |
+
counter += 1
|
358 |
+
if counter >= self.batch_size:
|
359 |
+
break
|
360 |
+
if len(task_ref) == 0:
|
361 |
+
return [], [], []
|
362 |
+
|
363 |
+
max_len = max([q.shape[1] for q in query_batch])
|
364 |
+
for i in range(len(query_batch)):
|
365 |
+
q = query_batch[i]
|
366 |
+
query_batch[i] = torch.cat([q, torch.zeros([1, max_len - q.shape[1], 2])], axis=1)
|
367 |
+
img_batch = torch.stack(img_batch)
|
368 |
+
query_batch = torch.cat(query_batch)
|
369 |
+
return task_ref, img_batch, query_batch
|
370 |
+
|
371 |
+
def cotr_corr_multiscale(self, img_a, img_b, zoom_ins=[1.0], converge_iters=1, max_corrs=1000, queries_a=None, return_idx=False, force=False, return_tasks_only=False, areas=None):
|
372 |
+
'''
|
373 |
+
currently only supports fixed queries_a
|
374 |
+
'''
|
375 |
+
img_a = img_a.copy()
|
376 |
+
img_b = img_b.copy()
|
377 |
+
img_a_shape = img_a.shape[:2]
|
378 |
+
img_b_shape = img_b.shape[:2]
|
379 |
+
if queries_a is not None:
|
380 |
+
queries_a = queries_a.copy()
|
381 |
+
tasks = self.gen_tasks(img_a, img_b, zoom_ins, converge_iters, max_corrs, queries_a, force, areas)
|
382 |
+
for zm in zoom_ins:
|
383 |
+
print(f'======= Zoom: {zm} ======')
|
384 |
+
while True:
|
385 |
+
num_g = self.num_good_tasks(tasks)
|
386 |
+
task_ref, img_batch, query_batch = self.form_grouped_batch(zm, tasks)
|
387 |
+
if len(task_ref) == 0:
|
388 |
+
break
|
389 |
+
if num_g >= max_corrs:
|
390 |
+
break
|
391 |
+
out = self.infer_batch_grouped(img_batch, query_batch)
|
392 |
+
num_steps = 0
|
393 |
+
for i, temp in enumerate(task_ref):
|
394 |
+
for j, t in enumerate(temp):
|
395 |
+
t.step(out[i, j])
|
396 |
+
num_steps += 1
|
397 |
+
print(f'solved {num_steps} sub-tasks in one invocation with {img_batch.shape[0]} image pairs')
|
398 |
+
if num_steps <= self.batch_size:
|
399 |
+
break
|
400 |
+
# Roll back to default inference, because too few valid tasks can be grouped together.
|
401 |
+
while True:
|
402 |
+
num_g = self.num_good_tasks(tasks)
|
403 |
+
print(f'{num_g} / {max_corrs} | {self.num_finished_tasks(tasks)} / {len(tasks)}')
|
404 |
+
task_ref, img_batch, query_batch = self.form_batch(tasks, zm)
|
405 |
+
if len(task_ref) == 0:
|
406 |
+
break
|
407 |
+
if num_g >= max_corrs:
|
408 |
+
break
|
409 |
+
out = self.infer_batch(img_batch, query_batch)
|
410 |
+
for t, o in zip(task_ref, out):
|
411 |
+
t.step(o)
|
412 |
+
|
413 |
+
if return_tasks_only:
|
414 |
+
return tasks
|
415 |
+
if return_idx:
|
416 |
+
corrs, idx = self.conclude_tasks(tasks, return_idx=True, force=force,
|
417 |
+
img_a_shape=img_a_shape,
|
418 |
+
img_b_shape=img_b_shape,)
|
419 |
+
corrs = corrs[:max_corrs]
|
420 |
+
idx = idx[:max_corrs]
|
421 |
+
return corrs, idx
|
422 |
+
else:
|
423 |
+
corrs = self.conclude_tasks(tasks, force=force,
|
424 |
+
img_a_shape=img_a_shape,
|
425 |
+
img_b_shape=img_b_shape,)
|
426 |
+
corrs = corrs[:max_corrs]
|
427 |
+
return corrs
|
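A minimal usage sketch of the engine defined above, assuming a trained COTR network `model` has already been built and loaded (not shown here) and that `img_a`/`img_b` are HxWx3 numpy images; the returned array is expected to hold one [x_a, y_a, x_b, y_b] row per correspondence:

```python
from COTR.inference.sparse_engine import FasterSparseEngine

# `model`, `img_a`, `img_b` are assumed to exist already (trained COTR network, HxWx3 images).
engine = FasterSparseEngine(model, batch_size=32, mode='tile')
corrs = engine.cotr_corr_multiscale(img_a, img_b,
                                    zoom_ins=[1.0, 0.5],   # coarse-to-fine zoom levels
                                    converge_iters=1,
                                    max_corrs=300)
# corrs is expected to be an (N, 4) array, one [x_a, y_a, x_b, y_b] row per correspondence.
```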
third_party/COTR/COTR/models/__init__.py
ADDED
@@ -0,0 +1,10 @@
1 |
+
'''
|
2 |
+
The COTR model is modified from DETR code base.
|
3 |
+
https://github.com/facebookresearch/detr
|
4 |
+
'''
|
5 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
6 |
+
from .cotr_model import build
|
7 |
+
|
8 |
+
|
9 |
+
def build_model(args):
|
10 |
+
return build(args)
|
third_party/COTR/COTR/models/backbone.py
ADDED
@@ -0,0 +1,135 @@
1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
2 |
+
"""
|
3 |
+
Backbone modules.
|
4 |
+
"""
|
5 |
+
from collections import OrderedDict
|
6 |
+
|
7 |
+
import torch
|
8 |
+
import torch.nn.functional as F
|
9 |
+
import torchvision
|
10 |
+
from torch import nn
|
11 |
+
from torchvision.models._utils import IntermediateLayerGetter
|
12 |
+
from typing import Dict, List
|
13 |
+
|
14 |
+
from .misc import NestedTensor
|
15 |
+
|
16 |
+
from .position_encoding import build_position_encoding
|
17 |
+
from COTR.utils import debug_utils, constants
|
18 |
+
|
19 |
+
|
20 |
+
class FrozenBatchNorm2d(torch.nn.Module):
|
21 |
+
"""
|
22 |
+
BatchNorm2d where the batch statistics and the affine parameters are fixed.
|
23 |
+
|
24 |
+
Copy-paste from torchvision.misc.ops with added eps before rsqrt,
|
25 |
+
without which any other models than torchvision.models.resnet[18,34,50,101]
|
26 |
+
produce nans.
|
27 |
+
"""
|
28 |
+
|
29 |
+
def __init__(self, n):
|
30 |
+
super(FrozenBatchNorm2d, self).__init__()
|
31 |
+
self.register_buffer("weight", torch.ones(n))
|
32 |
+
self.register_buffer("bias", torch.zeros(n))
|
33 |
+
self.register_buffer("running_mean", torch.zeros(n))
|
34 |
+
self.register_buffer("running_var", torch.ones(n))
|
35 |
+
|
36 |
+
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
|
37 |
+
missing_keys, unexpected_keys, error_msgs):
|
38 |
+
num_batches_tracked_key = prefix + 'num_batches_tracked'
|
39 |
+
if num_batches_tracked_key in state_dict:
|
40 |
+
del state_dict[num_batches_tracked_key]
|
41 |
+
|
42 |
+
super(FrozenBatchNorm2d, self)._load_from_state_dict(
|
43 |
+
state_dict, prefix, local_metadata, strict,
|
44 |
+
missing_keys, unexpected_keys, error_msgs)
|
45 |
+
|
46 |
+
def forward(self, x):
|
47 |
+
# move reshapes to the beginning
|
48 |
+
# to make it fuser-friendly
|
49 |
+
w = self.weight.reshape(1, -1, 1, 1)
|
50 |
+
b = self.bias.reshape(1, -1, 1, 1)
|
51 |
+
rv = self.running_var.reshape(1, -1, 1, 1)
|
52 |
+
rm = self.running_mean.reshape(1, -1, 1, 1)
|
53 |
+
eps = 1e-5
|
54 |
+
scale = w * (rv + eps).rsqrt()
|
55 |
+
bias = b - rm * scale
|
56 |
+
return x * scale + bias
|
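As the docstring notes, frozen batch norm collapses inference-time BN into a single affine map. A self-contained check of that equivalence, purely illustrative and not part of the repo:

```python
import torch
import torch.nn.functional as F

# Purely illustrative: frozen BN at inference is y = x * scale + shift, with
# scale = weight / sqrt(running_var + eps) and shift = bias - running_mean * scale.
x = torch.randn(2, 3, 4, 4)
weight, bias = torch.rand(3) + 0.5, torch.randn(3)
running_mean, running_var = torch.randn(3), torch.rand(3) + 0.5
eps = 1e-5

scale = weight * (running_var + eps).rsqrt()
shift = bias - running_mean * scale
y_frozen = x * scale.reshape(1, -1, 1, 1) + shift.reshape(1, -1, 1, 1)

y_eval_bn = F.batch_norm(x, running_mean, running_var, weight, bias,
                         training=False, eps=eps)
assert torch.allclose(y_frozen, y_eval_bn, atol=1e-5)
```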
57 |
+
|
58 |
+
|
59 |
+
class BackboneBase(nn.Module):
|
60 |
+
|
61 |
+
def __init__(self, backbone: nn.Module, train_backbone: bool, num_channels: int, return_interm_layers: bool, layer='layer3'):
|
62 |
+
super().__init__()
|
63 |
+
for name, parameter in backbone.named_parameters():
|
64 |
+
if not train_backbone or 'layer2' not in name and 'layer3' not in name and 'layer4' not in name:
|
65 |
+
parameter.requires_grad_(False)
|
66 |
+
# print(f'freeze {name}')
|
67 |
+
if return_interm_layers:
|
68 |
+
return_layers = {"layer1": "0", "layer2": "1", "layer3": "2", "layer4": "3"}
|
69 |
+
else:
|
70 |
+
return_layers = {layer: "0"}
|
71 |
+
self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)
|
72 |
+
self.num_channels = num_channels
|
73 |
+
|
74 |
+
def forward_raw(self, x):
|
75 |
+
y = self.body(x)
|
76 |
+
assert len(y.keys()) == 1
|
77 |
+
return y['0']
|
78 |
+
|
79 |
+
def forward(self, tensor_list: NestedTensor):
|
80 |
+
assert tensor_list.tensors.shape[-2:] == (constants.MAX_SIZE, constants.MAX_SIZE * 2)
|
81 |
+
left = self.body(tensor_list.tensors[..., 0:constants.MAX_SIZE])
|
82 |
+
right = self.body(tensor_list.tensors[..., constants.MAX_SIZE:2 * constants.MAX_SIZE])
|
83 |
+
xs = {}
|
84 |
+
for k in left.keys():
|
85 |
+
xs[k] = torch.cat([left[k], right[k]], dim=-1)
|
86 |
+
out: Dict[str, NestedTensor] = {}
|
87 |
+
for name, x in xs.items():
|
88 |
+
m = tensor_list.mask
|
89 |
+
assert m is not None
|
90 |
+
mask = F.interpolate(m[None].float(), size=x.shape[-2:]).to(torch.bool)[0]
|
91 |
+
out[name] = NestedTensor(x, mask)
|
92 |
+
return out
|
93 |
+
|
94 |
+
|
95 |
+
class Backbone(BackboneBase):
|
96 |
+
"""ResNet backbone with frozen BatchNorm."""
|
97 |
+
|
98 |
+
def __init__(self, name: str,
|
99 |
+
train_backbone: bool,
|
100 |
+
return_interm_layers: bool,
|
101 |
+
dilation: bool,
|
102 |
+
layer='layer3',
|
103 |
+
num_channels=1024):
|
104 |
+
backbone = getattr(torchvision.models, name)(
|
105 |
+
replace_stride_with_dilation=[False, False, dilation],
|
106 |
+
pretrained=True, norm_layer=FrozenBatchNorm2d)
|
107 |
+
super().__init__(backbone, train_backbone, num_channels, return_interm_layers, layer)
|
108 |
+
|
109 |
+
|
110 |
+
class Joiner(nn.Sequential):
|
111 |
+
def __init__(self, backbone, position_embedding):
|
112 |
+
super().__init__(backbone, position_embedding)
|
113 |
+
|
114 |
+
def forward(self, tensor_list: NestedTensor):
|
115 |
+
xs = self[0](tensor_list)
|
116 |
+
out: List[NestedTensor] = []
|
117 |
+
pos = []
|
118 |
+
for name, x in xs.items():
|
119 |
+
out.append(x)
|
120 |
+
# position encoding
|
121 |
+
pos.append(self[1](x).to(x.tensors.dtype))
|
122 |
+
|
123 |
+
return out, pos
|
124 |
+
|
125 |
+
|
126 |
+
def build_backbone(args):
|
127 |
+
position_embedding = build_position_encoding(args)
|
128 |
+
if hasattr(args, 'lr_backbone'):
|
129 |
+
train_backbone = args.lr_backbone > 0
|
130 |
+
else:
|
131 |
+
train_backbone = False
|
132 |
+
backbone = Backbone(args.backbone, train_backbone, False, args.dilation, layer=args.layer, num_channels=args.dim_feedforward)
|
133 |
+
model = Joiner(backbone, position_embedding)
|
134 |
+
model.num_channels = backbone.num_channels
|
135 |
+
return model
|
third_party/COTR/COTR/models/cotr_model.py
ADDED
@@ -0,0 +1,51 @@
1 |
+
import math
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
+
import torch
|
5 |
+
import torch.nn.functional as F
|
6 |
+
from torch import nn
|
7 |
+
|
8 |
+
from COTR.utils import debug_utils, constants, utils
|
9 |
+
from .misc import (NestedTensor, nested_tensor_from_tensor_list)
|
10 |
+
from .backbone import build_backbone
|
11 |
+
from .transformer import build_transformer
|
12 |
+
from .position_encoding import NerfPositionalEncoding, MLP
|
13 |
+
|
14 |
+
|
15 |
+
class COTR(nn.Module):
|
16 |
+
|
17 |
+
def __init__(self, backbone, transformer, sine_type='lin_sine'):
|
18 |
+
super().__init__()
|
19 |
+
self.transformer = transformer
|
20 |
+
hidden_dim = transformer.d_model
|
21 |
+
self.corr_embed = MLP(hidden_dim, hidden_dim, 2, 3)
|
22 |
+
self.query_proj = NerfPositionalEncoding(hidden_dim // 4, sine_type)
|
23 |
+
self.input_proj = nn.Conv2d(backbone.num_channels, hidden_dim, kernel_size=1)
|
24 |
+
self.backbone = backbone
|
25 |
+
|
26 |
+
def forward(self, samples: NestedTensor, queries):
|
27 |
+
if isinstance(samples, (list, torch.Tensor)):
|
28 |
+
samples = nested_tensor_from_tensor_list(samples)
|
29 |
+
features, pos = self.backbone(samples)
|
30 |
+
|
31 |
+
src, mask = features[-1].decompose()
|
32 |
+
assert mask is not None
|
33 |
+
_b, _q, _ = queries.shape
|
34 |
+
queries = queries.reshape(-1, 2)
|
35 |
+
queries = self.query_proj(queries).reshape(_b, _q, -1)
|
36 |
+
queries = queries.permute(1, 0, 2)
|
37 |
+
hs = self.transformer(self.input_proj(src), mask, queries, pos[-1])[0]
|
38 |
+
outputs_corr = self.corr_embed(hs)
|
39 |
+
out = {'pred_corrs': outputs_corr[-1]}
|
40 |
+
return out
|
41 |
+
|
42 |
+
|
43 |
+
def build(args):
|
44 |
+
backbone = build_backbone(args)
|
45 |
+
transformer = build_transformer(args)
|
46 |
+
model = COTR(
|
47 |
+
backbone,
|
48 |
+
transformer,
|
49 |
+
sine_type=args.position_embedding,
|
50 |
+
)
|
51 |
+
return model
|
third_party/COTR/COTR/models/misc.py
ADDED
@@ -0,0 +1,112 @@
1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
2 |
+
"""
|
3 |
+
Misc functions, including distributed helpers.
|
4 |
+
|
5 |
+
Mostly copy-paste from torchvision references.
|
6 |
+
"""
|
7 |
+
import os
|
8 |
+
import subprocess
|
9 |
+
import time
|
10 |
+
from collections import defaultdict, deque
|
11 |
+
import datetime
|
12 |
+
import pickle
|
13 |
+
from typing import Optional, List
|
14 |
+
|
15 |
+
import torch
|
16 |
+
import torch.distributed as dist
|
17 |
+
from torch import Tensor
|
18 |
+
|
19 |
+
# needed due to empty tensor bug in pytorch and torchvision 0.5
|
20 |
+
import torchvision
|
21 |
+
if float(torchvision.__version__.split('.')[1]) < 7:
|
22 |
+
from torchvision.ops import _new_empty_tensor
|
23 |
+
from torchvision.ops.misc import _output_size
|
24 |
+
|
25 |
+
|
26 |
+
def _max_by_axis(the_list):
|
27 |
+
# type: (List[List[int]]) -> List[int]
|
28 |
+
maxes = the_list[0]
|
29 |
+
for sublist in the_list[1:]:
|
30 |
+
for index, item in enumerate(sublist):
|
31 |
+
maxes[index] = max(maxes[index], item)
|
32 |
+
return maxes
|
33 |
+
|
34 |
+
|
35 |
+
class NestedTensor(object):
|
36 |
+
def __init__(self, tensors, mask: Optional[Tensor]):
|
37 |
+
self.tensors = tensors
|
38 |
+
self.mask = mask
|
39 |
+
|
40 |
+
def to(self, device):
|
41 |
+
# type: (Device) -> NestedTensor # noqa
|
42 |
+
cast_tensor = self.tensors.to(device)
|
43 |
+
mask = self.mask
|
44 |
+
if mask is not None:
|
45 |
+
assert mask is not None
|
46 |
+
cast_mask = mask.to(device)
|
47 |
+
else:
|
48 |
+
cast_mask = None
|
49 |
+
return NestedTensor(cast_tensor, cast_mask)
|
50 |
+
|
51 |
+
def decompose(self):
|
52 |
+
return self.tensors, self.mask
|
53 |
+
|
54 |
+
def __repr__(self):
|
55 |
+
return str(self.tensors)
|
56 |
+
|
57 |
+
|
58 |
+
def nested_tensor_from_tensor_list(tensor_list: List[Tensor]):
|
59 |
+
# TODO make this more general
|
60 |
+
if tensor_list[0].ndim == 3:
|
61 |
+
if torchvision._is_tracing():
|
62 |
+
# nested_tensor_from_tensor_list() does not export well to ONNX
|
63 |
+
# call _onnx_nested_tensor_from_tensor_list() instead
|
64 |
+
return _onnx_nested_tensor_from_tensor_list(tensor_list)
|
65 |
+
|
66 |
+
# TODO make it support different-sized images
|
67 |
+
max_size = _max_by_axis([list(img.shape) for img in tensor_list])
|
68 |
+
# min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list]))
|
69 |
+
batch_shape = [len(tensor_list)] + max_size
|
70 |
+
b, c, h, w = batch_shape
|
71 |
+
dtype = tensor_list[0].dtype
|
72 |
+
device = tensor_list[0].device
|
73 |
+
tensor = torch.zeros(batch_shape, dtype=dtype, device=device)
|
74 |
+
mask = torch.ones((b, h, w), dtype=torch.bool, device=device)
|
75 |
+
for img, pad_img, m in zip(tensor_list, tensor, mask):
|
76 |
+
pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
|
77 |
+
m[: img.shape[1], :img.shape[2]] = False
|
78 |
+
else:
|
79 |
+
raise ValueError('not supported')
|
80 |
+
return NestedTensor(tensor, mask)
|
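nested_tensor_from_tensor_list pads every image in the batch to the largest height and width and records a boolean mask that is True over the padded pixels. A small sketch, assuming the COTR package is importable:

```python
import torch
from COTR.models.misc import nested_tensor_from_tensor_list   # assumes the repo is importable

imgs = [torch.randn(3, 200, 300), torch.randn(3, 180, 320)]
nt = nested_tensor_from_tensor_list(imgs)
print(nt.tensors.shape)                  # torch.Size([2, 3, 200, 320]): padded to max H, max W
print(nt.mask.shape)                     # torch.Size([2, 200, 320])
print(nt.mask[0, :, 300:].all().item())  # True: the padded columns of image 0 are masked out
```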
81 |
+
|
82 |
+
|
83 |
+
# _onnx_nested_tensor_from_tensor_list() is an implementation of
|
84 |
+
# nested_tensor_from_tensor_list() that is supported by ONNX tracing.
|
85 |
+
@torch.jit.unused
|
86 |
+
def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor:
|
87 |
+
max_size = []
|
88 |
+
for i in range(tensor_list[0].dim()):
|
89 |
+
max_size_i = torch.max(torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32)).to(torch.int64)
|
90 |
+
max_size.append(max_size_i)
|
91 |
+
max_size = tuple(max_size)
|
92 |
+
|
93 |
+
# work around for
|
94 |
+
# pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
|
95 |
+
# m[: img.shape[1], :img.shape[2]] = False
|
96 |
+
# which is not yet supported in onnx
|
97 |
+
padded_imgs = []
|
98 |
+
padded_masks = []
|
99 |
+
for img in tensor_list:
|
100 |
+
padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]
|
101 |
+
padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0]))
|
102 |
+
padded_imgs.append(padded_img)
|
103 |
+
|
104 |
+
m = torch.zeros_like(img[0], dtype=torch.int, device=img.device)
|
105 |
+
padded_mask = torch.nn.functional.pad(m, (0, padding[2], 0, padding[1]), "constant", 1)
|
106 |
+
padded_masks.append(padded_mask.to(torch.bool))
|
107 |
+
|
108 |
+
tensor = torch.stack(padded_imgs)
|
109 |
+
mask = torch.stack(padded_masks)
|
110 |
+
|
111 |
+
return NestedTensor(tensor, mask=mask)
|
112 |
+
|
third_party/COTR/COTR/models/position_encoding.py
ADDED
@@ -0,0 +1,83 @@
1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
2 |
+
"""
|
3 |
+
Various positional encodings for the transformer.
|
4 |
+
"""
|
5 |
+
import math
|
6 |
+
import torch
|
7 |
+
from torch import nn
|
8 |
+
import torch.nn.functional as F
|
9 |
+
|
10 |
+
from .misc import NestedTensor
|
11 |
+
from COTR.utils import debug_utils
|
12 |
+
|
13 |
+
|
14 |
+
class MLP(nn.Module):
|
15 |
+
""" Very simple multi-layer perceptron (also called FFN)"""
|
16 |
+
|
17 |
+
def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
|
18 |
+
super().__init__()
|
19 |
+
self.num_layers = num_layers
|
20 |
+
h = [hidden_dim] * (num_layers - 1)
|
21 |
+
self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
|
22 |
+
|
23 |
+
def forward(self, x):
|
24 |
+
for i, layer in enumerate(self.layers):
|
25 |
+
x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
|
26 |
+
return x
|
27 |
+
|
28 |
+
|
29 |
+
class NerfPositionalEncoding(nn.Module):
|
30 |
+
def __init__(self, depth=10, sine_type='lin_sine'):
|
31 |
+
'''
|
32 |
+
out_dim = in_dim * depth * 2
|
33 |
+
'''
|
34 |
+
super().__init__()
|
35 |
+
if sine_type == 'lin_sine':
|
36 |
+
self.bases = [i+1 for i in range(depth)]
|
37 |
+
elif sine_type == 'exp_sine':
|
38 |
+
self.bases = [2**i for i in range(depth)]
|
39 |
+
print(f'using {sine_type} as positional encoding')
|
40 |
+
|
41 |
+
@torch.no_grad()
|
42 |
+
def forward(self, inputs):
|
43 |
+
out = torch.cat([torch.sin(i * math.pi * inputs) for i in self.bases] + [torch.cos(i * math.pi * inputs) for i in self.bases], axis=-1)
|
44 |
+
assert torch.isnan(out).any() == False
|
45 |
+
return out
|
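For the encoding above, a D-dimensional input and depth d produce D * d * 2 output values (one sine and one cosine per base frequency); 'lin_sine' uses bases 1..d while 'exp_sine' uses powers of two. A self-contained sketch of the same formula:

```python
import math
import torch

depth = 4
bases = [i + 1 for i in range(depth)]   # 'lin_sine'; 'exp_sine' would use [2 ** i for i in range(depth)]
xy = torch.tensor([[0.25, -0.5]])       # one normalized 2-D query point
enc = torch.cat([torch.sin(b * math.pi * xy) for b in bases] +
                [torch.cos(b * math.pi * xy) for b in bases], dim=-1)
print(enc.shape)                        # torch.Size([1, 16]) = in_dim (2) * depth (4) * 2
```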
46 |
+
|
47 |
+
|
48 |
+
class PositionEmbeddingSine(nn.Module):
|
49 |
+
"""
|
50 |
+
This is a more standard version of the position embedding, very similar to the one
|
51 |
+
used by the Attention is all you need paper, generalized to work on images.
|
52 |
+
"""
|
53 |
+
def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None, sine_type='lin_sine'):
|
54 |
+
super().__init__()
|
55 |
+
self.num_pos_feats = num_pos_feats
|
56 |
+
self.temperature = temperature
|
57 |
+
self.normalize = normalize
|
58 |
+
self.sine = NerfPositionalEncoding(num_pos_feats//2, sine_type)
|
59 |
+
|
60 |
+
@torch.no_grad()
|
61 |
+
def forward(self, tensor_list: NestedTensor):
|
62 |
+
x = tensor_list.tensors
|
63 |
+
mask = tensor_list.mask
|
64 |
+
assert mask is not None
|
65 |
+
not_mask = ~mask
|
66 |
+
y_embed = not_mask.cumsum(1, dtype=torch.float32)
|
67 |
+
x_embed = not_mask.cumsum(2, dtype=torch.float32)
|
68 |
+
eps = 1e-6
|
69 |
+
y_embed = (y_embed-0.5) / (y_embed[:, -1:, :] + eps)
|
70 |
+
x_embed = (x_embed-0.5) / (x_embed[:, :, -1:] + eps)
|
71 |
+
pos = torch.stack([x_embed, y_embed], dim=-1)
|
72 |
+
return self.sine(pos).permute(0, 3, 1, 2)
|
73 |
+
|
74 |
+
|
75 |
+
def build_position_encoding(args):
|
76 |
+
N_steps = args.hidden_dim // 2
|
77 |
+
if args.position_embedding in ('lin_sine', 'exp_sine'):
|
78 |
+
# TODO find a better way of exposing other arguments
|
79 |
+
position_embedding = PositionEmbeddingSine(N_steps, normalize=True, sine_type=args.position_embedding)
|
80 |
+
else:
|
81 |
+
raise ValueError(f"not supported {args.position_embedding}")
|
82 |
+
|
83 |
+
return position_embedding
|
third_party/COTR/COTR/models/transformer.py
ADDED
@@ -0,0 +1,228 @@
1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
2 |
+
"""
|
3 |
+
COTR/DETR Transformer class.
|
4 |
+
|
5 |
+
Copy-paste from torch.nn.Transformer with modifications:
|
6 |
+
* positional encodings are passed in MHattention
|
7 |
+
* extra LN at the end of encoder is removed
|
8 |
+
* decoder returns a stack of activations from all decoding layers
|
9 |
+
"""
|
10 |
+
import copy
|
11 |
+
from typing import Optional, List
|
12 |
+
|
13 |
+
import torch
|
14 |
+
import torch.nn.functional as F
|
15 |
+
from torch import nn, Tensor
|
16 |
+
|
17 |
+
from COTR.utils import debug_utils
|
18 |
+
|
19 |
+
|
20 |
+
class Transformer(nn.Module):
|
21 |
+
|
22 |
+
def __init__(self, d_model=512, nhead=8, num_encoder_layers=6,
|
23 |
+
num_decoder_layers=6, dim_feedforward=2048, dropout=0.1,
|
24 |
+
activation="relu", return_intermediate_dec=False):
|
25 |
+
super().__init__()
|
26 |
+
|
27 |
+
encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward,
|
28 |
+
dropout, activation)
|
29 |
+
self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers)
|
30 |
+
|
31 |
+
decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward,
|
32 |
+
dropout, activation)
|
33 |
+
decoder_norm = nn.LayerNorm(d_model)
|
34 |
+
self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, decoder_norm,
|
35 |
+
return_intermediate=return_intermediate_dec)
|
36 |
+
|
37 |
+
self._reset_parameters()
|
38 |
+
|
39 |
+
self.d_model = d_model
|
40 |
+
self.nhead = nhead
|
41 |
+
|
42 |
+
def _reset_parameters(self):
|
43 |
+
for p in self.parameters():
|
44 |
+
if p.dim() > 1:
|
45 |
+
nn.init.xavier_uniform_(p)
|
46 |
+
|
47 |
+
def forward(self, src, mask, query_embed, pos_embed):
|
48 |
+
# flatten NxCxHxW to HWxNxC
|
49 |
+
bs, c, h, w = src.shape
|
50 |
+
src = src.flatten(2).permute(2, 0, 1)
|
51 |
+
pos_embed = pos_embed.flatten(2).permute(2, 0, 1)
|
52 |
+
mask = mask.flatten(1)
|
53 |
+
|
54 |
+
tgt = torch.zeros_like(query_embed)
|
55 |
+
memory = self.encoder(src, src_key_padding_mask=mask, pos=pos_embed)
|
56 |
+
hs = self.decoder(tgt, memory, memory_key_padding_mask=mask,
|
57 |
+
pos=pos_embed, query_pos=query_embed)
|
58 |
+
return hs.transpose(1, 2), memory.permute(1, 2, 0).view(bs, c, h, w)
|
59 |
+
|
60 |
+
|
61 |
+
class TransformerEncoder(nn.Module):
|
62 |
+
|
63 |
+
def __init__(self, encoder_layer, num_layers):
|
64 |
+
super().__init__()
|
65 |
+
self.layers = _get_clones(encoder_layer, num_layers)
|
66 |
+
self.num_layers = num_layers
|
67 |
+
|
68 |
+
def forward(self, src,
|
69 |
+
mask: Optional[Tensor] = None,
|
70 |
+
src_key_padding_mask: Optional[Tensor] = None,
|
71 |
+
pos: Optional[Tensor] = None):
|
72 |
+
output = src
|
73 |
+
|
74 |
+
for layer in self.layers:
|
75 |
+
output = layer(output, src_mask=mask,
|
76 |
+
src_key_padding_mask=src_key_padding_mask, pos=pos)
|
77 |
+
|
78 |
+
return output
|
79 |
+
|
80 |
+
|
81 |
+
class TransformerDecoder(nn.Module):
|
82 |
+
|
83 |
+
def __init__(self, decoder_layer, num_layers, norm=None, return_intermediate=False):
|
84 |
+
super().__init__()
|
85 |
+
self.layers = _get_clones(decoder_layer, num_layers)
|
86 |
+
self.num_layers = num_layers
|
87 |
+
self.norm = norm
|
88 |
+
self.return_intermediate = return_intermediate
|
89 |
+
|
90 |
+
def forward(self, tgt, memory,
|
91 |
+
tgt_mask: Optional[Tensor] = None,
|
92 |
+
memory_mask: Optional[Tensor] = None,
|
93 |
+
tgt_key_padding_mask: Optional[Tensor] = None,
|
94 |
+
memory_key_padding_mask: Optional[Tensor] = None,
|
95 |
+
pos: Optional[Tensor] = None,
|
96 |
+
query_pos: Optional[Tensor] = None):
|
97 |
+
output = tgt
|
98 |
+
|
99 |
+
intermediate = []
|
100 |
+
|
101 |
+
for layer in self.layers:
|
102 |
+
output = layer(output, memory, tgt_mask=tgt_mask,
|
103 |
+
memory_mask=memory_mask,
|
104 |
+
tgt_key_padding_mask=tgt_key_padding_mask,
|
105 |
+
memory_key_padding_mask=memory_key_padding_mask,
|
106 |
+
pos=pos, query_pos=query_pos)
|
107 |
+
if self.return_intermediate:
|
108 |
+
intermediate.append(self.norm(output))
|
109 |
+
|
110 |
+
if self.norm is not None:
|
111 |
+
output = self.norm(output)
|
112 |
+
if self.return_intermediate:
|
113 |
+
intermediate.pop()
|
114 |
+
intermediate.append(output)
|
115 |
+
|
116 |
+
if self.return_intermediate:
|
117 |
+
return torch.stack(intermediate)
|
118 |
+
|
119 |
+
return output.unsqueeze(0)
|
120 |
+
|
121 |
+
|
122 |
+
class TransformerEncoderLayer(nn.Module):
|
123 |
+
|
124 |
+
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1,
|
125 |
+
activation="relu"):
|
126 |
+
super().__init__()
|
127 |
+
self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
|
128 |
+
# Implementation of Feedforward model
|
129 |
+
self.linear1 = nn.Linear(d_model, dim_feedforward)
|
130 |
+
self.dropout = nn.Dropout(dropout)
|
131 |
+
self.linear2 = nn.Linear(dim_feedforward, d_model)
|
132 |
+
|
133 |
+
self.norm1 = nn.LayerNorm(d_model)
|
134 |
+
self.norm2 = nn.LayerNorm(d_model)
|
135 |
+
self.dropout1 = nn.Dropout(dropout)
|
136 |
+
self.dropout2 = nn.Dropout(dropout)
|
137 |
+
|
138 |
+
self.activation = _get_activation_fn(activation)
|
139 |
+
|
140 |
+
def with_pos_embed(self, tensor, pos: Optional[Tensor]):
|
141 |
+
return tensor if pos is None else tensor + pos
|
142 |
+
|
143 |
+
def forward(self,
|
144 |
+
src,
|
145 |
+
src_mask: Optional[Tensor] = None,
|
146 |
+
src_key_padding_mask: Optional[Tensor] = None,
|
147 |
+
pos: Optional[Tensor] = None):
|
148 |
+
q = k = self.with_pos_embed(src, pos)
|
149 |
+
src2 = self.self_attn(query=q,
|
150 |
+
key=k,
|
151 |
+
value=src,
|
152 |
+
attn_mask=src_mask,
|
153 |
+
key_padding_mask=src_key_padding_mask)[0]
|
154 |
+
src = src + self.dropout1(src2)
|
155 |
+
src = self.norm1(src)
|
156 |
+
src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
|
157 |
+
src = src + self.dropout2(src2)
|
158 |
+
src = self.norm2(src)
|
159 |
+
return src
|
160 |
+
|
161 |
+
|
162 |
+
class TransformerDecoderLayer(nn.Module):
|
163 |
+
|
164 |
+
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1,
|
165 |
+
activation="relu"):
|
166 |
+
super().__init__()
|
167 |
+
self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
|
168 |
+
# Implementation of Feedforward model
|
169 |
+
self.linear1 = nn.Linear(d_model, dim_feedforward)
|
170 |
+
self.dropout = nn.Dropout(dropout)
|
171 |
+
self.linear2 = nn.Linear(dim_feedforward, d_model)
|
172 |
+
|
173 |
+
self.norm1 = nn.LayerNorm(d_model)
|
174 |
+
self.norm2 = nn.LayerNorm(d_model)
|
175 |
+
self.norm3 = nn.LayerNorm(d_model)
|
176 |
+
self.dropout1 = nn.Dropout(dropout)
|
177 |
+
self.dropout2 = nn.Dropout(dropout)
|
178 |
+
self.dropout3 = nn.Dropout(dropout)
|
179 |
+
|
180 |
+
self.activation = _get_activation_fn(activation)
|
181 |
+
|
182 |
+
def with_pos_embed(self, tensor, pos: Optional[Tensor]):
|
183 |
+
return tensor if pos is None else tensor + pos
|
184 |
+
|
185 |
+
def forward(self, tgt, memory,
|
186 |
+
tgt_mask: Optional[Tensor] = None,
|
187 |
+
memory_mask: Optional[Tensor] = None,
|
188 |
+
tgt_key_padding_mask: Optional[Tensor] = None,
|
189 |
+
memory_key_padding_mask: Optional[Tensor] = None,
|
190 |
+
pos: Optional[Tensor] = None,
|
191 |
+
query_pos: Optional[Tensor] = None):
|
192 |
+
tgt2 = self.multihead_attn(query=self.with_pos_embed(tgt, query_pos),
|
193 |
+
key=self.with_pos_embed(memory, pos),
|
194 |
+
value=memory, attn_mask=memory_mask,
|
195 |
+
key_padding_mask=memory_key_padding_mask)[0]
|
196 |
+
tgt = tgt + self.dropout2(tgt2)
|
197 |
+
tgt = self.norm2(tgt)
|
198 |
+
tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
|
199 |
+
tgt = tgt + self.dropout3(tgt2)
|
200 |
+
tgt = self.norm3(tgt)
|
201 |
+
return tgt
|
202 |
+
|
203 |
+
|
204 |
+
def _get_clones(module, N):
|
205 |
+
return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
|
206 |
+
|
207 |
+
|
208 |
+
def build_transformer(args):
|
209 |
+
return Transformer(
|
210 |
+
d_model=args.hidden_dim,
|
211 |
+
dropout=args.dropout,
|
212 |
+
nhead=args.nheads,
|
213 |
+
dim_feedforward=args.dim_feedforward,
|
214 |
+
num_encoder_layers=args.enc_layers,
|
215 |
+
num_decoder_layers=args.dec_layers,
|
216 |
+
return_intermediate_dec=True,
|
217 |
+
)
|
218 |
+
|
219 |
+
|
220 |
+
def _get_activation_fn(activation):
|
221 |
+
"""Return an activation function given a string"""
|
222 |
+
if activation == "relu":
|
223 |
+
return F.relu
|
224 |
+
if activation == "gelu":
|
225 |
+
return F.gelu
|
226 |
+
if activation == "glu":
|
227 |
+
return F.glu
|
228 |
+
raise RuntimeError(F"activation should be relu/gelu/glu, not {activation}.")
|
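A shape walkthrough of the Transformer above with dummy tensors (assuming the COTR repo is on PYTHONPATH): the NxCxHxW feature map is flattened to HWxNxC, and with the default return_intermediate=False the decoder output carries a leading singleton layer dimension:

```python
import torch
from COTR.models.transformer import Transformer   # assumes the COTR repo is on PYTHONPATH

net = Transformer(d_model=256, nhead=8, num_encoder_layers=2,
                  num_decoder_layers=2, dim_feedforward=512)
bs, c, h, w, n_q = 1, 256, 4, 8, 5
src = torch.randn(bs, c, h, w)                    # backbone feature map
mask = torch.zeros(bs, h, w, dtype=torch.bool)    # no padded positions
pos = torch.randn(bs, c, h, w)                    # positional encoding
queries = torch.randn(n_q, bs, c)                 # encoded query coordinates
hs, memory = net(src, mask, queries, pos)
print(hs.shape)        # torch.Size([1, 1, 5, 256]): (layers, batch, queries, d_model)
print(memory.shape)    # torch.Size([1, 256, 4, 8])
```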
third_party/COTR/COTR/options/options.py
ADDED
@@ -0,0 +1,52 @@
1 |
+
import sys
|
2 |
+
import argparse
|
3 |
+
import json
|
4 |
+
import os
|
5 |
+
|
6 |
+
|
7 |
+
from COTR.options.options_utils import str2bool
|
8 |
+
from COTR.options import options_utils
|
9 |
+
from COTR.global_configs import general_config, dataset_config
|
10 |
+
from COTR.utils import debug_utils
|
11 |
+
|
12 |
+
|
13 |
+
def set_general_arguments(parser):
|
14 |
+
general_arg = parser.add_argument_group('General')
|
15 |
+
general_arg.add_argument('--confirm', type=str2bool,
|
16 |
+
default=True, help='prompt confirmation from user')
|
17 |
+
general_arg.add_argument('--use_cuda', type=str2bool,
|
18 |
+
default=True, help='use cuda')
|
19 |
+
general_arg.add_argument('--use_cc', type=str2bool,
|
20 |
+
default=False, help='use computecanada')
|
21 |
+
|
22 |
+
|
23 |
+
def set_dataset_arguments(parser):
|
24 |
+
data_arg = parser.add_argument_group('Data')
|
25 |
+
data_arg.add_argument('--dataset_name', type=str, default='megadepth', help='dataset name')
|
26 |
+
data_arg.add_argument('--shuffle_data', type=str2bool, default=True, help='use sequence dataset or shuffled dataset')
|
27 |
+
data_arg.add_argument('--use_ram', type=str2bool, default=False, help='load image/depth/pcd to ram')
|
28 |
+
data_arg.add_argument('--info_level', choices=['rgb', 'rgbd'], type=str, default='rgbd', help='the information level of dataset')
|
29 |
+
data_arg.add_argument('--scene_file', type=str, default=None, required=False, help='what scene/seq want to use')
|
30 |
+
data_arg.add_argument('--workers', type=int, default=0, help='worker for loading data')
|
31 |
+
data_arg.add_argument('--crop_cam', choices=['no_crop', 'crop_center', 'crop_center_and_resize'], type=str, default='crop_center_and_resize', help='crop the center of image to avoid changing aspect ratio, resize to make the operations batch-able.')
|
32 |
+
|
33 |
+
|
34 |
+
def set_nn_arguments(parser):
|
35 |
+
nn_arg = parser.add_argument_group('Nearest neighbors')
|
36 |
+
nn_arg.add_argument('--nn_method', choices=['netvlad', 'overlapping'], type=str, default='overlapping', help='how to select nearest neighbors')
|
37 |
+
nn_arg.add_argument('--pool_size', type=int, default=20, help='a pool of sorted nn candidates')
|
38 |
+
nn_arg.add_argument('--k_size', type=int, default=1, help='select the nn randomly from pool')
|
39 |
+
|
40 |
+
|
41 |
+
def set_COTR_arguments(parser):
|
42 |
+
cotr_arg = parser.add_argument_group('COTR model')
|
43 |
+
cotr_arg.add_argument('--backbone', type=str, default='resnet50')
|
44 |
+
cotr_arg.add_argument('--hidden_dim', type=int, default=256)
|
45 |
+
cotr_arg.add_argument('--dilation', type=str2bool, default=False)
|
46 |
+
cotr_arg.add_argument('--dropout', type=float, default=0.1)
|
47 |
+
cotr_arg.add_argument('--nheads', type=int, default=8)
|
48 |
+
cotr_arg.add_argument('--layer', type=str, default='layer3', help='which layer from resnet')
|
49 |
+
cotr_arg.add_argument('--enc_layers', type=int, default=6)
|
50 |
+
cotr_arg.add_argument('--dec_layers', type=int, default=6)
|
51 |
+
cotr_arg.add_argument('--position_embedding', type=str, default='lin_sine', help='sine wave type')
|
52 |
+
|
third_party/COTR/COTR/options/options_utils.py
ADDED
@@ -0,0 +1,108 @@
1 |
+
'''utils for argparse
|
2 |
+
'''
|
3 |
+
|
4 |
+
import sys
|
5 |
+
import os
|
6 |
+
from os import path
|
7 |
+
import time
|
8 |
+
import json
|
9 |
+
|
10 |
+
from COTR.utils import utils, debug_utils
|
11 |
+
from COTR.global_configs import general_config, dataset_config
|
12 |
+
|
13 |
+
|
14 |
+
def str2bool(v: str) -> bool:
|
15 |
+
return v.lower() in ('true', '1', 'yes', 'y', 't')
|
16 |
+
|
17 |
+
|
18 |
+
def get_compact_naming_cotr(opt) -> str:
|
19 |
+
base_str = 'model:cotr_{0}_{1}_{2}_dset:{3}_bs:{4}_pe:{5}_lrbackbone:{6}'
|
20 |
+
result = base_str.format(opt.backbone,
|
21 |
+
opt.layer,
|
22 |
+
opt.dim_feedforward,
|
23 |
+
opt.dataset_name,
|
24 |
+
opt.batch_size,
|
25 |
+
opt.position_embedding,
|
26 |
+
opt.lr_backbone,
|
27 |
+
)
|
28 |
+
if opt.suffix:
|
29 |
+
result = result + '_suffix:{0}'.format(opt.suffix)
|
30 |
+
return result
|
31 |
+
|
32 |
+
|
33 |
+
def print_opt(opt):
|
34 |
+
content_list = []
|
35 |
+
args = list(vars(opt))
|
36 |
+
args.sort()
|
37 |
+
for arg in args:
|
38 |
+
content_list += [arg.rjust(25, ' ') + ' ' + str(getattr(opt, arg))]
|
39 |
+
utils.print_notification(content_list, 'OPTIONS')
|
40 |
+
|
41 |
+
|
42 |
+
def confirm_opt(opt):
|
43 |
+
print_opt(opt)
|
44 |
+
if opt.use_cc == False:
|
45 |
+
if not utils.confirm():
|
46 |
+
exit(1)
|
47 |
+
|
48 |
+
|
49 |
+
def opt_to_string(opt) -> str:
|
50 |
+
string = '\n\n'
|
51 |
+
string += 'python ' + ' '.join(sys.argv)
|
52 |
+
string += '\n\n'
|
53 |
+
# string += '---------------------- CONFIG ----------------------\n'
|
54 |
+
args = list(vars(opt))
|
55 |
+
args.sort()
|
56 |
+
for arg in args:
|
57 |
+
string += arg.rjust(25, ' ') + ' ' + str(getattr(opt, arg)) + '\n\n'
|
58 |
+
# string += '----------------------------------------------------\n'
|
59 |
+
return string
|
60 |
+
|
61 |
+
|
62 |
+
def save_opt(opt):
|
63 |
+
'''save options to a json file
|
64 |
+
'''
|
65 |
+
if not os.path.exists(opt.out):
|
66 |
+
os.makedirs(opt.out)
|
67 |
+
json_path = os.path.join(opt.out, 'params.json')
|
68 |
+
if 'debug' not in opt.suffix and path.isfile(json_path):
|
69 |
+
assert opt.resume, 'You are trying to modify a model without resuming: {0}'.format(opt.out)
|
70 |
+
old_dict = json.load(open(json_path))
|
71 |
+
new_dict = vars(opt)
|
72 |
+
# assert old_dict.keys() == new_dict.keys(), 'New configuration keys is different from old one.\nold: {0}\nnew: {1}'.format(old_dict.keys(), new_dict.keys())
|
73 |
+
if new_dict != old_dict:
|
74 |
+
exception_keys = ['command']
|
75 |
+
for key in set(old_dict.keys()).union(set(new_dict.keys())):
|
76 |
+
if key not in exception_keys:
|
77 |
+
old_val = old_dict[key] if key in old_dict else 'not exists(old)'
|
78 |
+
new_val = new_dict[key] if key in new_dict else 'not exists(new)'
|
79 |
+
if old_val != new_val:
|
80 |
+
print('key: {0}, old_val: {1}, new_val: {2}'.format(key, old_val, new_val))
|
81 |
+
if opt.use_cc == False:
|
82 |
+
if not utils.confirm('Please manually confirm'):
|
83 |
+
exit(1)
|
84 |
+
with open(json_path, 'w') as fp:
|
85 |
+
json.dump(vars(opt), fp, indent=0, sort_keys=True)
|
86 |
+
|
87 |
+
|
88 |
+
def build_scenes_name_list_from_opt(opt):
|
89 |
+
if hasattr(opt, 'scene_file') and opt.scene_file is not None:
|
90 |
+
assert os.path.isfile(opt.scene_file), opt.scene_file
|
91 |
+
with open(opt.scene_file, 'r') as f:
|
92 |
+
scenes_list = json.load(f)
|
93 |
+
else:
|
94 |
+
scenes_list = [{'scene': opt.scene, 'seq': opt.seq}]
|
95 |
+
if 'megadepth' in opt.dataset_name:
|
96 |
+
assert opt.info_level in ['rgb', 'rgbd']
|
97 |
+
scenes_name_list = []
|
98 |
+
if opt.info_level == 'rgb':
|
99 |
+
dir_list = ['scene_dir', 'image_dir']
|
100 |
+
elif opt.info_level == 'rgbd':
|
101 |
+
dir_list = ['scene_dir', 'image_dir', 'depth_dir']
|
102 |
+
dir_list = {dir_name: dataset_config[opt.dataset_name][dir_name] for dir_name in dir_list}
|
103 |
+
for item in scenes_list:
|
104 |
+
cur_scene = {key: val.format(item['scene'], item['seq']) for key, val in dir_list.items()}
|
105 |
+
scenes_name_list.append(cur_scene)
|
106 |
+
else:
|
107 |
+
raise NotImplementedError()
|
108 |
+
return scenes_name_list
|
third_party/COTR/COTR/projector/pcd_projector.py
ADDED
@@ -0,0 +1,210 @@
1 |
+
'''
|
2 |
+
a point cloud projector based on np
|
3 |
+
'''
|
4 |
+
|
5 |
+
import numpy as np
|
6 |
+
|
7 |
+
from COTR.utils import debug_utils, utils
|
8 |
+
|
9 |
+
|
10 |
+
def render_point_cloud_at_capture(point_cloud, capture, render_type='rgb', return_pcd=False):
|
11 |
+
assert render_type in ['rgb', 'bw', 'depth']
|
12 |
+
if render_type == 'rgb':
|
13 |
+
assert point_cloud.shape[1] == 6
|
14 |
+
else:
|
15 |
+
point_cloud = point_cloud[:, :3]
|
16 |
+
assert point_cloud.shape[1] == 3
|
17 |
+
if render_type in ['bw', 'rgb']:
|
18 |
+
keep_z = False
|
19 |
+
else:
|
20 |
+
keep_z = True
|
21 |
+
|
22 |
+
pcd_2d = PointCloudProjector.pcd_3d_to_pcd_2d_np(point_cloud,
|
23 |
+
capture.intrinsic_mat,
|
24 |
+
capture.extrinsic_mat,
|
25 |
+
capture.size,
|
26 |
+
keep_z=True,
|
27 |
+
crop=True,
|
28 |
+
filter_neg=True,
|
29 |
+
norm_coord=False,
|
30 |
+
return_index=False)
|
31 |
+
reproj = PointCloudProjector.pcd_2d_to_img_2d_np(pcd_2d,
|
32 |
+
capture.size,
|
33 |
+
has_z=True,
|
34 |
+
keep_z=keep_z)
|
35 |
+
if return_pcd:
|
36 |
+
return reproj, pcd_2d
|
37 |
+
else:
|
38 |
+
return reproj
|
39 |
+
|
40 |
+
|
41 |
+
def optical_flow_from_a_to_b(cap_a, cap_b):
|
42 |
+
cap_a_intrinsic = cap_a.pinhole_cam.intrinsic_mat
|
43 |
+
cap_a_img_size = cap_a.pinhole_cam.shape[:2]
|
44 |
+
_h, _w = cap_b.pinhole_cam.shape[:2]
|
45 |
+
x, y = np.meshgrid(
|
46 |
+
np.linspace(0, _w - 1, num=_w),
|
47 |
+
np.linspace(0, _h - 1, num=_h),
|
48 |
+
)
|
49 |
+
coord_map = np.concatenate([np.expand_dims(x, 2), np.expand_dims(y, 2)], axis=2)
|
50 |
+
pcd_from_cap_b = cap_b.get_point_cloud_world_from_depth(coord_map)
|
51 |
+
# pcd_from_cap_b = cap_b.point_cloud_world_w_feat(['pos', 'coord'])
|
52 |
+
optical_flow = PointCloudProjector.pcd_2d_to_img_2d_np(PointCloudProjector.pcd_3d_to_pcd_2d_np(pcd_from_cap_b, cap_a_intrinsic, cap_a.cam_pose.world_to_camera[0:3, :], cap_a_img_size, keep_z=True, crop=True, filter_neg=True, norm_coord=False), cap_a_img_size, has_z=True, keep_z=False)
|
53 |
+
return optical_flow
|
54 |
+
|
55 |
+
|
56 |
+
class PointCloudProjector():
|
57 |
+
def __init__(self):
|
58 |
+
pass
|
59 |
+
|
60 |
+
@staticmethod
|
61 |
+
def pcd_2d_to_pcd_3d_np(pcd, depth, intrinsic, motion=None, return_index=False):
|
62 |
+
assert isinstance(pcd, np.ndarray), 'cannot process data type: {0}'.format(type(pcd))
|
63 |
+
assert isinstance(intrinsic, np.ndarray), 'cannot process data type: {0}'.format(type(intrinsic))
|
64 |
+
assert len(pcd.shape) == 2 and pcd.shape[1] >= 2
|
65 |
+
assert len(depth.shape) == 2 and depth.shape[1] == 1
|
66 |
+
assert intrinsic.shape == (3, 3)
|
67 |
+
if motion is not None:
|
68 |
+
assert isinstance(motion, np.ndarray), 'cannot process data type: {0}'.format(type(motion))
|
69 |
+
assert motion.shape == (4, 4)
|
70 |
+
# exec(debug_utils.embed_breakpoint())
|
71 |
+
x, y, z = pcd[:, 0], pcd[:, 1], depth[:, 0]
|
72 |
+
append_ones = np.ones_like(x)
|
73 |
+
xyz = np.stack([x, y, append_ones], axis=1) # shape: [num_points, 3]
|
74 |
+
inv_intrinsic_mat = np.linalg.inv(intrinsic)
|
75 |
+
xyz = np.matmul(inv_intrinsic_mat, xyz.T).T * z[..., None]
|
76 |
+
valid_mask_1 = np.where(xyz[:, 2] > 0)
|
77 |
+
xyz = xyz[valid_mask_1]
|
78 |
+
|
79 |
+
if motion is not None:
|
80 |
+
append_ones = np.ones_like(xyz[:, 0:1])
|
81 |
+
xyzw = np.concatenate([xyz, append_ones], axis=1)
|
82 |
+
xyzw = np.matmul(motion, xyzw.T).T
|
83 |
+
valid_mask_2 = np.where(xyzw[:, 3] != 0)
|
84 |
+
xyzw = xyzw[valid_mask_2]
|
85 |
+
xyzw /= xyzw[:, 3:4]
|
86 |
+
xyz = xyzw[:, 0:3]
|
87 |
+
|
88 |
+
if pcd.shape[1] > 2:
|
89 |
+
features = pcd[:, 2:]
|
90 |
+
try:
|
91 |
+
features = features[valid_mask_1][valid_mask_2]
|
92 |
+
except UnboundLocalError:
|
93 |
+
features = features[valid_mask_1]
|
94 |
+
assert xyz.shape[0] == features.shape[0]
|
95 |
+
xyz = np.concatenate([xyz, features], axis=1)
|
96 |
+
if return_index:
|
97 |
+
points_index = np.arange(pcd.shape[0])[valid_mask_1][valid_mask_2]
|
98 |
+
return xyz, points_index
|
99 |
+
return xyz
|
100 |
+
|
101 |
+
@staticmethod
|
102 |
+
def img_2d_to_pcd_3d_np(depth, intrinsic, img=None, motion=None):
|
103 |
+
'''
|
104 |
+
the function signature is not fully correct, because img is optional
|
105 |
+
if motion is None, the output pcd is in camera space
|
106 |
+
if motion is camera_to_world, the output pcd is in world space.
|
107 |
+
here the output is pure np array
|
108 |
+
'''
|
109 |
+
|
110 |
+
assert isinstance(depth, np.ndarray), 'cannot process data type: {0}'.format(type(depth))
|
111 |
+
assert isinstance(intrinsic, np.ndarray), 'cannot process data type: {0}'.format(type(intrinsic))
|
112 |
+
assert len(depth.shape) == 2
|
113 |
+
assert intrinsic.shape == (3, 3)
|
114 |
+
if img is not None:
|
115 |
+
assert isinstance(img, np.ndarray), 'cannot process data type: {0}'.format(type(img))
|
116 |
+
assert len(img.shape) == 3
|
117 |
+
+            assert img.shape[:2] == depth.shape[:2], 'feature should have the same resolution as the depth'
+        if motion is not None:
+            assert isinstance(motion, np.ndarray), 'cannot process data type: {0}'.format(type(motion))
+            assert motion.shape == (4, 4)
+
+        pcd_image_space = PointCloudProjector.img_2d_to_pcd_2d_np(depth[..., None], norm_coord=False)
+        valid_mask_1 = np.where(pcd_image_space[:, 2] > 0)
+        pcd_image_space = pcd_image_space[valid_mask_1]
+        xy = pcd_image_space[:, :2]
+        z = pcd_image_space[:, 2:3]
+        if img is not None:
+            _c = img.shape[-1]
+            feat = img.reshape(-1, _c)
+            feat = feat[valid_mask_1]
+            xy = np.concatenate([xy, feat], axis=1)
+        pcd_3d = PointCloudProjector.pcd_2d_to_pcd_3d_np(xy, z, intrinsic, motion=motion)
+        return pcd_3d
+
+    @staticmethod
+    def pcd_3d_to_pcd_2d_np(pcd, intrinsic, extrinsic, size, keep_z: bool, crop: bool = True, filter_neg: bool = True, norm_coord: bool = True, return_index: bool = False):
+        assert isinstance(pcd, np.ndarray), 'cannot process data type: {0}'.format(type(pcd))
+        assert isinstance(intrinsic, np.ndarray), 'cannot process data type: {0}'.format(type(intrinsic))
+        assert isinstance(extrinsic, np.ndarray), 'cannot process data type: {0}'.format(type(extrinsic))
+        assert len(pcd.shape) == 2 and pcd.shape[1] >= 3, 'seems the input pcd is not a valid 3d point cloud: {0}'.format(pcd.shape)
+
+        xyzw = np.concatenate([pcd[:, 0:3], np.ones_like(pcd[:, 0:1])], axis=1)
+        mvp_mat = np.matmul(intrinsic, extrinsic)
+        camera_points = np.matmul(mvp_mat, xyzw.T).T
+        if filter_neg:
+            valid_mask_1 = camera_points[:, 2] > 0.0
+        else:
+            valid_mask_1 = np.ones_like(camera_points[:, 2], dtype=bool)
+        camera_points = camera_points[valid_mask_1]
+        image_points = camera_points / camera_points[:, 2:3]
+        image_points = image_points[:, :2]
+        if crop:
+            valid_mask_2 = (image_points[:, 0] >= 0) * (image_points[:, 0] < size[1] - 1) * (image_points[:, 1] >= 0) * (image_points[:, 1] < size[0] - 1)
+        else:
+            valid_mask_2 = np.ones_like(image_points[:, 0], dtype=bool)
+        if norm_coord:
+            image_points = ((image_points / size[::-1]) * 2) - 1
+
+        if keep_z:
+            image_points = np.concatenate([image_points[valid_mask_2], camera_points[valid_mask_2][:, 2:3], pcd[valid_mask_1][:, 3:][valid_mask_2]], axis=1)
+        else:
+            image_points = np.concatenate([image_points[valid_mask_2], pcd[valid_mask_1][:, 3:][valid_mask_2]], axis=1)
+        # if filter_neg and crop:
+        #     exec(debug_utils.embed_breakpoint('pcd_3d_to_pcd_2d_np'))
+        if return_index:
+            points_index = np.arange(pcd.shape[0])[valid_mask_1][valid_mask_2]
+            return image_points, points_index
+        return image_points
+
+    @staticmethod
+    def pcd_2d_to_img_2d_np(pcd, size, has_z=False, keep_z=False):
+        assert len(pcd.shape) == 2 and pcd.shape[-1] >= 2, 'seems the input pcd is not a valid point cloud: {0}'.format(pcd.shape)
+        # assert 0, 'pass Z values in'
+        if has_z:
+            pcd = pcd[pcd[:, 2].argsort()[::-1]]
+            if not keep_z:
+                pcd = np.delete(pcd, [2], axis=1)
+        index_list = np.round(pcd[:, 0:2]).astype(np.int32)
+        index_list[:, 0] = np.clip(index_list[:, 0], 0, size[1] - 1)
+        index_list[:, 1] = np.clip(index_list[:, 1], 0, size[0] - 1)
+        _h, _w, _c = *size, pcd.shape[-1] - 2
+        if _c == 0:
+            canvas = np.zeros((_h, _w, 1))
+            canvas[index_list[:, 1], index_list[:, 0]] = 1.0
+        else:
+            canvas = np.zeros((_h, _w, _c))
+            canvas[index_list[:, 1], index_list[:, 0]] = pcd[:, 2:]
+
+        return canvas
+
+    @staticmethod
+    def img_2d_to_pcd_2d_np(img, norm_coord=True):
+        assert isinstance(img, np.ndarray), 'cannot process data type: {0}'.format(type(img))
+        assert len(img.shape) == 3
+
+        _h, _w, _c = img.shape
+        if norm_coord:
+            x, y = np.meshgrid(
+                np.linspace(-1, 1, num=_w),
+                np.linspace(-1, 1, num=_h),
+            )
+        else:
+            x, y = np.meshgrid(
+                np.linspace(0, _w - 1, num=_w),
+                np.linspace(0, _h - 1, num=_h),
+            )
+        x, y = x.reshape(-1, 1), y.reshape(-1, 1)
+        feat = img.reshape(-1, _c)
+        pcd_2d = np.concatenate([x, y, feat], axis=1)
+        return pcd_2d
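A minimal round-trip sketch of the two helpers above (illustrative only, not part of the commit; the import path is assumed from the file layout): flatten an H x W x C feature map into a 2D point cloud and splat it back onto an empty canvas.

```python
import numpy as np
from COTR.projector.pcd_projector import PointCloudProjector  # assumed import path

feat_map = np.random.rand(4, 5, 3)  # toy H=4, W=5, C=3 "image"
pcd_2d = PointCloudProjector.img_2d_to_pcd_2d_np(feat_map, norm_coord=False)
# pcd_2d has shape (H*W, 2+C): pixel x, pixel y, then the per-pixel features.
canvas = PointCloudProjector.pcd_2d_to_img_2d_np(pcd_2d, size=(4, 5))
assert np.allclose(canvas, feat_map)  # splatting back recovers the feature map
```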
third_party/COTR/COTR/sfm_scenes/knn_search.py
ADDED
@@ -0,0 +1,56 @@
+'''
+Given one capture in a scene, search for its KNN captures
+'''
+
+import os
+
+import numpy as np
+
+from COTR.utils import debug_utils
+from COTR.utils.constants import VALID_NN_OVERLAPPING_THRESH
+
+
+class ReprojRatioKnnSearch():
+    def __init__(self, scene):
+        self.scene = scene
+        self.distance_mat = None
+        self.nn_index = None
+        self._read_dist_mat()
+        self._build_nn_index()
+
+    def _read_dist_mat(self):
+        dist_mat_path = os.path.join(os.path.dirname(os.path.dirname(self.scene.captures[0].depth_path)), 'dist_mat/dist_mat.npy')
+        self.distance_mat = np.load(dist_mat_path)
+
+    def _build_nn_index(self):
+        # argsort is in ascending order, so we take negative
+        self.nn_index = (-1 * self.distance_mat).argsort(axis=1)
+
+    def get_knn(self, query, k, db_mask=None):
+        query_index = self.scene.img_path_to_index_dict[query.img_path]
+        if db_mask is not None:
+            query_mask = np.setdiff1d(np.arange(self.distance_mat[query_index].shape[0]), db_mask)
+        num_pos = (self.distance_mat[query_index] > VALID_NN_OVERLAPPING_THRESH).sum() if db_mask is None else (self.distance_mat[query_index][db_mask] > VALID_NN_OVERLAPPING_THRESH).sum()
+        # we have enough valid NN or not
+        if num_pos > k:
+            if db_mask is None:
+                ind = self.nn_index[query_index][:k + 1]
+            else:
+                temp_dist = self.distance_mat[query_index].copy()
+                temp_dist[query_mask] = -1
+                ind = (-1 * temp_dist).argsort(axis=0)[:k + 1]
+            # remove self
+            if query_index in ind:
+                ind = np.delete(ind, np.argwhere(ind == query_index))
+            else:
+                ind = ind[:k]
+            assert ind.shape[0] <= k, ind.shape[0] > 0
+        else:
+            k = num_pos
+            if db_mask is None:
+                ind = self.nn_index[query_index][:max(k, 1)]
+            else:
+                temp_dist = self.distance_mat[query_index].copy()
+                temp_dist[query_mask] = -1
+                ind = (-1 * temp_dist).argsort(axis=0)[:max(k, 1)]
+        return self.scene.get_captures_given_index_list(ind)
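A toy sketch of the retrieval idea above (made-up numbers, not part of the commit): rows of the precomputed overlap/"distance" matrix hold reprojection-overlap ratios, and nearest neighbours are simply the columns with the largest ratios, with the query itself removed.

```python
import numpy as np

overlap = np.array([[1.0, 0.8, 0.1, 0.4],
                    [0.8, 1.0, 0.3, 0.2],
                    [0.1, 0.3, 1.0, 0.6],
                    [0.4, 0.2, 0.6, 1.0]])
nn_index = (-1 * overlap).argsort(axis=1)   # descending overlap per row
query_index, k = 0, 2
ind = nn_index[query_index][:k + 1]         # take k+1, then drop the query itself
ind = np.delete(ind, np.argwhere(ind == query_index))
print(ind)                                  # -> [1 3]
```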
third_party/COTR/COTR/sfm_scenes/sfm_scenes.py
ADDED
@@ -0,0 +1,87 @@
+'''
+Scene reconstructed from SFM, mainly colmap
+'''
+import os
+import copy
+import math
+
+import numpy as np
+from numpy.linalg import inv
+from tqdm import tqdm
+
+from COTR.transformations import transformations
+from COTR.transformations.transform_basics import Translation, Rotation
+from COTR.cameras.camera_pose import CameraPose
+from COTR.utils import debug_utils
+
+
+class SfmScene():
+    def __init__(self, captures, point_cloud=None):
+        self.captures = captures
+        if isinstance(point_cloud, tuple):
+            self.point_cloud = point_cloud[0]
+            self.point_meta = point_cloud[1]
+        else:
+            self.point_cloud = point_cloud
+        self.img_path_to_index_dict = {}
+        self.img_id_to_index_dict = {}
+        self.fname_to_index_dict = {}
+        self._build_img_X_to_index_dict()
+
+    def __str__(self):
+        string = 'Scene contains {0} captures'.format(len(self.captures))
+        return string
+
+    def __getitem__(self, x):
+        if isinstance(x, str):
+            try:
+                return self.captures[self.img_path_to_index_dict[x]]
+            except:
+                return self.captures[self.fname_to_index_dict[x]]
+        else:
+            return self.captures[x]
+
+    def _build_img_X_to_index_dict(self):
+        assert self.captures is not None, 'There is no captures'
+        for i, cap in enumerate(self.captures):
+            assert cap.img_path not in self.img_path_to_index_dict, 'Image already exists'
+            self.img_path_to_index_dict[cap.img_path] = i
+            assert os.path.basename(cap.img_path) not in self.fname_to_index_dict, 'Image already exists'
+            self.fname_to_index_dict[os.path.basename(cap.img_path)] = i
+            if hasattr(cap, 'image_id'):
+                self.img_id_to_index_dict[cap.image_id] = i
+
+    def get_captures_given_index_list(self, index_list):
+        captures_list = []
+        for i in index_list:
+            captures_list.append(self.captures[i])
+        return captures_list
+
+    def get_covisible_caps(self, cap):
+        assert cap.img_path in self.img_path_to_index_dict
+        covis_img_id = set()
+        point_ids = cap.point3d_id
+        for i in point_ids:
+            covis_img_id = covis_img_id.union(set(self.point_meta[i].image_ids))
+        covis_caps = []
+        for i in covis_img_id:
+            if i in self.img_id_to_index_dict:
+                covis_caps.append(self.captures[self.img_id_to_index_dict[i]])
+            else:
+                pass
+        return covis_caps
+
+    def read_data_to_ram(self, data_list):
+        print('warning: you are going to use a lot of RAM.')
+        sum_bytes = 0.0
+        pbar = tqdm(self.captures, desc='reading data, memory usage {0:.2f} MB'.format(sum_bytes / (1024.0 * 1024.0)))
+        for cap in pbar:
+            if 'image' in data_list:
+                sum_bytes += cap.read_image_to_ram()
+            if 'depth' in data_list:
+                sum_bytes += cap.read_depth_to_ram()
+            if 'pcd' in data_list:
+                sum_bytes += cap.read_pcd_to_ram()
+            pbar.set_description('reading data, memory usage {0:.2f} MB'.format(sum_bytes / (1024.0 * 1024.0)))
+        print('----- total memory usage for images: {0} MB-----'.format(sum_bytes / (1024.0 * 1024.0)))
+
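A minimal sketch (hypothetical stand-in captures, not part of the commit; the import path is assumed) showing how SfmScene indexes the same capture by full path, by file name, or by position.

```python
from types import SimpleNamespace
from COTR.sfm_scenes.sfm_scenes import SfmScene  # assumed import path

caps = [SimpleNamespace(img_path=f'/data/scene/images/{i:04d}.jpg') for i in range(3)]
scene = SfmScene(caps)
assert scene['/data/scene/images/0001.jpg'] is caps[1]   # lookup by full path
assert scene['0002.jpg'] is caps[2]                      # falls back to file-name lookup
assert scene[0] is caps[0]                               # plain integer indexing
```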
third_party/COTR/COTR/trainers/base_trainer.py
ADDED
@@ -0,0 +1,111 @@
+import os
+import math
+import abc
+import time
+
+import tqdm
+import torch.nn as nn
+import tensorboardX
+
+from COTR.trainers import tensorboard_helper
+from COTR.utils import utils
+from COTR.options import options_utils
+
+
+class BaseTrainer(abc.ABC):
+    '''base trainer class.
+    contains methods for training, validation, and writing output.
+    '''
+
+    def __init__(self, opt, model, optimizer, criterion,
+                 train_loader, val_loader):
+        self.opt = opt
+        self.use_cuda = opt.use_cuda
+        self.model = model
+        self.optim = optimizer
+        self.criterion = criterion
+        self.train_loader = train_loader
+        self.val_loader = val_loader
+        self.out = opt.out
+        if not os.path.exists(opt.out):
+            os.makedirs(opt.out)
+        self.epoch = 0
+        self.iteration = 0
+        self.max_iter = opt.max_iter
+        self.valid_iter = opt.valid_iter
+        self.tb_pusher = tensorboard_helper.TensorboardPusher(opt)
+        self.push_opt_to_tb()
+        self.need_resume = opt.resume
+        if self.need_resume:
+            self.resume()
+        if self.opt.load_weights:
+            self.load_pretrained_weights()
+
+    def push_opt_to_tb(self):
+        opt_str = options_utils.opt_to_string(self.opt)
+        tb_datapack = tensorboard_helper.TensorboardDatapack()
+        tb_datapack.set_training(False)
+        tb_datapack.set_iteration(self.iteration)
+        tb_datapack.add_text({'options': opt_str})
+        self.tb_pusher.push_to_tensorboard(tb_datapack)
+
+    @abc.abstractmethod
+    def validate_batch(self, data_pack):
+        pass
+
+    @abc.abstractmethod
+    def validate(self):
+        pass
+
+    @abc.abstractmethod
+    def train_batch(self, data_pack):
+        '''train for one batch of data
+        '''
+        pass
+
+    def train_epoch(self):
+        '''train for one epoch
+        one epoch is iterating the whole training dataset once
+        '''
+        self.model.train()
+        for batch_idx, data_pack in tqdm.tqdm(enumerate(self.train_loader),
+                                              initial=self.iteration % len(
+                                                  self.train_loader),
+                                              total=len(self.train_loader),
+                                              desc='Train epoch={0}'.format(
+                                                  self.epoch),
+                                              ncols=80,
+                                              leave=True,
+                                              ):
+
+            # iteration = batch_idx + self.epoch * len(self.train_loader)
+            # if self.iteration != 0 and (iteration - 1) != self.iteration:
+            #     continue  # for resuming
+            # self.iteration = iteration
+            # self.iteration += 1
+            if self.iteration % self.valid_iter == 0:
+                time.sleep(2)  # Prevent possible deadlock during epoch transition
+                self.validate()
+            self.train_batch(data_pack)
+
+            if self.iteration >= self.max_iter:
+                break
+            self.iteration += 1
+
+    def train(self):
+        '''entrance of the whole training process
+        '''
+        max_epoch = int(math.ceil(1. * self.max_iter / len(self.train_loader)))
+        for epoch in tqdm.trange(self.epoch,
+                                 max_epoch,
+                                 desc='Train',
+                                 ncols=80):
+            self.epoch = epoch
+            time.sleep(2)  # Prevent possible deadlock during epoch transition
+            self.train_epoch()
+            if self.iteration >= self.max_iter:
+                break
+
+    @abc.abstractmethod
+    def resume(self):
+        pass
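A minimal sketch (hypothetical class and data-pack keys, not part of the commit) of how the abstract hooks above are meant to be filled in by a concrete trainer.

```python
from COTR.trainers import base_trainer  # assumed import path

class ToyTrainer(base_trainer.BaseTrainer):
    def train_batch(self, data_pack):
        # one optimization step on one batch
        self.optim.zero_grad()
        loss = self.criterion(self.model(data_pack['input']), data_pack['target'])
        loss.backward()
        self.optim.step()

    def validate_batch(self, data_pack):
        return self.criterion(self.model(data_pack['input']), data_pack['target']).item()

    def validate(self):
        pass  # aggregate validate_batch() over self.val_loader and log it

    def resume(self):
        pass  # reload epoch/iteration/weights from a saved checkpoint
```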
third_party/COTR/COTR/trainers/cotr_trainer.py
ADDED
@@ -0,0 +1,200 @@
+import os
+import math
+import os.path as osp
+import time
+
+import tqdm
+import torch
+import numpy as np
+import torchvision.utils as vutils
+from PIL import Image, ImageDraw
+
+
+from COTR.utils import utils, debug_utils, constants
+from COTR.trainers import base_trainer, tensorboard_helper
+from COTR.projector import pcd_projector
+
+
+class COTRTrainer(base_trainer.BaseTrainer):
+    def __init__(self, opt, model, optimizer, criterion,
+                 train_loader, val_loader):
+        super().__init__(opt, model, optimizer, criterion,
+                         train_loader, val_loader)
+
+    def validate_batch(self, data_pack):
+        assert self.model.training is False
+        with torch.no_grad():
+            img = data_pack['image'].cuda()
+            query = data_pack['queries'].cuda()
+            target = data_pack['targets'].cuda()
+            self.optim.zero_grad()
+            pred = self.model(img, query)['pred_corrs']
+            loss = torch.nn.functional.mse_loss(pred, target)
+            if self.opt.cycle_consis and self.opt.bidirectional:
+                cycle = self.model(img, pred)['pred_corrs']
+                mask = torch.norm(cycle - query, dim=-1) < 10 / constants.MAX_SIZE
+                if mask.sum() > 0:
+                    cycle_loss = torch.nn.functional.mse_loss(cycle[mask], query[mask])
+                    loss += cycle_loss
+            elif self.opt.cycle_consis and not self.opt.bidirectional:
+                img_reverse = torch.cat([img[..., constants.MAX_SIZE:], img[..., :constants.MAX_SIZE]], axis=-1)
+                query_reverse = pred.clone()
+                query_reverse[..., 0] = query_reverse[..., 0] - 0.5
+                cycle = self.model(img_reverse, query_reverse)['pred_corrs']
+                cycle[..., 0] = cycle[..., 0] - 0.5
+                mask = torch.norm(cycle - query, dim=-1) < 10 / constants.MAX_SIZE
+                if mask.sum() > 0:
+                    cycle_loss = torch.nn.functional.mse_loss(cycle[mask], query[mask])
+                    loss += cycle_loss
+            loss_data = loss.data.item()
+            if np.isnan(loss_data):
+                print('loss is nan while validating')
+            return loss_data, pred
+
+    def validate(self):
+        '''validate for whole validation dataset
+        '''
+        training = self.model.training
+        self.model.eval()
+        val_loss_list = []
+        for batch_idx, data_pack in tqdm.tqdm(
+                enumerate(self.val_loader), total=len(self.val_loader),
+                desc='Valid iteration=%d' % self.iteration, ncols=80,
+                leave=False):
+            loss_data, pred = self.validate_batch(data_pack)
+            val_loss_list.append(loss_data)
+        mean_loss = np.array(val_loss_list).mean()
+        validation_data = {'val_loss': mean_loss,
+                           'pred': pred,
+                           }
+        self.push_validation_data(data_pack, validation_data)
+        self.save_model()
+        if training:
+            self.model.train()
+
+    def save_model(self):
+        torch.save({
+            'epoch': self.epoch,
+            'iteration': self.iteration,
+            'optim_state_dict': self.optim.state_dict(),
+            'model_state_dict': self.model.state_dict(),
+        }, osp.join(self.out, 'checkpoint.pth.tar'))
+        if self.iteration % (10 * self.valid_iter) == 0:
+            torch.save({
+                'epoch': self.epoch,
+                'iteration': self.iteration,
+                'optim_state_dict': self.optim.state_dict(),
+                'model_state_dict': self.model.state_dict(),
+            }, osp.join(self.out, f'{self.iteration}_checkpoint.pth.tar'))
+
+    def draw_corrs(self, imgs, corrs, col=(255, 0, 0)):
+        imgs = utils.torch_img_to_np_img(imgs)
+        out = []
+        for img, corr in zip(imgs, corrs):
+            img = np.interp(img, [img.min(), img.max()], [0, 255]).astype(np.uint8)
+            img = Image.fromarray(img)
+            draw = ImageDraw.Draw(img)
+            corr *= np.array([constants.MAX_SIZE * 2, constants.MAX_SIZE, constants.MAX_SIZE * 2, constants.MAX_SIZE])
+            for c in corr:
+                draw.line(c, fill=col)
+            out.append(np.array(img))
+        out = np.array(out) / 255.0
+        return utils.np_img_to_torch_img(out)
+
+    def push_validation_data(self, data_pack, validation_data):
+        val_loss = validation_data['val_loss']
+        pred_corrs = np.concatenate([data_pack['queries'].numpy(), validation_data['pred'].cpu().numpy()], axis=-1)
+        pred_corrs = self.draw_corrs(data_pack['image'], pred_corrs)
+        gt_corrs = np.concatenate([data_pack['queries'].numpy(), data_pack['targets'].cpu().numpy()], axis=-1)
+        gt_corrs = self.draw_corrs(data_pack['image'], gt_corrs, (0, 255, 0))
+
+        gt_img = vutils.make_grid(gt_corrs, normalize=True, scale_each=True)
+        pred_img = vutils.make_grid(pred_corrs, normalize=True, scale_each=True)
+        tb_datapack = tensorboard_helper.TensorboardDatapack()
+        tb_datapack.set_training(False)
+        tb_datapack.set_iteration(self.iteration)
+        tb_datapack.add_scalar({'loss/val': val_loss})
+        tb_datapack.add_image({'image/gt_corrs': gt_img})
+        tb_datapack.add_image({'image/pred_corrs': pred_img})
+        self.tb_pusher.push_to_tensorboard(tb_datapack)
+
+    def train_batch(self, data_pack):
+        '''train for one batch of data
+        '''
+        img = data_pack['image'].cuda()
+        query = data_pack['queries'].cuda()
+        target = data_pack['targets'].cuda()
+
+        self.optim.zero_grad()
+        pred = self.model(img, query)['pred_corrs']
+        loss = torch.nn.functional.mse_loss(pred, target)
+        if self.opt.cycle_consis and self.opt.bidirectional:
+            cycle = self.model(img, pred)['pred_corrs']
+            mask = torch.norm(cycle - query, dim=-1) < 10 / constants.MAX_SIZE
+            if mask.sum() > 0:
+                cycle_loss = torch.nn.functional.mse_loss(cycle[mask], query[mask])
+                loss += cycle_loss
+        elif self.opt.cycle_consis and not self.opt.bidirectional:
+            img_reverse = torch.cat([img[..., constants.MAX_SIZE:], img[..., :constants.MAX_SIZE]], axis=-1)
+            query_reverse = pred.clone()
+            query_reverse[..., 0] = query_reverse[..., 0] - 0.5
+            cycle = self.model(img_reverse, query_reverse)['pred_corrs']
+            cycle[..., 0] = cycle[..., 0] - 0.5
+            mask = torch.norm(cycle - query, dim=-1) < 10 / constants.MAX_SIZE
+            if mask.sum() > 0:
+                cycle_loss = torch.nn.functional.mse_loss(cycle[mask], query[mask])
+                loss += cycle_loss
+        loss_data = loss.data.item()
+        if np.isnan(loss_data):
+            print('loss is nan during training')
+            self.optim.zero_grad()
+        else:
+            loss.backward()
+            self.push_training_data(data_pack, pred, target, loss)
+        self.optim.step()
+
+    def push_training_data(self, data_pack, pred, target, loss):
+        tb_datapack = tensorboard_helper.TensorboardDatapack()
+        tb_datapack.set_training(True)
+        tb_datapack.set_iteration(self.iteration)
+        tb_datapack.add_histogram({'distribution/pred': pred})
+        tb_datapack.add_histogram({'distribution/target': target})
+        tb_datapack.add_scalar({'loss/train': loss})
+        self.tb_pusher.push_to_tensorboard(tb_datapack)
+
+    def resume(self):
+        '''resume training:
+        resume from the recorded epoch, iteration, and saved weights.
+        resume from the model with the same name.
+
+        Arguments:
+            opt {[type]} -- [description]
+        '''
+        if hasattr(self.opt, 'load_weights'):
+            assert self.opt.load_weights is None or self.opt.load_weights == False
+        # 1. load check point
+        checkpoint_path = os.path.join(self.opt.out, 'checkpoint.pth.tar')
+        if os.path.isfile(checkpoint_path):
+            checkpoint = torch.load(checkpoint_path)
+        else:
+            raise FileNotFoundError(
+                'model check point cannnot found: {0}'.format(checkpoint_path))
+        # 2. load data
+        self.epoch = checkpoint['epoch']
+        self.iteration = checkpoint['iteration']
+        self.load_pretrained_weights()
+        self.optim.load_state_dict(checkpoint['optim_state_dict'])
+
+    def load_pretrained_weights(self):
+        '''
+        load pretrained weights from another model
+        '''
+        # if hasattr(self.opt, 'resume'):
+        #     assert self.opt.resume is False
+        assert os.path.isfile(self.opt.load_weights_path), self.opt.load_weights_path
+
+        saved_weights = torch.load(self.opt.load_weights_path)['model_state_dict']
+        utils.safe_load_weights(self.model, saved_weights)
+        content_list = []
+        content_list += [f'Loaded pretrained weights from {self.opt.load_weights_path}']
+        utils.print_notification(content_list)
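A standalone sketch of the cycle-consistency term used in train_batch above (toy tensors only, not part of the commit; `max_size` stands in for constants.MAX_SIZE and the random tensors stand in for the network outputs): predictions are fed back as queries, and only cycles that land within a 10-pixel radius of the original query contribute to the extra loss.

```python
import torch
import torch.nn.functional as F

max_size = 256
query = torch.rand(2, 100, 2)                    # normalized query coordinates
pred = query + 0.01 * torch.randn_like(query)    # stand-in for model(img, query)
cycle = pred + 0.01 * torch.randn_like(pred)     # stand-in for model(img, pred)

loss = F.mse_loss(pred, query)                   # toy target = query, for self-containment
mask = torch.norm(cycle - query, dim=-1) < 10 / max_size
if mask.sum() > 0:
    loss = loss + F.mse_loss(cycle[mask], query[mask])
```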
third_party/COTR/COTR/trainers/tensorboard_helper.py
ADDED
@@ -0,0 +1,97 @@
+import abc
+
+import tensorboardX
+
+
+class TensorboardDatapack():
+    '''data dictionary for pushing to tb
+    '''
+
+    def __init__(self):
+        self.SCALAR_NAME = 'scalar'
+        self.HISTOGRAM_NAME = 'histogram'
+        self.IMAGE_NAME = 'image'
+        self.TEXT_NAME = 'text'
+        self.datapack = {}
+        self.datapack[self.SCALAR_NAME] = {}
+        self.datapack[self.HISTOGRAM_NAME] = {}
+        self.datapack[self.IMAGE_NAME] = {}
+        self.datapack[self.TEXT_NAME] = {}
+
+    def set_training(self, training):
+        self.training = training
+
+    def set_iteration(self, iteration):
+        self.iteration = iteration
+
+    def add_scalar(self, scalar_dict):
+        self.datapack[self.SCALAR_NAME].update(scalar_dict)
+
+    def add_histogram(self, histogram_dict):
+        self.datapack[self.HISTOGRAM_NAME].update(histogram_dict)
+
+    def add_image(self, image_dict):
+        self.datapack[self.IMAGE_NAME].update(image_dict)
+
+    def add_text(self, text_dict):
+        self.datapack[self.TEXT_NAME].update(text_dict)
+
+
+class TensorboardHelperBase(abc.ABC):
+    '''abstract base class for tb helpers
+    '''
+
+    def __init__(self, tb_writer):
+        self.tb_writer = tb_writer
+
+    @abc.abstractmethod
+    def add_data(self, tb_datapack):
+        pass
+
+
+class TensorboardScalarHelper(TensorboardHelperBase):
+    def add_data(self, tb_datapack):
+        scalar_dict = tb_datapack.datapack[tb_datapack.SCALAR_NAME]
+        for key, val in scalar_dict.items():
+            self.tb_writer.add_scalar(
+                key, val, global_step=tb_datapack.iteration)
+
+
+class TensorboardHistogramHelper(TensorboardHelperBase):
+    def add_data(self, tb_datapack):
+        histogram_dict = tb_datapack.datapack[tb_datapack.HISTOGRAM_NAME]
+        for key, val in histogram_dict.items():
+            self.tb_writer.add_histogram(
+                key, val, global_step=tb_datapack.iteration)
+
+
+class TensorboardImageHelper(TensorboardHelperBase):
+    def add_data(self, tb_datapack):
+        image_dict = tb_datapack.datapack[tb_datapack.IMAGE_NAME]
+        for key, val in image_dict.items():
+            self.tb_writer.add_image(
+                key, val, global_step=tb_datapack.iteration)
+
+
+class TensorboardTextHelper(TensorboardHelperBase):
+    def add_data(self, tb_datapack):
+        text_dict = tb_datapack.datapack[tb_datapack.TEXT_NAME]
+        for key, val in text_dict.items():
+            self.tb_writer.add_text(
+                key, val, global_step=tb_datapack.iteration)
+
+
+class TensorboardPusher():
+    def __init__(self, opt):
+        self.tb_writer = tensorboardX.SummaryWriter(opt.tb_out)
+        scalar_helper = TensorboardScalarHelper(self.tb_writer)
+        histogram_helper = TensorboardHistogramHelper(self.tb_writer)
+        image_helper = TensorboardImageHelper(self.tb_writer)
+        text_helper = TensorboardTextHelper(self.tb_writer)
+        self.helper_list = [scalar_helper,
+                            histogram_helper, image_helper, text_helper]
+
+    def push_to_tensorboard(self, tb_datapack):
+        for helper in self.helper_list:
+            helper.add_data(tb_datapack)
+        self.tb_writer.flush()
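A minimal usage sketch (hypothetical log directory and options object, not part of the commit): bundle a few values into a TensorboardDatapack and push them in one call.

```python
from types import SimpleNamespace
from COTR.trainers import tensorboard_helper  # assumed import path

pusher = tensorboard_helper.TensorboardPusher(SimpleNamespace(tb_out='./tb_logs'))
pack = tensorboard_helper.TensorboardDatapack()
pack.set_training(True)
pack.set_iteration(0)
pack.add_scalar({'loss/train': 0.42})
pack.add_text({'note': 'smoke test'})
pusher.push_to_tensorboard(pack)  # each helper writes its slice of the datapack
```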
third_party/COTR/COTR/transformations/transform_basics.py
ADDED
@@ -0,0 +1,114 @@
+import numpy as np
+
+from COTR.transformations import transformations
+from COTR.utils import constants
+
+
+class Rotation():
+    def __init__(self, quat):
+        """
+        quaternion format (w, x, y, z)
+        """
+        assert quat.dtype == np.float32
+        self.quaternion = quat
+
+    def __str__(self):
+        string = '{0}'.format(self.quaternion)
+        return string
+
+    @classmethod
+    def from_matrix(cls, mat):
+        assert isinstance(mat, np.ndarray)
+        if mat.shape == (3, 3):
+            id_mat = np.eye(4)
+            id_mat[0:3, 0:3] = mat
+            mat = id_mat
+        assert mat.shape == (4, 4)
+        quat = transformations.quaternion_from_matrix(mat).astype(constants.DEFAULT_PRECISION)
+        return cls(quat)
+
+    @property
+    def rotation_matrix(self):
+        return transformations.quaternion_matrix(self.quaternion).astype(constants.DEFAULT_PRECISION)
+
+    @rotation_matrix.setter
+    def rotation_matrix(self, mat):
+        assert isinstance(mat, np.ndarray)
+        assert mat.shape == (4, 4)
+        quat = transformations.quaternion_from_matrix(mat)
+        self.quaternion = quat
+
+    @property
+    def quaternion(self):
+        assert isinstance(self._quaternion, np.ndarray)
+        assert self._quaternion.shape == (4,)
+        assert np.isclose(np.linalg.norm(self._quaternion), 1.0), 'self._quaternion is not normalized or valid'
+        return self._quaternion
+
+    @quaternion.setter
+    def quaternion(self, quat):
+        assert isinstance(quat, np.ndarray)
+        assert quat.shape == (4,)
+        if not np.isclose(np.linalg.norm(quat), 1.0):
+            print(f'WARNING: normalizing the input quatternion to unit quaternion: {np.linalg.norm(quat)}')
+            quat = quat / np.linalg.norm(quat)
+        assert np.isclose(np.linalg.norm(quat), 1.0), f'input quaternion is not normalized or valid: {quat}'
+        self._quaternion = quat
+
+
+class UnstableRotation():
+    def __init__(self, mat):
+        assert isinstance(mat, np.ndarray)
+        if mat.shape == (3, 3):
+            id_mat = np.eye(4)
+            id_mat[0:3, 0:3] = mat
+            mat = id_mat
+        assert mat.shape == (4, 4)
+        mat[:3, 3] = 0
+        self._rotation_matrix = mat
+
+    def __str__(self):
+        string = f'rotation_matrix: {self.rotation_matrix}'
+        return string
+
+    @property
+    def rotation_matrix(self):
+        return self._rotation_matrix
+
+
+class Translation():
+    def __init__(self, vec):
+        assert vec.dtype == np.float32
+        self.translation_vector = vec
+
+    def __str__(self):
+        string = '{0}'.format(self.translation_vector)
+        return string
+
+    @classmethod
+    def from_matrix(cls, mat):
+        assert isinstance(mat, np.ndarray)
+        assert mat.shape == (4, 4)
+        vec = transformations.translation_from_matrix(mat)
+        return cls(vec)
+
+    @property
+    def translation_matrix(self):
+        return transformations.translation_matrix(self.translation_vector).astype(constants.DEFAULT_PRECISION)
+
+    @translation_matrix.setter
+    def translation_matrix(self, mat):
+        assert isinstance(mat, np.ndarray)
+        assert mat.shape == (4, 4)
+        vec = transformations.translation_from_matrix(mat)
+        self.translation_vector = vec
+
+    @property
+    def translation_vector(self):
+        return self._translation_vector
+
+    @translation_vector.setter
+    def translation_vector(self, vec):
+        assert isinstance(vec, np.ndarray)
+        assert vec.shape == (3,)
+        self._translation_vector = vec
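A small round-trip sketch (not part of the commit; the import path is assumed) of the matrix/quaternion conversions that Rotation wraps, using the bundled transformations module directly.

```python
import numpy as np
from COTR.transformations import transformations  # assumed import path

R = transformations.rotation_matrix(0.3, [0, 0, 1])         # 4x4 rotation about z
q = transformations.quaternion_from_matrix(R)                # (w, x, y, z)
assert np.allclose(transformations.quaternion_matrix(q), R)  # back to the same matrix
```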
third_party/COTR/COTR/transformations/transformations.py
ADDED
@@ -0,0 +1,1951 @@
+# -*- coding: utf-8 -*-
+# transformations.py
+
+# Copyright (c) 2006-2019, Christoph Gohlke
+# Copyright (c) 2006-2019, The Regents of the University of California
+# Produced at the Laboratory for Fluorescence Dynamics
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""Homogeneous Transformation Matrices and Quaternions.
+
+Transformations is a Python library for calculating 4x4 matrices for
+translating, rotating, reflecting, scaling, shearing, projecting,
+orthogonalizing, and superimposing arrays of 3D homogeneous coordinates
+as well as for converting between rotation matrices, Euler angles,
+and quaternions. Also includes an Arcball control object and
+functions to decompose transformation matrices.
+
+:Author:
+  `Christoph Gohlke <https://www.lfd.uci.edu/~gohlke/>`_
+
+:Organization:
+  Laboratory for Fluorescence Dynamics. University of California, Irvine
+
+:License: 3-clause BSD
+
+:Version: 2019.2.20
+
+Requirements
+------------
+* `CPython 2.7 or 3.5+ <https://www.python.org>`_
+* `Numpy 1.14 <https://www.numpy.org>`_
+* A Python distutils compatible C compiler (build)
+
+Revisions
+---------
+2019.1.1
+    Update copyright year.
+
+Notes
+-----
+Transformations.py is no longer actively developed and has a few known issues
+and numerical instabilities. The module is mostly superseded by other modules
+for 3D transformations and quaternions:
+
+* `Scipy.spatial.transform <https://github.com/scipy/scipy/tree/master/
+  scipy/spatial/transform>`_
+* `Transforms3d <https://github.com/matthew-brett/transforms3d>`_
+  (includes most code of this module)
+* `Numpy-quaternion <https://github.com/moble/quaternion>`_
+* `Blender.mathutils <https://docs.blender.org/api/master/mathutils.html>`_
+
+The API is not stable yet and is expected to change between revisions.
+
+Python 2.7 and 3.4 are deprecated.
+
+This Python code is not optimized for speed. Refer to the transformations.c
+module for a faster implementation of some functions.
+
+Documentation in HTML format can be generated with epydoc.
+
+Matrices (M) can be inverted using numpy.linalg.inv(M), be concatenated using
+numpy.dot(M0, M1), or transform homogeneous coordinate arrays (v) using
+numpy.dot(M, v) for shape (4, \*) column vectors, respectively
+numpy.dot(v, M.T) for shape (\*, 4) row vectors ("array of points").
+
+This module follows the "column vectors on the right" and "row major storage"
+(C contiguous) conventions. The translation components are in the right column
+of the transformation matrix, i.e. M[:3, 3].
+The transpose of the transformation matrices may have to be used to interface
+with other graphics systems, e.g. OpenGL's glMultMatrixd(). See also [16].
+
+Calculations are carried out with numpy.float64 precision.
+
+Vector, point, quaternion, and matrix function arguments are expected to be
+"array like", i.e. tuple, list, or numpy arrays.
+
+Return types are numpy arrays unless specified otherwise.
+
+Angles are in radians unless specified otherwise.
+
+Quaternions w+ix+jy+kz are represented as [w, x, y, z].
+
+A triple of Euler angles can be applied/interpreted in 24 ways, which can
+be specified using a 4 character string or encoded 4-tuple:
+
+  *Axes 4-string*: e.g. 'sxyz' or 'ryxy'
+
+  - first character : rotations are applied to 's'tatic or 'r'otating frame
+  - remaining characters : successive rotation axis 'x', 'y', or 'z'
+
+  *Axes 4-tuple*: e.g. (0, 0, 0, 0) or (1, 1, 1, 1)
+
+  - inner axis: code of axis ('x':0, 'y':1, 'z':2) of rightmost matrix.
+  - parity : even (0) if inner axis 'x' is followed by 'y', 'y' is followed
+    by 'z', or 'z' is followed by 'x'. Otherwise odd (1).
+  - repetition : first and last axis are same (1) or different (0).
+  - frame : rotations are applied to static (0) or rotating (1) frame.
+
+References
+----------
+(1) Matrices and transformations. Ronald Goldman.
+    In "Graphics Gems I", pp 472-475. Morgan Kaufmann, 1990.
+(2) More matrices and transformations: shear and pseudo-perspective.
+    Ronald Goldman. In "Graphics Gems II", pp 320-323. Morgan Kaufmann, 1991.
+(3) Decomposing a matrix into simple transformations. Spencer Thomas.
+    In "Graphics Gems II", pp 320-323. Morgan Kaufmann, 1991.
+(4) Recovering the data from the transformation matrix. Ronald Goldman.
+    In "Graphics Gems II", pp 324-331. Morgan Kaufmann, 1991.
+(5) Euler angle conversion. Ken Shoemake.
+    In "Graphics Gems IV", pp 222-229. Morgan Kaufmann, 1994.
+(6) Arcball rotation control. Ken Shoemake.
+    In "Graphics Gems IV", pp 175-192. Morgan Kaufmann, 1994.
+(7) Representing attitude: Euler angles, unit quaternions, and rotation
+    vectors. James Diebel. 2006.
+(8) A discussion of the solution for the best rotation to relate two sets
+    of vectors. W Kabsch. Acta Cryst. 1978. A34, 827-828.
+(9) Closed-form solution of absolute orientation using unit quaternions.
+    BKP Horn. J Opt Soc Am A. 1987. 4(4):629-642.
+(10) Quaternions. Ken Shoemake.
+    http://www.sfu.ca/~jwa3/cmpt461/files/quatut.pdf
+(11) From quaternion to matrix and back. JMP van Waveren. 2005.
+    http://www.intel.com/cd/ids/developer/asmo-na/eng/293748.htm
+(12) Uniform random rotations. Ken Shoemake.
+    In "Graphics Gems III", pp 124-132. Morgan Kaufmann, 1992.
+(13) Quaternion in molecular modeling. CFF Karney.
+    J Mol Graph Mod, 25(5):595-604
+(14) New method for extracting the quaternion from a rotation matrix.
+    Itzhack Y Bar-Itzhack, J Guid Contr Dynam. 2000. 23(6): 1085-1087.
+(15) Multiple View Geometry in Computer Vision. Hartley and Zissermann.
+    Cambridge University Press; 2nd Ed. 2004. Chapter 4, Algorithm 4.7, p 130.
+(16) Column Vectors vs. Row Vectors.
+    http://steve.hollasch.net/cgindex/math/matrix/column-vec.html
+
+Examples
+--------
+>>> alpha, beta, gamma = 0.123, -1.234, 2.345
+>>> origin, xaxis, yaxis, zaxis = [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]
+>>> I = identity_matrix()
+>>> Rx = rotation_matrix(alpha, xaxis)
+>>> Ry = rotation_matrix(beta, yaxis)
+>>> Rz = rotation_matrix(gamma, zaxis)
+>>> R = concatenate_matrices(Rx, Ry, Rz)
+>>> euler = euler_from_matrix(R, 'rxyz')
+>>> numpy.allclose([alpha, beta, gamma], euler)
+True
+>>> Re = euler_matrix(alpha, beta, gamma, 'rxyz')
+>>> is_same_transform(R, Re)
+True
+>>> al, be, ga = euler_from_matrix(Re, 'rxyz')
+>>> is_same_transform(Re, euler_matrix(al, be, ga, 'rxyz'))
+True
+>>> qx = quaternion_about_axis(alpha, xaxis)
+>>> qy = quaternion_about_axis(beta, yaxis)
+>>> qz = quaternion_about_axis(gamma, zaxis)
+>>> q = quaternion_multiply(qx, qy)
+>>> q = quaternion_multiply(q, qz)
+>>> Rq = quaternion_matrix(q)
+>>> is_same_transform(R, Rq)
+True
+>>> S = scale_matrix(1.23, origin)
+>>> T = translation_matrix([1, 2, 3])
+>>> Z = shear_matrix(beta, xaxis, origin, zaxis)
+>>> R = random_rotation_matrix(numpy.random.rand(3))
+>>> M = concatenate_matrices(T, R, Z, S)
+>>> scale, shear, angles, trans, persp = decompose_matrix(M)
+>>> numpy.allclose(scale, 1.23)
+True
+>>> numpy.allclose(trans, [1, 2, 3])
+True
+>>> numpy.allclose(shear, [0, math.tan(beta), 0])
+True
+>>> is_same_transform(R, euler_matrix(axes='sxyz', *angles))
+True
+>>> M1 = compose_matrix(scale, shear, angles, trans, persp)
+>>> is_same_transform(M, M1)
+True
+>>> v0, v1 = random_vector(3), random_vector(3)
+>>> M = rotation_matrix(angle_between_vectors(v0, v1), vector_product(v0, v1))
+>>> v2 = numpy.dot(v0, M[:3,:3].T)
+>>> numpy.allclose(unit_vector(v1), unit_vector(v2))
+True
+
+"""
+
+from __future__ import division, print_function
+
+__version__ = '2019.2.20'
+__docformat__ = 'restructuredtext en'
+
+import math
+
+import numpy
+
+
+def identity_matrix():
+    """Return 4x4 identity/unit matrix.
+
+    >>> I = identity_matrix()
+    >>> numpy.allclose(I, numpy.dot(I, I))
+    True
+    >>> numpy.sum(I), numpy.trace(I)
+    (4.0, 4.0)
+    >>> numpy.allclose(I, numpy.identity(4))
+    True
+
+    """
+    return numpy.identity(4)
+
+
+def translation_matrix(direction):
+    """Return matrix to translate by direction vector.
+
+    >>> v = numpy.random.random(3) - 0.5
+    >>> numpy.allclose(v, translation_matrix(v)[:3, 3])
+    True
+
+    """
+    M = numpy.identity(4)
+    M[:3, 3] = direction[:3]
+    return M
+
+
+def translation_from_matrix(matrix):
+    """Return translation vector from translation matrix.
+
+    >>> v0 = numpy.random.random(3) - 0.5
+    >>> v1 = translation_from_matrix(translation_matrix(v0))
+    >>> numpy.allclose(v0, v1)
+    True
+
+    """
+    return numpy.array(matrix, copy=False)[:3, 3].copy()
+
+
+def reflection_matrix(point, normal):
+    """Return matrix to mirror at plane defined by point and normal vector.
+
+    >>> v0 = numpy.random.random(4) - 0.5
+    >>> v0[3] = 1.
+    >>> v1 = numpy.random.random(3) - 0.5
+    >>> R = reflection_matrix(v0, v1)
+    >>> numpy.allclose(2, numpy.trace(R))
+    True
+    >>> numpy.allclose(v0, numpy.dot(R, v0))
+    True
+    >>> v2 = v0.copy()
+    >>> v2[:3] += v1
+    >>> v3 = v0.copy()
+    >>> v2[:3] -= v1
+    >>> numpy.allclose(v2, numpy.dot(R, v3))
+    True
+
+    """
+    normal = unit_vector(normal[:3])
+    M = numpy.identity(4)
+    M[:3, :3] -= 2.0 * numpy.outer(normal, normal)
+    M[:3, 3] = (2.0 * numpy.dot(point[:3], normal)) * normal
+    return M
+
+
+def reflection_from_matrix(matrix):
+    """Return mirror plane point and normal vector from reflection matrix.
+
+    >>> v0 = numpy.random.random(3) - 0.5
+    >>> v1 = numpy.random.random(3) - 0.5
+    >>> M0 = reflection_matrix(v0, v1)
+    >>> point, normal = reflection_from_matrix(M0)
+    >>> M1 = reflection_matrix(point, normal)
+    >>> is_same_transform(M0, M1)
+    True
+
+    """
+    M = numpy.array(matrix, dtype=numpy.float64, copy=False)
+    # normal: unit eigenvector corresponding to eigenvalue -1
+    w, V = numpy.linalg.eig(M[:3, :3])
+    i = numpy.where(abs(numpy.real(w) + 1.0) < 1e-8)[0]
+    if not len(i):
+        raise ValueError('no unit eigenvector corresponding to eigenvalue -1')
+    normal = numpy.real(V[:, i[0]]).squeeze()
+    # point: any unit eigenvector corresponding to eigenvalue 1
+    w, V = numpy.linalg.eig(M)
+    i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0]
+    if not len(i):
+        raise ValueError('no unit eigenvector corresponding to eigenvalue 1')
+    point = numpy.real(V[:, i[-1]]).squeeze()
+    point /= point[3]
+    return point, normal
+
+
+def rotation_matrix(angle, direction, point=None):
+    """Return matrix to rotate about axis defined by point and direction.
+
+    >>> R = rotation_matrix(math.pi/2, [0, 0, 1], [1, 0, 0])
+    >>> numpy.allclose(numpy.dot(R, [0, 0, 0, 1]), [1, -1, 0, 1])
+    True
+    >>> angle = (random.random() - 0.5) * (2*math.pi)
+    >>> direc = numpy.random.random(3) - 0.5
+    >>> point = numpy.random.random(3) - 0.5
+    >>> R0 = rotation_matrix(angle, direc, point)
+    >>> R1 = rotation_matrix(angle-2*math.pi, direc, point)
+    >>> is_same_transform(R0, R1)
+    True
+    >>> R0 = rotation_matrix(angle, direc, point)
+    >>> R1 = rotation_matrix(-angle, -direc, point)
+    >>> is_same_transform(R0, R1)
+    True
+    >>> I = numpy.identity(4, numpy.float64)
+    >>> numpy.allclose(I, rotation_matrix(math.pi*2, direc))
+    True
+    >>> numpy.allclose(2, numpy.trace(rotation_matrix(math.pi/2,
+    ...                                               direc, point)))
+    True
+
+    """
+    sina = math.sin(angle)
+    cosa = math.cos(angle)
+    direction = unit_vector(direction[:3])
+    # rotation matrix around unit vector
+    R = numpy.diag([cosa, cosa, cosa])
+    R += numpy.outer(direction, direction) * (1.0 - cosa)
+    direction *= sina
+    R += numpy.array([[0.0, -direction[2], direction[1]],
+                      [direction[2], 0.0, -direction[0]],
+                      [-direction[1], direction[0], 0.0]])
+    M = numpy.identity(4)
+    M[:3, :3] = R
+    if point is not None:
+        # rotation not around origin
+        point = numpy.array(point[:3], dtype=numpy.float64, copy=False)
+        M[:3, 3] = point - numpy.dot(R, point)
+    return M
+
+
+def rotation_from_matrix(matrix):
+    """Return rotation angle and axis from rotation matrix.
+
+    >>> angle = (random.random() - 0.5) * (2*math.pi)
+    >>> direc = numpy.random.random(3) - 0.5
+    >>> point = numpy.random.random(3) - 0.5
+    >>> R0 = rotation_matrix(angle, direc, point)
+    >>> angle, direc, point = rotation_from_matrix(R0)
+    >>> R1 = rotation_matrix(angle, direc, point)
+    >>> is_same_transform(R0, R1)
+    True
+
+    """
+    R = numpy.array(matrix, dtype=numpy.float64, copy=False)
+    R33 = R[:3, :3]
+    # direction: unit eigenvector of R33 corresponding to eigenvalue of 1
+    w, W = numpy.linalg.eig(R33.T)
+    i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0]
+    if not len(i):
+        raise ValueError('no unit eigenvector corresponding to eigenvalue 1')
+    direction = numpy.real(W[:, i[-1]]).squeeze()
+    # point: unit eigenvector of R33 corresponding to eigenvalue of 1
+    w, Q = numpy.linalg.eig(R)
+    i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0]
+    if not len(i):
+        raise ValueError('no unit eigenvector corresponding to eigenvalue 1')
+    point = numpy.real(Q[:, i[-1]]).squeeze()
+    point /= point[3]
+    # rotation angle depending on direction
+    cosa = (numpy.trace(R33) - 1.0) / 2.0
+    if abs(direction[2]) > 1e-8:
+        sina = (R[1, 0] + (cosa-1.0)*direction[0]*direction[1]) / direction[2]
+    elif abs(direction[1]) > 1e-8:
+        sina = (R[0, 2] + (cosa-1.0)*direction[0]*direction[2]) / direction[1]
+    else:
+        sina = (R[2, 1] + (cosa-1.0)*direction[1]*direction[2]) / direction[0]
+    angle = math.atan2(sina, cosa)
+    return angle, direction, point
+
+
+def scale_matrix(factor, origin=None, direction=None):
+    """Return matrix to scale by factor around origin in direction.
+
+    Use factor -1 for point symmetry.
+
+    >>> v = (numpy.random.rand(4, 5) - 0.5) * 20
+    >>> v[3] = 1
+    >>> S = scale_matrix(-1.234)
+    >>> numpy.allclose(numpy.dot(S, v)[:3], -1.234*v[:3])
+    True
+    >>> factor = random.random() * 10 - 5
+    >>> origin = numpy.random.random(3) - 0.5
+    >>> direct = numpy.random.random(3) - 0.5
+    >>> S = scale_matrix(factor, origin)
+    >>> S = scale_matrix(factor, origin, direct)
+
+    """
+    if direction is None:
+        # uniform scaling
+        M = numpy.diag([factor, factor, factor, 1.0])
+        if origin is not None:
+            M[:3, 3] = origin[:3]
+            M[:3, 3] *= 1.0 - factor
+    else:
+        # nonuniform scaling
+        direction = unit_vector(direction[:3])
+        factor = 1.0 - factor
+        M = numpy.identity(4)
+        M[:3, :3] -= factor * numpy.outer(direction, direction)
+        if origin is not None:
+            M[:3, 3] = (factor * numpy.dot(origin[:3], direction)) * direction
+    return M
+
+
+def scale_from_matrix(matrix):
+    """Return scaling factor, origin and direction from scaling matrix.
+
+    >>> factor = random.random() * 10 - 5
+    >>> origin = numpy.random.random(3) - 0.5
+    >>> direct = numpy.random.random(3) - 0.5
+    >>> S0 = scale_matrix(factor, origin)
+    >>> factor, origin, direction = scale_from_matrix(S0)
+    >>> S1 = scale_matrix(factor, origin, direction)
+    >>> is_same_transform(S0, S1)
+    True
+    >>> S0 = scale_matrix(factor, origin, direct)
+    >>> factor, origin, direction = scale_from_matrix(S0)
+    >>> S1 = scale_matrix(factor, origin, direction)
+    >>> is_same_transform(S0, S1)
+    True
+
+    """
+    M = numpy.array(matrix, dtype=numpy.float64, copy=False)
+    M33 = M[:3, :3]
+    factor = numpy.trace(M33) - 2.0
+    try:
+        # direction: unit eigenvector corresponding to eigenvalue factor
+        w, V = numpy.linalg.eig(M33)
+        i = numpy.where(abs(numpy.real(w) - factor) < 1e-8)[0][0]
+        direction = numpy.real(V[:, i]).squeeze()
+        direction /= vector_norm(direction)
+    except IndexError:
+        # uniform scaling
+        factor = (factor + 2.0) / 3.0
+        direction = None
+    # origin: any eigenvector corresponding to eigenvalue 1
+    w, V = numpy.linalg.eig(M)
+    i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0]
+    if not len(i):
+        raise ValueError('no eigenvector corresponding to eigenvalue 1')
+    origin = numpy.real(V[:, i[-1]]).squeeze()
+    origin /= origin[3]
+    return factor, origin, direction
+
+
+def projection_matrix(point, normal, direction=None,
+                      perspective=None, pseudo=False):
+    """Return matrix to project onto plane defined by point and normal.
+
+    Using either perspective point, projection direction, or none of both.
+
+    If pseudo is True, perspective projections will preserve relative depth
+    such that Perspective = dot(Orthogonal, PseudoPerspective).
+
+    >>> P = projection_matrix([0, 0, 0], [1, 0, 0])
+    >>> numpy.allclose(P[1:, 1:], numpy.identity(4)[1:, 1:])
+    True
+    >>> point = numpy.random.random(3) - 0.5
+    >>> normal = numpy.random.random(3) - 0.5
+    >>> direct = numpy.random.random(3) - 0.5
+    >>> persp = numpy.random.random(3) - 0.5
+    >>> P0 = projection_matrix(point, normal)
+    >>> P1 = projection_matrix(point, normal, direction=direct)
+    >>> P2 = projection_matrix(point, normal, perspective=persp)
+    >>> P3 = projection_matrix(point, normal, perspective=persp, pseudo=True)
+    >>> is_same_transform(P2, numpy.dot(P0, P3))
+    True
+    >>> P = projection_matrix([3, 0, 0], [1, 1, 0], [1, 0, 0])
+    >>> v0 = (numpy.random.rand(4, 5) - 0.5) * 20
+    >>> v0[3] = 1
+    >>> v1 = numpy.dot(P, v0)
+    >>> numpy.allclose(v1[1], v0[1])
+    True
+    >>> numpy.allclose(v1[0], 3-v1[1])
+    True
+
+    """
+    M = numpy.identity(4)
+    point = numpy.array(point[:3], dtype=numpy.float64, copy=False)
+    normal = unit_vector(normal[:3])
+    if perspective is not None:
+        # perspective projection
+        perspective = numpy.array(perspective[:3], dtype=numpy.float64,
+                                  copy=False)
+        M[0, 0] = M[1, 1] = M[2, 2] = numpy.dot(perspective-point, normal)
|
515 |
+
M[:3, :3] -= numpy.outer(perspective, normal)
|
516 |
+
if pseudo:
|
517 |
+
# preserve relative depth
|
518 |
+
M[:3, :3] -= numpy.outer(normal, normal)
|
519 |
+
M[:3, 3] = numpy.dot(point, normal) * (perspective+normal)
|
520 |
+
else:
|
521 |
+
M[:3, 3] = numpy.dot(point, normal) * perspective
|
522 |
+
M[3, :3] = -normal
|
523 |
+
M[3, 3] = numpy.dot(perspective, normal)
|
524 |
+
elif direction is not None:
|
525 |
+
# parallel projection
|
526 |
+
direction = numpy.array(direction[:3], dtype=numpy.float64, copy=False)
|
527 |
+
scale = numpy.dot(direction, normal)
|
528 |
+
M[:3, :3] -= numpy.outer(direction, normal) / scale
|
529 |
+
M[:3, 3] = direction * (numpy.dot(point, normal) / scale)
|
530 |
+
else:
|
531 |
+
# orthogonal projection
|
532 |
+
M[:3, :3] -= numpy.outer(normal, normal)
|
533 |
+
M[:3, 3] = numpy.dot(point, normal) * normal
|
534 |
+
return M
|
535 |
+
|
536 |
+
|
537 |
+
def projection_from_matrix(matrix, pseudo=False):
|
538 |
+
"""Return projection plane and perspective point from projection matrix.
|
539 |
+
|
540 |
+
Return values are same as arguments for projection_matrix function:
|
541 |
+
point, normal, direction, perspective, and pseudo.
|
542 |
+
|
543 |
+
>>> point = numpy.random.random(3) - 0.5
|
544 |
+
>>> normal = numpy.random.random(3) - 0.5
|
545 |
+
>>> direct = numpy.random.random(3) - 0.5
|
546 |
+
>>> persp = numpy.random.random(3) - 0.5
|
547 |
+
>>> P0 = projection_matrix(point, normal)
|
548 |
+
>>> result = projection_from_matrix(P0)
|
549 |
+
>>> P1 = projection_matrix(*result)
|
550 |
+
>>> is_same_transform(P0, P1)
|
551 |
+
True
|
552 |
+
>>> P0 = projection_matrix(point, normal, direct)
|
553 |
+
>>> result = projection_from_matrix(P0)
|
554 |
+
>>> P1 = projection_matrix(*result)
|
555 |
+
>>> is_same_transform(P0, P1)
|
556 |
+
True
|
557 |
+
>>> P0 = projection_matrix(point, normal, perspective=persp, pseudo=False)
|
558 |
+
>>> result = projection_from_matrix(P0, pseudo=False)
|
559 |
+
>>> P1 = projection_matrix(*result)
|
560 |
+
>>> is_same_transform(P0, P1)
|
561 |
+
True
|
562 |
+
>>> P0 = projection_matrix(point, normal, perspective=persp, pseudo=True)
|
563 |
+
>>> result = projection_from_matrix(P0, pseudo=True)
|
564 |
+
>>> P1 = projection_matrix(*result)
|
565 |
+
>>> is_same_transform(P0, P1)
|
566 |
+
True
|
567 |
+
|
568 |
+
"""
|
569 |
+
M = numpy.array(matrix, dtype=numpy.float64, copy=False)
|
570 |
+
M33 = M[:3, :3]
|
571 |
+
w, V = numpy.linalg.eig(M)
|
572 |
+
i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0]
|
573 |
+
if not pseudo and len(i):
|
574 |
+
# point: any eigenvector corresponding to eigenvalue 1
|
575 |
+
point = numpy.real(V[:, i[-1]]).squeeze()
|
576 |
+
point /= point[3]
|
577 |
+
# direction: unit eigenvector corresponding to eigenvalue 0
|
578 |
+
w, V = numpy.linalg.eig(M33)
|
579 |
+
i = numpy.where(abs(numpy.real(w)) < 1e-8)[0]
|
580 |
+
if not len(i):
|
581 |
+
raise ValueError('no eigenvector corresponding to eigenvalue 0')
|
582 |
+
direction = numpy.real(V[:, i[0]]).squeeze()
|
583 |
+
direction /= vector_norm(direction)
|
584 |
+
# normal: unit eigenvector of M33.T corresponding to eigenvalue 0
|
585 |
+
w, V = numpy.linalg.eig(M33.T)
|
586 |
+
i = numpy.where(abs(numpy.real(w)) < 1e-8)[0]
|
587 |
+
if len(i):
|
588 |
+
# parallel projection
|
589 |
+
normal = numpy.real(V[:, i[0]]).squeeze()
|
590 |
+
normal /= vector_norm(normal)
|
591 |
+
return point, normal, direction, None, False
|
592 |
+
else:
|
593 |
+
# orthogonal projection, where normal equals direction vector
|
594 |
+
return point, direction, None, None, False
|
595 |
+
else:
|
596 |
+
# perspective projection
|
597 |
+
i = numpy.where(abs(numpy.real(w)) > 1e-8)[0]
|
598 |
+
if not len(i):
|
599 |
+
raise ValueError(
|
600 |
+
'no eigenvector not corresponding to eigenvalue 0')
|
601 |
+
point = numpy.real(V[:, i[-1]]).squeeze()
|
602 |
+
point /= point[3]
|
603 |
+
normal = - M[3, :3]
|
604 |
+
perspective = M[:3, 3] / numpy.dot(point[:3], normal)
|
605 |
+
if pseudo:
|
606 |
+
perspective -= normal
|
607 |
+
return point, normal, None, perspective, pseudo
|
608 |
+
|
609 |
+
|
610 |
+
def clip_matrix(left, right, bottom, top, near, far, perspective=False):
|
611 |
+
"""Return matrix to obtain normalized device coordinates from frustum.
|
612 |
+
|
613 |
+
The frustum bounds are axis-aligned along x (left, right),
|
614 |
+
y (bottom, top) and z (near, far).
|
615 |
+
|
616 |
+
Normalized device coordinates are in range [-1, 1] if coordinates are
|
617 |
+
inside the frustum.
|
618 |
+
|
619 |
+
If perspective is True the frustum is a truncated pyramid with the
|
620 |
+
perspective point at origin and direction along z axis, otherwise an
|
621 |
+
orthographic canonical view volume (a box).
|
622 |
+
|
623 |
+
Homogeneous coordinates transformed by the perspective clip matrix
|
624 |
+
need to be dehomogenized (divided by w coordinate).
|
625 |
+
|
626 |
+
>>> frustum = numpy.random.rand(6)
|
627 |
+
>>> frustum[1] += frustum[0]
|
628 |
+
>>> frustum[3] += frustum[2]
|
629 |
+
>>> frustum[5] += frustum[4]
|
630 |
+
>>> M = clip_matrix(perspective=False, *frustum)
|
631 |
+
>>> numpy.dot(M, [frustum[0], frustum[2], frustum[4], 1])
|
632 |
+
array([-1., -1., -1., 1.])
|
633 |
+
>>> numpy.dot(M, [frustum[1], frustum[3], frustum[5], 1])
|
634 |
+
array([ 1., 1., 1., 1.])
|
635 |
+
>>> M = clip_matrix(perspective=True, *frustum)
|
636 |
+
>>> v = numpy.dot(M, [frustum[0], frustum[2], frustum[4], 1])
|
637 |
+
>>> v / v[3]
|
638 |
+
array([-1., -1., -1., 1.])
|
639 |
+
>>> v = numpy.dot(M, [frustum[1], frustum[3], frustum[4], 1])
|
640 |
+
>>> v / v[3]
|
641 |
+
array([ 1., 1., -1., 1.])
|
642 |
+
|
643 |
+
"""
|
644 |
+
if left >= right or bottom >= top or near >= far:
|
645 |
+
raise ValueError('invalid frustum')
|
646 |
+
if perspective:
|
647 |
+
if near <= _EPS:
|
648 |
+
raise ValueError('invalid frustum: near <= 0')
|
649 |
+
t = 2.0 * near
|
650 |
+
M = [[t/(left-right), 0.0, (right+left)/(right-left), 0.0],
|
651 |
+
[0.0, t/(bottom-top), (top+bottom)/(top-bottom), 0.0],
|
652 |
+
[0.0, 0.0, (far+near)/(near-far), t*far/(far-near)],
|
653 |
+
[0.0, 0.0, -1.0, 0.0]]
|
654 |
+
else:
|
655 |
+
M = [[2.0/(right-left), 0.0, 0.0, (right+left)/(left-right)],
|
656 |
+
[0.0, 2.0/(top-bottom), 0.0, (top+bottom)/(bottom-top)],
|
657 |
+
[0.0, 0.0, 2.0/(far-near), (far+near)/(near-far)],
|
658 |
+
[0.0, 0.0, 0.0, 1.0]]
|
659 |
+
return numpy.array(M)
|
660 |
+
|
661 |
+
|
662 |
+
def shear_matrix(angle, direction, point, normal):
|
663 |
+
"""Return matrix to shear by angle along direction vector on shear plane.
|
664 |
+
|
665 |
+
The shear plane is defined by a point and normal vector. The direction
|
666 |
+
vector must be orthogonal to the plane's normal vector.
|
667 |
+
|
668 |
+
A point P is transformed by the shear matrix into P" such that
|
669 |
+
the vector P-P" is parallel to the direction vector and its extent is
|
670 |
+
given by the angle of P-P'-P", where P' is the orthogonal projection
|
671 |
+
of P onto the shear plane.
|
672 |
+
|
673 |
+
>>> angle = (random.random() - 0.5) * 4*math.pi
|
674 |
+
>>> direct = numpy.random.random(3) - 0.5
|
675 |
+
>>> point = numpy.random.random(3) - 0.5
|
676 |
+
>>> normal = numpy.cross(direct, numpy.random.random(3))
|
677 |
+
>>> S = shear_matrix(angle, direct, point, normal)
|
678 |
+
>>> numpy.allclose(1, numpy.linalg.det(S))
|
679 |
+
True
|
680 |
+
|
681 |
+
"""
|
682 |
+
normal = unit_vector(normal[:3])
|
683 |
+
direction = unit_vector(direction[:3])
|
684 |
+
if abs(numpy.dot(normal, direction)) > 1e-6:
|
685 |
+
raise ValueError('direction and normal vectors are not orthogonal')
|
686 |
+
angle = math.tan(angle)
|
687 |
+
M = numpy.identity(4)
|
688 |
+
M[:3, :3] += angle * numpy.outer(direction, normal)
|
689 |
+
M[:3, 3] = -angle * numpy.dot(point[:3], normal) * direction
|
690 |
+
return M
|
691 |
+
|
692 |
+
|
693 |
+
def shear_from_matrix(matrix):
|
694 |
+
"""Return shear angle, direction and plane from shear matrix.
|
695 |
+
|
696 |
+
>>> angle = (random.random() - 0.5) * 4*math.pi
|
697 |
+
>>> direct = numpy.random.random(3) - 0.5
|
698 |
+
>>> point = numpy.random.random(3) - 0.5
|
699 |
+
>>> normal = numpy.cross(direct, numpy.random.random(3))
|
700 |
+
>>> S0 = shear_matrix(angle, direct, point, normal)
|
701 |
+
>>> angle, direct, point, normal = shear_from_matrix(S0)
|
702 |
+
>>> S1 = shear_matrix(angle, direct, point, normal)
|
703 |
+
>>> is_same_transform(S0, S1)
|
704 |
+
True
|
705 |
+
|
706 |
+
"""
|
707 |
+
M = numpy.array(matrix, dtype=numpy.float64, copy=False)
|
708 |
+
M33 = M[:3, :3]
|
709 |
+
# normal: cross independent eigenvectors corresponding to the eigenvalue 1
|
710 |
+
w, V = numpy.linalg.eig(M33)
|
711 |
+
i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-4)[0]
|
712 |
+
if len(i) < 2:
|
713 |
+
raise ValueError('no two linear independent eigenvectors found %s' % w)
|
714 |
+
V = numpy.real(V[:, i]).squeeze().T
|
715 |
+
lenorm = -1.0
|
716 |
+
for i0, i1 in ((0, 1), (0, 2), (1, 2)):
|
717 |
+
n = numpy.cross(V[i0], V[i1])
|
718 |
+
w = vector_norm(n)
|
719 |
+
if w > lenorm:
|
720 |
+
lenorm = w
|
721 |
+
normal = n
|
722 |
+
normal /= lenorm
|
723 |
+
# direction and angle
|
724 |
+
direction = numpy.dot(M33 - numpy.identity(3), normal)
|
725 |
+
angle = vector_norm(direction)
|
726 |
+
direction /= angle
|
727 |
+
angle = math.atan(angle)
|
728 |
+
# point: eigenvector corresponding to eigenvalue 1
|
729 |
+
w, V = numpy.linalg.eig(M)
|
730 |
+
i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0]
|
731 |
+
if not len(i):
|
732 |
+
raise ValueError('no eigenvector corresponding to eigenvalue 1')
|
733 |
+
point = numpy.real(V[:, i[-1]]).squeeze()
|
734 |
+
point /= point[3]
|
735 |
+
return angle, direction, point, normal
|
736 |
+
|
737 |
+
|
738 |
+
def decompose_matrix(matrix):
|
739 |
+
"""Return sequence of transformations from transformation matrix.
|
740 |
+
|
741 |
+
matrix : array_like
|
742 |
+
Non-degenerative homogeneous transformation matrix
|
743 |
+
|
744 |
+
Return tuple of:
|
745 |
+
scale : vector of 3 scaling factors
|
746 |
+
shear : list of shear factors for x-y, x-z, y-z axes
|
747 |
+
angles : list of Euler angles about static x, y, z axes
|
748 |
+
translate : translation vector along x, y, z axes
|
749 |
+
perspective : perspective partition of matrix
|
750 |
+
|
751 |
+
Raise ValueError if matrix is of wrong type or degenerative.
|
752 |
+
|
753 |
+
>>> T0 = translation_matrix([1, 2, 3])
|
754 |
+
>>> scale, shear, angles, trans, persp = decompose_matrix(T0)
|
755 |
+
>>> T1 = translation_matrix(trans)
|
756 |
+
>>> numpy.allclose(T0, T1)
|
757 |
+
True
|
758 |
+
>>> S = scale_matrix(0.123)
|
759 |
+
>>> scale, shear, angles, trans, persp = decompose_matrix(S)
|
760 |
+
>>> scale[0]
|
761 |
+
0.123
|
762 |
+
>>> R0 = euler_matrix(1, 2, 3)
|
763 |
+
>>> scale, shear, angles, trans, persp = decompose_matrix(R0)
|
764 |
+
>>> R1 = euler_matrix(*angles)
|
765 |
+
>>> numpy.allclose(R0, R1)
|
766 |
+
True
|
767 |
+
|
768 |
+
"""
|
769 |
+
M = numpy.array(matrix, dtype=numpy.float64, copy=True).T
|
770 |
+
if abs(M[3, 3]) < _EPS:
|
771 |
+
raise ValueError('M[3, 3] is zero')
|
772 |
+
M /= M[3, 3]
|
773 |
+
P = M.copy()
|
774 |
+
P[:, 3] = 0.0, 0.0, 0.0, 1.0
|
775 |
+
if not numpy.linalg.det(P):
|
776 |
+
raise ValueError('matrix is singular')
|
777 |
+
|
778 |
+
scale = numpy.zeros((3, ))
|
779 |
+
shear = [0.0, 0.0, 0.0]
|
780 |
+
angles = [0.0, 0.0, 0.0]
|
781 |
+
|
782 |
+
if any(abs(M[:3, 3]) > _EPS):
|
783 |
+
perspective = numpy.dot(M[:, 3], numpy.linalg.inv(P.T))
|
784 |
+
M[:, 3] = 0.0, 0.0, 0.0, 1.0
|
785 |
+
else:
|
786 |
+
perspective = numpy.array([0.0, 0.0, 0.0, 1.0])
|
787 |
+
|
788 |
+
translate = M[3, :3].copy()
|
789 |
+
M[3, :3] = 0.0
|
790 |
+
|
791 |
+
row = M[:3, :3].copy()
|
792 |
+
scale[0] = vector_norm(row[0])
|
793 |
+
row[0] /= scale[0]
|
794 |
+
shear[0] = numpy.dot(row[0], row[1])
|
795 |
+
row[1] -= row[0] * shear[0]
|
796 |
+
scale[1] = vector_norm(row[1])
|
797 |
+
row[1] /= scale[1]
|
798 |
+
shear[0] /= scale[1]
|
799 |
+
shear[1] = numpy.dot(row[0], row[2])
|
800 |
+
row[2] -= row[0] * shear[1]
|
801 |
+
shear[2] = numpy.dot(row[1], row[2])
|
802 |
+
row[2] -= row[1] * shear[2]
|
803 |
+
scale[2] = vector_norm(row[2])
|
804 |
+
row[2] /= scale[2]
|
805 |
+
shear[1:] /= scale[2]
|
806 |
+
|
807 |
+
if numpy.dot(row[0], numpy.cross(row[1], row[2])) < 0:
|
808 |
+
numpy.negative(scale, scale)
|
809 |
+
numpy.negative(row, row)
|
810 |
+
|
811 |
+
angles[1] = math.asin(-row[0, 2])
|
812 |
+
if math.cos(angles[1]):
|
813 |
+
angles[0] = math.atan2(row[1, 2], row[2, 2])
|
814 |
+
angles[2] = math.atan2(row[0, 1], row[0, 0])
|
815 |
+
else:
|
816 |
+
# angles[0] = math.atan2(row[1, 0], row[1, 1])
|
817 |
+
angles[0] = math.atan2(-row[2, 1], row[1, 1])
|
818 |
+
angles[2] = 0.0
|
819 |
+
|
820 |
+
return scale, shear, angles, translate, perspective
|
821 |
+
|
822 |
+
|
823 |
+
def compose_matrix(scale=None, shear=None, angles=None, translate=None,
|
824 |
+
perspective=None):
|
825 |
+
"""Return transformation matrix from sequence of transformations.
|
826 |
+
|
827 |
+
This is the inverse of the decompose_matrix function.
|
828 |
+
|
829 |
+
Sequence of transformations:
|
830 |
+
scale : vector of 3 scaling factors
|
831 |
+
shear : list of shear factors for x-y, x-z, y-z axes
|
832 |
+
angles : list of Euler angles about static x, y, z axes
|
833 |
+
translate : translation vector along x, y, z axes
|
834 |
+
perspective : perspective partition of matrix
|
835 |
+
|
836 |
+
>>> scale = numpy.random.random(3) - 0.5
|
837 |
+
>>> shear = numpy.random.random(3) - 0.5
|
838 |
+
>>> angles = (numpy.random.random(3) - 0.5) * (2*math.pi)
|
839 |
+
>>> trans = numpy.random.random(3) - 0.5
|
840 |
+
>>> persp = numpy.random.random(4) - 0.5
|
841 |
+
>>> M0 = compose_matrix(scale, shear, angles, trans, persp)
|
842 |
+
>>> result = decompose_matrix(M0)
|
843 |
+
>>> M1 = compose_matrix(*result)
|
844 |
+
>>> is_same_transform(M0, M1)
|
845 |
+
True
|
846 |
+
|
847 |
+
"""
|
848 |
+
M = numpy.identity(4)
|
849 |
+
if perspective is not None:
|
850 |
+
P = numpy.identity(4)
|
851 |
+
P[3, :] = perspective[:4]
|
852 |
+
M = numpy.dot(M, P)
|
853 |
+
if translate is not None:
|
854 |
+
T = numpy.identity(4)
|
855 |
+
T[:3, 3] = translate[:3]
|
856 |
+
M = numpy.dot(M, T)
|
857 |
+
if angles is not None:
|
858 |
+
R = euler_matrix(angles[0], angles[1], angles[2], 'sxyz')
|
859 |
+
M = numpy.dot(M, R)
|
860 |
+
if shear is not None:
|
861 |
+
Z = numpy.identity(4)
|
862 |
+
Z[1, 2] = shear[2]
|
863 |
+
Z[0, 2] = shear[1]
|
864 |
+
Z[0, 1] = shear[0]
|
865 |
+
M = numpy.dot(M, Z)
|
866 |
+
if scale is not None:
|
867 |
+
S = numpy.identity(4)
|
868 |
+
S[0, 0] = scale[0]
|
869 |
+
S[1, 1] = scale[1]
|
870 |
+
S[2, 2] = scale[2]
|
871 |
+
M = numpy.dot(M, S)
|
872 |
+
M /= M[3, 3]
|
873 |
+
return M
|
874 |
+
|
875 |
+
|
876 |
+
def orthogonalization_matrix(lengths, angles):
|
877 |
+
"""Return orthogonalization matrix for crystallographic cell coordinates.
|
878 |
+
|
879 |
+
Angles are expected in degrees.
|
880 |
+
|
881 |
+
The de-orthogonalization matrix is the inverse.
|
882 |
+
|
883 |
+
>>> O = orthogonalization_matrix([10, 10, 10], [90, 90, 90])
|
884 |
+
>>> numpy.allclose(O[:3, :3], numpy.identity(3, float) * 10)
|
885 |
+
True
|
886 |
+
>>> O = orthogonalization_matrix([9.8, 12.0, 15.5], [87.2, 80.7, 69.7])
|
887 |
+
>>> numpy.allclose(numpy.sum(O), 43.063229)
|
888 |
+
True
|
889 |
+
|
890 |
+
"""
|
891 |
+
a, b, c = lengths
|
892 |
+
angles = numpy.radians(angles)
|
893 |
+
sina, sinb, _ = numpy.sin(angles)
|
894 |
+
cosa, cosb, cosg = numpy.cos(angles)
|
895 |
+
co = (cosa * cosb - cosg) / (sina * sinb)
|
896 |
+
return numpy.array([
|
897 |
+
[a*sinb*math.sqrt(1.0-co*co), 0.0, 0.0, 0.0],
|
898 |
+
[-a*sinb*co, b*sina, 0.0, 0.0],
|
899 |
+
[a*cosb, b*cosa, c, 0.0],
|
900 |
+
[0.0, 0.0, 0.0, 1.0]])
|
901 |
+
|
902 |
+
|
903 |
+
def affine_matrix_from_points(v0, v1, shear=True, scale=True, usesvd=True):
|
904 |
+
"""Return affine transform matrix to register two point sets.
|
905 |
+
|
906 |
+
v0 and v1 are shape (ndims, \*) arrays of at least ndims non-homogeneous
|
907 |
+
coordinates, where ndims is the dimensionality of the coordinate space.
|
908 |
+
|
909 |
+
If shear is False, a similarity transformation matrix is returned.
|
910 |
+
If also scale is False, a rigid/Euclidean transformation matrix
|
911 |
+
is returned.
|
912 |
+
|
913 |
+
By default the algorithm by Hartley and Zissermann [15] is used.
|
914 |
+
If usesvd is True, similarity and Euclidean transformation matrices
|
915 |
+
are calculated by minimizing the weighted sum of squared deviations
|
916 |
+
(RMSD) according to the algorithm by Kabsch [8].
|
917 |
+
Otherwise, and if ndims is 3, the quaternion based algorithm by Horn [9]
|
918 |
+
is used, which is slower when using this Python implementation.
|
919 |
+
|
920 |
+
The returned matrix performs rotation, translation and uniform scaling
|
921 |
+
(if specified).
|
922 |
+
|
923 |
+
>>> v0 = [[0, 1031, 1031, 0], [0, 0, 1600, 1600]]
|
924 |
+
>>> v1 = [[675, 826, 826, 677], [55, 52, 281, 277]]
|
925 |
+
>>> affine_matrix_from_points(v0, v1)
|
926 |
+
array([[ 0.14549, 0.00062, 675.50008],
|
927 |
+
[ 0.00048, 0.14094, 53.24971],
|
928 |
+
[ 0. , 0. , 1. ]])
|
929 |
+
>>> T = translation_matrix(numpy.random.random(3)-0.5)
|
930 |
+
>>> R = random_rotation_matrix(numpy.random.random(3))
|
931 |
+
>>> S = scale_matrix(random.random())
|
932 |
+
>>> M = concatenate_matrices(T, R, S)
|
933 |
+
>>> v0 = (numpy.random.rand(4, 100) - 0.5) * 20
|
934 |
+
>>> v0[3] = 1
|
935 |
+
>>> v1 = numpy.dot(M, v0)
|
936 |
+
>>> v0[:3] += numpy.random.normal(0, 1e-8, 300).reshape(3, -1)
|
937 |
+
>>> M = affine_matrix_from_points(v0[:3], v1[:3])
|
938 |
+
>>> numpy.allclose(v1, numpy.dot(M, v0))
|
939 |
+
True
|
940 |
+
|
941 |
+
More examples in superimposition_matrix()
|
942 |
+
|
943 |
+
"""
|
944 |
+
v0 = numpy.array(v0, dtype=numpy.float64, copy=True)
|
945 |
+
v1 = numpy.array(v1, dtype=numpy.float64, copy=True)
|
946 |
+
|
947 |
+
ndims = v0.shape[0]
|
948 |
+
if ndims < 2 or v0.shape[1] < ndims or v0.shape != v1.shape:
|
949 |
+
raise ValueError('input arrays are of wrong shape or type')
|
950 |
+
|
951 |
+
# move centroids to origin
|
952 |
+
t0 = -numpy.mean(v0, axis=1)
|
953 |
+
M0 = numpy.identity(ndims+1)
|
954 |
+
M0[:ndims, ndims] = t0
|
955 |
+
v0 += t0.reshape(ndims, 1)
|
956 |
+
t1 = -numpy.mean(v1, axis=1)
|
957 |
+
M1 = numpy.identity(ndims+1)
|
958 |
+
M1[:ndims, ndims] = t1
|
959 |
+
v1 += t1.reshape(ndims, 1)
|
960 |
+
|
961 |
+
if shear:
|
962 |
+
# Affine transformation
|
963 |
+
A = numpy.concatenate((v0, v1), axis=0)
|
964 |
+
u, s, vh = numpy.linalg.svd(A.T)
|
965 |
+
vh = vh[:ndims].T
|
966 |
+
B = vh[:ndims]
|
967 |
+
C = vh[ndims:2*ndims]
|
968 |
+
t = numpy.dot(C, numpy.linalg.pinv(B))
|
969 |
+
t = numpy.concatenate((t, numpy.zeros((ndims, 1))), axis=1)
|
970 |
+
M = numpy.vstack((t, ((0.0,)*ndims) + (1.0,)))
|
971 |
+
elif usesvd or ndims != 3:
|
972 |
+
# Rigid transformation via SVD of covariance matrix
|
973 |
+
u, s, vh = numpy.linalg.svd(numpy.dot(v1, v0.T))
|
974 |
+
# rotation matrix from SVD orthonormal bases
|
975 |
+
R = numpy.dot(u, vh)
|
976 |
+
if numpy.linalg.det(R) < 0.0:
|
977 |
+
# R does not constitute right handed system
|
978 |
+
R -= numpy.outer(u[:, ndims-1], vh[ndims-1, :]*2.0)
|
979 |
+
s[-1] *= -1.0
|
980 |
+
# homogeneous transformation matrix
|
981 |
+
M = numpy.identity(ndims+1)
|
982 |
+
M[:ndims, :ndims] = R
|
983 |
+
else:
|
984 |
+
# Rigid transformation matrix via quaternion
|
985 |
+
# compute symmetric matrix N
|
986 |
+
xx, yy, zz = numpy.sum(v0 * v1, axis=1)
|
987 |
+
xy, yz, zx = numpy.sum(v0 * numpy.roll(v1, -1, axis=0), axis=1)
|
988 |
+
xz, yx, zy = numpy.sum(v0 * numpy.roll(v1, -2, axis=0), axis=1)
|
989 |
+
N = [[xx+yy+zz, 0.0, 0.0, 0.0],
|
990 |
+
[yz-zy, xx-yy-zz, 0.0, 0.0],
|
991 |
+
[zx-xz, xy+yx, yy-xx-zz, 0.0],
|
992 |
+
[xy-yx, zx+xz, yz+zy, zz-xx-yy]]
|
993 |
+
# quaternion: eigenvector corresponding to most positive eigenvalue
|
994 |
+
w, V = numpy.linalg.eigh(N)
|
995 |
+
q = V[:, numpy.argmax(w)]
|
996 |
+
q /= vector_norm(q) # unit quaternion
|
997 |
+
# homogeneous transformation matrix
|
998 |
+
M = quaternion_matrix(q)
|
999 |
+
|
1000 |
+
if scale and not shear:
|
1001 |
+
# Affine transformation; scale is ratio of RMS deviations from centroid
|
1002 |
+
v0 *= v0
|
1003 |
+
v1 *= v1
|
1004 |
+
M[:ndims, :ndims] *= math.sqrt(numpy.sum(v1) / numpy.sum(v0))
|
1005 |
+
|
1006 |
+
# move centroids back
|
1007 |
+
M = numpy.dot(numpy.linalg.inv(M1), numpy.dot(M, M0))
|
1008 |
+
M /= M[ndims, ndims]
|
1009 |
+
return M
|
1010 |
+
|
1011 |
+
|
1012 |
+
def superimposition_matrix(v0, v1, scale=False, usesvd=True):
|
1013 |
+
"""Return matrix to transform given 3D point set into second point set.
|
1014 |
+
|
1015 |
+
v0 and v1 are shape (3, \*) or (4, \*) arrays of at least 3 points.
|
1016 |
+
|
1017 |
+
The parameters scale and usesvd are explained in the more general
|
1018 |
+
affine_matrix_from_points function.
|
1019 |
+
|
1020 |
+
The returned matrix is a similarity or Euclidean transformation matrix.
|
1021 |
+
This function has a fast C implementation in transformations.c.
|
1022 |
+
|
1023 |
+
>>> v0 = numpy.random.rand(3, 10)
|
1024 |
+
>>> M = superimposition_matrix(v0, v0)
|
1025 |
+
>>> numpy.allclose(M, numpy.identity(4))
|
1026 |
+
True
|
1027 |
+
>>> R = random_rotation_matrix(numpy.random.random(3))
|
1028 |
+
>>> v0 = [[1,0,0], [0,1,0], [0,0,1], [1,1,1]]
|
1029 |
+
>>> v1 = numpy.dot(R, v0)
|
1030 |
+
>>> M = superimposition_matrix(v0, v1)
|
1031 |
+
>>> numpy.allclose(v1, numpy.dot(M, v0))
|
1032 |
+
True
|
1033 |
+
>>> v0 = (numpy.random.rand(4, 100) - 0.5) * 20
|
1034 |
+
>>> v0[3] = 1
|
1035 |
+
>>> v1 = numpy.dot(R, v0)
|
1036 |
+
>>> M = superimposition_matrix(v0, v1)
|
1037 |
+
>>> numpy.allclose(v1, numpy.dot(M, v0))
|
1038 |
+
True
|
1039 |
+
>>> S = scale_matrix(random.random())
|
1040 |
+
>>> T = translation_matrix(numpy.random.random(3)-0.5)
|
1041 |
+
>>> M = concatenate_matrices(T, R, S)
|
1042 |
+
>>> v1 = numpy.dot(M, v0)
|
1043 |
+
>>> v0[:3] += numpy.random.normal(0, 1e-9, 300).reshape(3, -1)
|
1044 |
+
>>> M = superimposition_matrix(v0, v1, scale=True)
|
1045 |
+
>>> numpy.allclose(v1, numpy.dot(M, v0))
|
1046 |
+
True
|
1047 |
+
>>> M = superimposition_matrix(v0, v1, scale=True, usesvd=False)
|
1048 |
+
>>> numpy.allclose(v1, numpy.dot(M, v0))
|
1049 |
+
True
|
1050 |
+
>>> v = numpy.empty((4, 100, 3))
|
1051 |
+
>>> v[:, :, 0] = v0
|
1052 |
+
>>> M = superimposition_matrix(v0, v1, scale=True, usesvd=False)
|
1053 |
+
>>> numpy.allclose(v1, numpy.dot(M, v[:, :, 0]))
|
1054 |
+
True
|
1055 |
+
|
1056 |
+
"""
|
1057 |
+
v0 = numpy.array(v0, dtype=numpy.float64, copy=False)[:3]
|
1058 |
+
v1 = numpy.array(v1, dtype=numpy.float64, copy=False)[:3]
|
1059 |
+
return affine_matrix_from_points(v0, v1, shear=False,
|
1060 |
+
scale=scale, usesvd=usesvd)
|
1061 |
+
|
1062 |
+
|
1063 |
+
def euler_matrix(ai, aj, ak, axes='sxyz'):
|
1064 |
+
"""Return homogeneous rotation matrix from Euler angles and axis sequence.
|
1065 |
+
|
1066 |
+
ai, aj, ak : Euler's roll, pitch and yaw angles
|
1067 |
+
axes : One of 24 axis sequences as string or encoded tuple
|
1068 |
+
|
1069 |
+
>>> R = euler_matrix(1, 2, 3, 'syxz')
|
1070 |
+
>>> numpy.allclose(numpy.sum(R[0]), -1.34786452)
|
1071 |
+
True
|
1072 |
+
>>> R = euler_matrix(1, 2, 3, (0, 1, 0, 1))
|
1073 |
+
>>> numpy.allclose(numpy.sum(R[0]), -0.383436184)
|
1074 |
+
True
|
1075 |
+
>>> ai, aj, ak = (4*math.pi) * (numpy.random.random(3) - 0.5)
|
1076 |
+
>>> for axes in _AXES2TUPLE.keys():
|
1077 |
+
... R = euler_matrix(ai, aj, ak, axes)
|
1078 |
+
>>> for axes in _TUPLE2AXES.keys():
|
1079 |
+
... R = euler_matrix(ai, aj, ak, axes)
|
1080 |
+
|
1081 |
+
"""
|
1082 |
+
try:
|
1083 |
+
firstaxis, parity, repetition, frame = _AXES2TUPLE[axes]
|
1084 |
+
except (AttributeError, KeyError):
|
1085 |
+
_TUPLE2AXES[axes] # noqa: validation
|
1086 |
+
firstaxis, parity, repetition, frame = axes
|
1087 |
+
|
1088 |
+
i = firstaxis
|
1089 |
+
j = _NEXT_AXIS[i+parity]
|
1090 |
+
k = _NEXT_AXIS[i-parity+1]
|
1091 |
+
|
1092 |
+
if frame:
|
1093 |
+
ai, ak = ak, ai
|
1094 |
+
if parity:
|
1095 |
+
ai, aj, ak = -ai, -aj, -ak
|
1096 |
+
|
1097 |
+
si, sj, sk = math.sin(ai), math.sin(aj), math.sin(ak)
|
1098 |
+
ci, cj, ck = math.cos(ai), math.cos(aj), math.cos(ak)
|
1099 |
+
cc, cs = ci*ck, ci*sk
|
1100 |
+
sc, ss = si*ck, si*sk
|
1101 |
+
|
1102 |
+
M = numpy.identity(4)
|
1103 |
+
if repetition:
|
1104 |
+
M[i, i] = cj
|
1105 |
+
M[i, j] = sj*si
|
1106 |
+
M[i, k] = sj*ci
|
1107 |
+
M[j, i] = sj*sk
|
1108 |
+
M[j, j] = -cj*ss+cc
|
1109 |
+
M[j, k] = -cj*cs-sc
|
1110 |
+
M[k, i] = -sj*ck
|
1111 |
+
M[k, j] = cj*sc+cs
|
1112 |
+
M[k, k] = cj*cc-ss
|
1113 |
+
else:
|
1114 |
+
M[i, i] = cj*ck
|
1115 |
+
M[i, j] = sj*sc-cs
|
1116 |
+
M[i, k] = sj*cc+ss
|
1117 |
+
M[j, i] = cj*sk
|
1118 |
+
M[j, j] = sj*ss+cc
|
1119 |
+
M[j, k] = sj*cs-sc
|
1120 |
+
M[k, i] = -sj
|
1121 |
+
M[k, j] = cj*si
|
1122 |
+
M[k, k] = cj*ci
|
1123 |
+
return M
|
1124 |
+
|
1125 |
+
|
1126 |
+
def euler_from_matrix(matrix, axes='sxyz'):
|
1127 |
+
"""Return Euler angles from rotation matrix for specified axis sequence.
|
1128 |
+
|
1129 |
+
axes : One of 24 axis sequences as string or encoded tuple
|
1130 |
+
|
1131 |
+
Note that many Euler angle triplets can describe one matrix.
|
1132 |
+
|
1133 |
+
>>> R0 = euler_matrix(1, 2, 3, 'syxz')
|
1134 |
+
>>> al, be, ga = euler_from_matrix(R0, 'syxz')
|
1135 |
+
>>> R1 = euler_matrix(al, be, ga, 'syxz')
|
1136 |
+
>>> numpy.allclose(R0, R1)
|
1137 |
+
True
|
1138 |
+
>>> angles = (4*math.pi) * (numpy.random.random(3) - 0.5)
|
1139 |
+
>>> for axes in _AXES2TUPLE.keys():
|
1140 |
+
... R0 = euler_matrix(axes=axes, *angles)
|
1141 |
+
... R1 = euler_matrix(axes=axes, *euler_from_matrix(R0, axes))
|
1142 |
+
... if not numpy.allclose(R0, R1): print(axes, "failed")
|
1143 |
+
|
1144 |
+
"""
|
1145 |
+
try:
|
1146 |
+
firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()]
|
1147 |
+
except (AttributeError, KeyError):
|
1148 |
+
_TUPLE2AXES[axes] # noqa: validation
|
1149 |
+
firstaxis, parity, repetition, frame = axes
|
1150 |
+
|
1151 |
+
i = firstaxis
|
1152 |
+
j = _NEXT_AXIS[i+parity]
|
1153 |
+
k = _NEXT_AXIS[i-parity+1]
|
1154 |
+
|
1155 |
+
M = numpy.array(matrix, dtype=numpy.float64, copy=False)[:3, :3]
|
1156 |
+
if repetition:
|
1157 |
+
sy = math.sqrt(M[i, j]*M[i, j] + M[i, k]*M[i, k])
|
1158 |
+
if sy > _EPS:
|
1159 |
+
ax = math.atan2(M[i, j], M[i, k])
|
1160 |
+
ay = math.atan2(sy, M[i, i])
|
1161 |
+
az = math.atan2(M[j, i], -M[k, i])
|
1162 |
+
else:
|
1163 |
+
ax = math.atan2(-M[j, k], M[j, j])
|
1164 |
+
ay = math.atan2(sy, M[i, i])
|
1165 |
+
az = 0.0
|
1166 |
+
else:
|
1167 |
+
cy = math.sqrt(M[i, i]*M[i, i] + M[j, i]*M[j, i])
|
1168 |
+
if cy > _EPS:
|
1169 |
+
ax = math.atan2(M[k, j], M[k, k])
|
1170 |
+
ay = math.atan2(-M[k, i], cy)
|
1171 |
+
az = math.atan2(M[j, i], M[i, i])
|
1172 |
+
else:
|
1173 |
+
ax = math.atan2(-M[j, k], M[j, j])
|
1174 |
+
ay = math.atan2(-M[k, i], cy)
|
1175 |
+
az = 0.0
|
1176 |
+
|
1177 |
+
if parity:
|
1178 |
+
ax, ay, az = -ax, -ay, -az
|
1179 |
+
if frame:
|
1180 |
+
ax, az = az, ax
|
1181 |
+
return ax, ay, az
|
1182 |
+
|
1183 |
+
|
1184 |
+
def euler_from_quaternion(quaternion, axes='sxyz'):
|
1185 |
+
"""Return Euler angles from quaternion for specified axis sequence.
|
1186 |
+
|
1187 |
+
>>> angles = euler_from_quaternion([0.99810947, 0.06146124, 0, 0])
|
1188 |
+
>>> numpy.allclose(angles, [0.123, 0, 0])
|
1189 |
+
True
|
1190 |
+
|
1191 |
+
"""
|
1192 |
+
return euler_from_matrix(quaternion_matrix(quaternion), axes)
|
1193 |
+
|
1194 |
+
|
1195 |
+
def quaternion_from_euler(ai, aj, ak, axes='sxyz'):
|
1196 |
+
"""Return quaternion from Euler angles and axis sequence.
|
1197 |
+
|
1198 |
+
ai, aj, ak : Euler's roll, pitch and yaw angles
|
1199 |
+
axes : One of 24 axis sequences as string or encoded tuple
|
1200 |
+
|
1201 |
+
>>> q = quaternion_from_euler(1, 2, 3, 'ryxz')
|
1202 |
+
>>> numpy.allclose(q, [0.435953, 0.310622, -0.718287, 0.444435])
|
1203 |
+
True
|
1204 |
+
|
1205 |
+
"""
|
1206 |
+
try:
|
1207 |
+
firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()]
|
1208 |
+
except (AttributeError, KeyError):
|
1209 |
+
_TUPLE2AXES[axes] # noqa: validation
|
1210 |
+
firstaxis, parity, repetition, frame = axes
|
1211 |
+
|
1212 |
+
i = firstaxis + 1
|
1213 |
+
j = _NEXT_AXIS[i+parity-1] + 1
|
1214 |
+
k = _NEXT_AXIS[i-parity] + 1
|
1215 |
+
|
1216 |
+
if frame:
|
1217 |
+
ai, ak = ak, ai
|
1218 |
+
if parity:
|
1219 |
+
aj = -aj
|
1220 |
+
|
1221 |
+
ai /= 2.0
|
1222 |
+
aj /= 2.0
|
1223 |
+
ak /= 2.0
|
1224 |
+
ci = math.cos(ai)
|
1225 |
+
si = math.sin(ai)
|
1226 |
+
cj = math.cos(aj)
|
1227 |
+
sj = math.sin(aj)
|
1228 |
+
ck = math.cos(ak)
|
1229 |
+
sk = math.sin(ak)
|
1230 |
+
cc = ci*ck
|
1231 |
+
cs = ci*sk
|
1232 |
+
sc = si*ck
|
1233 |
+
ss = si*sk
|
1234 |
+
|
1235 |
+
q = numpy.empty((4, ))
|
1236 |
+
if repetition:
|
1237 |
+
q[0] = cj*(cc - ss)
|
1238 |
+
q[i] = cj*(cs + sc)
|
1239 |
+
q[j] = sj*(cc + ss)
|
1240 |
+
q[k] = sj*(cs - sc)
|
1241 |
+
else:
|
1242 |
+
q[0] = cj*cc + sj*ss
|
1243 |
+
q[i] = cj*sc - sj*cs
|
1244 |
+
q[j] = cj*ss + sj*cc
|
1245 |
+
q[k] = cj*cs - sj*sc
|
1246 |
+
if parity:
|
1247 |
+
q[j] *= -1.0
|
1248 |
+
|
1249 |
+
return q
|
1250 |
+
|
1251 |
+
|
1252 |
+
def quaternion_about_axis(angle, axis):
    """Return quaternion for rotation about axis.

    >>> q = quaternion_about_axis(0.123, [1, 0, 0])
    >>> numpy.allclose(q, [0.99810947, 0.06146124, 0, 0])
    True

    """
    q = numpy.array([0.0, axis[0], axis[1], axis[2]])
    qlen = vector_norm(q)
    if qlen > _EPS:
        q *= math.sin(angle/2.0) / qlen
    q[0] = math.cos(angle/2.0)
    return q


def quaternion_matrix(quaternion):
    """Return homogeneous rotation matrix from quaternion.

    >>> M = quaternion_matrix([0.99810947, 0.06146124, 0, 0])
    >>> numpy.allclose(M, rotation_matrix(0.123, [1, 0, 0]))
    True
    >>> M = quaternion_matrix([1, 0, 0, 0])
    >>> numpy.allclose(M, numpy.identity(4))
    True
    >>> M = quaternion_matrix([0, 1, 0, 0])
    >>> numpy.allclose(M, numpy.diag([1, -1, -1, 1]))
    True

    """
    q = numpy.array(quaternion, dtype=numpy.float64, copy=True)
    n = numpy.dot(q, q)
    if n < _EPS:
        return numpy.identity(4)
    q *= math.sqrt(2.0 / n)
    q = numpy.outer(q, q)
    return numpy.array([
        [1.0-q[2, 2]-q[3, 3], q[1, 2]-q[3, 0], q[1, 3]+q[2, 0], 0.0],
        [q[1, 2]+q[3, 0], 1.0-q[1, 1]-q[3, 3], q[2, 3]-q[1, 0], 0.0],
        [q[1, 3]-q[2, 0], q[2, 3]+q[1, 0], 1.0-q[1, 1]-q[2, 2], 0.0],
        [0.0, 0.0, 0.0, 1.0]])


def quaternion_from_matrix(matrix, isprecise=False):
|
1296 |
+
"""Return quaternion from rotation matrix.
|
1297 |
+
|
1298 |
+
If isprecise is True, the input matrix is assumed to be a precise rotation
|
1299 |
+
matrix and a faster algorithm is used.
|
1300 |
+
|
1301 |
+
>>> q = quaternion_from_matrix(numpy.identity(4), True)
|
1302 |
+
>>> numpy.allclose(q, [1, 0, 0, 0])
|
1303 |
+
True
|
1304 |
+
>>> q = quaternion_from_matrix(numpy.diag([1, -1, -1, 1]))
|
1305 |
+
>>> numpy.allclose(q, [0, 1, 0, 0]) or numpy.allclose(q, [0, -1, 0, 0])
|
1306 |
+
True
|
1307 |
+
>>> R = rotation_matrix(0.123, (1, 2, 3))
|
1308 |
+
>>> q = quaternion_from_matrix(R, True)
|
1309 |
+
>>> numpy.allclose(q, [0.9981095, 0.0164262, 0.0328524, 0.0492786])
|
1310 |
+
True
|
1311 |
+
>>> R = [[-0.545, 0.797, 0.260, 0], [0.733, 0.603, -0.313, 0],
|
1312 |
+
... [-0.407, 0.021, -0.913, 0], [0, 0, 0, 1]]
|
1313 |
+
>>> q = quaternion_from_matrix(R)
|
1314 |
+
>>> numpy.allclose(q, [0.19069, 0.43736, 0.87485, -0.083611])
|
1315 |
+
True
|
1316 |
+
>>> R = [[0.395, 0.362, 0.843, 0], [-0.626, 0.796, -0.056, 0],
|
1317 |
+
... [-0.677, -0.498, 0.529, 0], [0, 0, 0, 1]]
|
1318 |
+
>>> q = quaternion_from_matrix(R)
|
1319 |
+
>>> numpy.allclose(q, [0.82336615, -0.13610694, 0.46344705, -0.29792603])
|
1320 |
+
True
|
1321 |
+
>>> R = random_rotation_matrix()
|
1322 |
+
>>> q = quaternion_from_matrix(R)
|
1323 |
+
>>> is_same_transform(R, quaternion_matrix(q))
|
1324 |
+
True
|
1325 |
+
>>> is_same_quaternion(quaternion_from_matrix(R, isprecise=False),
|
1326 |
+
... quaternion_from_matrix(R, isprecise=True))
|
1327 |
+
True
|
1328 |
+
>>> R = euler_matrix(0.0, 0.0, numpy.pi/2.0)
|
1329 |
+
>>> is_same_quaternion(quaternion_from_matrix(R, isprecise=False),
|
1330 |
+
... quaternion_from_matrix(R, isprecise=True))
|
1331 |
+
True
|
1332 |
+
|
1333 |
+
"""
|
1334 |
+
M = numpy.array(matrix, dtype=numpy.float64, copy=False)[:4, :4]
|
1335 |
+
if isprecise:
|
1336 |
+
q = numpy.empty((4, ))
|
1337 |
+
t = numpy.trace(M)
|
1338 |
+
if t > M[3, 3]:
|
1339 |
+
q[0] = t
|
1340 |
+
q[3] = M[1, 0] - M[0, 1]
|
1341 |
+
q[2] = M[0, 2] - M[2, 0]
|
1342 |
+
q[1] = M[2, 1] - M[1, 2]
|
1343 |
+
else:
|
1344 |
+
i, j, k = 0, 1, 2
|
1345 |
+
if M[1, 1] > M[0, 0]:
|
1346 |
+
i, j, k = 1, 2, 0
|
1347 |
+
if M[2, 2] > M[i, i]:
|
1348 |
+
i, j, k = 2, 0, 1
|
1349 |
+
t = M[i, i] - (M[j, j] + M[k, k]) + M[3, 3]
|
1350 |
+
q[i] = t
|
1351 |
+
q[j] = M[i, j] + M[j, i]
|
1352 |
+
q[k] = M[k, i] + M[i, k]
|
1353 |
+
q[3] = M[k, j] - M[j, k]
|
1354 |
+
q = q[[3, 0, 1, 2]]
|
1355 |
+
q *= 0.5 / math.sqrt(t * M[3, 3])
|
1356 |
+
else:
|
1357 |
+
m00 = M[0, 0]
|
1358 |
+
m01 = M[0, 1]
|
1359 |
+
m02 = M[0, 2]
|
1360 |
+
m10 = M[1, 0]
|
1361 |
+
m11 = M[1, 1]
|
1362 |
+
m12 = M[1, 2]
|
1363 |
+
m20 = M[2, 0]
|
1364 |
+
m21 = M[2, 1]
|
1365 |
+
m22 = M[2, 2]
|
1366 |
+
# symmetric matrix K
|
1367 |
+
K = numpy.array([[m00-m11-m22, 0.0, 0.0, 0.0],
|
1368 |
+
[m01+m10, m11-m00-m22, 0.0, 0.0],
|
1369 |
+
[m02+m20, m12+m21, m22-m00-m11, 0.0],
|
1370 |
+
[m21-m12, m02-m20, m10-m01, m00+m11+m22]])
|
1371 |
+
K /= 3.0
|
1372 |
+
# quaternion is eigenvector of K that corresponds to largest eigenvalue
|
1373 |
+
w, V = numpy.linalg.eigh(K)
|
1374 |
+
q = V[[3, 0, 1, 2], numpy.argmax(w)]
|
1375 |
+
if q[0] < 0.0:
|
1376 |
+
numpy.negative(q, q)
|
1377 |
+
return q
|
1378 |
+
|
1379 |
+
|
1380 |
+
def quaternion_multiply(quaternion1, quaternion0):
|
1381 |
+
"""Return multiplication of two quaternions.
|
1382 |
+
|
1383 |
+
>>> q = quaternion_multiply([4, 1, -2, 3], [8, -5, 6, 7])
|
1384 |
+
>>> numpy.allclose(q, [28, -44, -14, 48])
|
1385 |
+
True
|
1386 |
+
|
1387 |
+
"""
|
1388 |
+
w0, x0, y0, z0 = quaternion0
|
1389 |
+
w1, x1, y1, z1 = quaternion1
|
1390 |
+
return numpy.array([
|
1391 |
+
-x1*x0 - y1*y0 - z1*z0 + w1*w0,
|
1392 |
+
x1*w0 + y1*z0 - z1*y0 + w1*x0,
|
1393 |
+
-x1*z0 + y1*w0 + z1*x0 + w1*y0,
|
1394 |
+
x1*y0 - y1*x0 + z1*w0 + w1*z0], dtype=numpy.float64)
|
1395 |
+
|
1396 |
+
|
1397 |
+
def quaternion_conjugate(quaternion):
|
1398 |
+
"""Return conjugate of quaternion.
|
1399 |
+
|
1400 |
+
>>> q0 = random_quaternion()
|
1401 |
+
>>> q1 = quaternion_conjugate(q0)
|
1402 |
+
>>> q1[0] == q0[0] and all(q1[1:] == -q0[1:])
|
1403 |
+
True
|
1404 |
+
|
1405 |
+
"""
|
1406 |
+
q = numpy.array(quaternion, dtype=numpy.float64, copy=True)
|
1407 |
+
numpy.negative(q[1:], q[1:])
|
1408 |
+
return q
|
1409 |
+
|
1410 |
+
|
1411 |
+
def quaternion_inverse(quaternion):
|
1412 |
+
"""Return inverse of quaternion.
|
1413 |
+
|
1414 |
+
>>> q0 = random_quaternion()
|
1415 |
+
>>> q1 = quaternion_inverse(q0)
|
1416 |
+
>>> numpy.allclose(quaternion_multiply(q0, q1), [1, 0, 0, 0])
|
1417 |
+
True
|
1418 |
+
|
1419 |
+
"""
|
1420 |
+
q = numpy.array(quaternion, dtype=numpy.float64, copy=True)
|
1421 |
+
numpy.negative(q[1:], q[1:])
|
1422 |
+
return q / numpy.dot(q, q)
|
1423 |
+
|
1424 |
+
|
1425 |
+
def quaternion_real(quaternion):
|
1426 |
+
"""Return real part of quaternion.
|
1427 |
+
|
1428 |
+
>>> quaternion_real([3, 0, 1, 2])
|
1429 |
+
3.0
|
1430 |
+
|
1431 |
+
"""
|
1432 |
+
return float(quaternion[0])
|
1433 |
+
|
1434 |
+
|
1435 |
+
def quaternion_imag(quaternion):
|
1436 |
+
"""Return imaginary part of quaternion.
|
1437 |
+
|
1438 |
+
>>> quaternion_imag([3, 0, 1, 2])
|
1439 |
+
array([ 0., 1., 2.])
|
1440 |
+
|
1441 |
+
"""
|
1442 |
+
return numpy.array(quaternion[1:4], dtype=numpy.float64, copy=True)
|
1443 |
+
|
1444 |
+
|
1445 |
+
def quaternion_slerp(quat0, quat1, fraction, spin=0, shortestpath=True):
|
1446 |
+
"""Return spherical linear interpolation between two quaternions.
|
1447 |
+
|
1448 |
+
>>> q0 = random_quaternion()
|
1449 |
+
>>> q1 = random_quaternion()
|
1450 |
+
>>> q = quaternion_slerp(q0, q1, 0)
|
1451 |
+
>>> numpy.allclose(q, q0)
|
1452 |
+
True
|
1453 |
+
>>> q = quaternion_slerp(q0, q1, 1, 1)
|
1454 |
+
>>> numpy.allclose(q, q1)
|
1455 |
+
True
|
1456 |
+
>>> q = quaternion_slerp(q0, q1, 0.5)
|
1457 |
+
>>> angle = math.acos(numpy.dot(q0, q))
|
1458 |
+
>>> numpy.allclose(2, math.acos(numpy.dot(q0, q1)) / angle) or \
|
1459 |
+
numpy.allclose(2, math.acos(-numpy.dot(q0, q1)) / angle)
|
1460 |
+
True
|
1461 |
+
|
1462 |
+
"""
|
1463 |
+
q0 = unit_vector(quat0[:4])
|
1464 |
+
q1 = unit_vector(quat1[:4])
|
1465 |
+
if fraction == 0.0:
|
1466 |
+
return q0
|
1467 |
+
elif fraction == 1.0:
|
1468 |
+
return q1
|
1469 |
+
d = numpy.dot(q0, q1)
|
1470 |
+
if abs(abs(d) - 1.0) < _EPS:
|
1471 |
+
return q0
|
1472 |
+
if shortestpath and d < 0.0:
|
1473 |
+
# invert rotation
|
1474 |
+
d = -d
|
1475 |
+
numpy.negative(q1, q1)
|
1476 |
+
angle = math.acos(d) + spin * math.pi
|
1477 |
+
if abs(angle) < _EPS:
|
1478 |
+
return q0
|
1479 |
+
isin = 1.0 / math.sin(angle)
|
1480 |
+
q0 *= math.sin((1.0 - fraction) * angle) * isin
|
1481 |
+
q1 *= math.sin(fraction * angle) * isin
|
1482 |
+
q0 += q1
|
1483 |
+
return q0
|
1484 |
+
|
1485 |
+
|
1486 |
+
def random_quaternion(rand=None):
|
1487 |
+
"""Return uniform random unit quaternion.
|
1488 |
+
|
1489 |
+
rand: array like or None
|
1490 |
+
Three independent random variables that are uniformly distributed
|
1491 |
+
between 0 and 1.
|
1492 |
+
|
1493 |
+
>>> q = random_quaternion()
|
1494 |
+
>>> numpy.allclose(1, vector_norm(q))
|
1495 |
+
True
|
1496 |
+
>>> q = random_quaternion(numpy.random.random(3))
|
1497 |
+
>>> len(q.shape), q.shape[0]==4
|
1498 |
+
(1, True)
|
1499 |
+
|
1500 |
+
"""
|
1501 |
+
if rand is None:
|
1502 |
+
rand = numpy.random.rand(3)
|
1503 |
+
else:
|
1504 |
+
assert len(rand) == 3
|
1505 |
+
r1 = numpy.sqrt(1.0 - rand[0])
|
1506 |
+
r2 = numpy.sqrt(rand[0])
|
1507 |
+
pi2 = math.pi * 2.0
|
1508 |
+
t1 = pi2 * rand[1]
|
1509 |
+
t2 = pi2 * rand[2]
|
1510 |
+
return numpy.array([numpy.cos(t2)*r2, numpy.sin(t1)*r1,
|
1511 |
+
numpy.cos(t1)*r1, numpy.sin(t2)*r2])
|
1512 |
+
|
1513 |
+
|
1514 |
+
def random_rotation_matrix(rand=None):
|
1515 |
+
"""Return uniform random rotation matrix.
|
1516 |
+
|
1517 |
+
rand: array like
|
1518 |
+
Three independent random variables that are uniformly distributed
|
1519 |
+
between 0 and 1 for each returned quaternion.
|
1520 |
+
|
1521 |
+
>>> R = random_rotation_matrix()
|
1522 |
+
>>> numpy.allclose(numpy.dot(R.T, R), numpy.identity(4))
|
1523 |
+
True
|
1524 |
+
|
1525 |
+
"""
|
1526 |
+
return quaternion_matrix(random_quaternion(rand))
|
1527 |
+
|
1528 |
+
|
1529 |
+
class Arcball(object):
|
1530 |
+
"""Virtual Trackball Control.
|
1531 |
+
|
1532 |
+
>>> ball = Arcball()
|
1533 |
+
>>> ball = Arcball(initial=numpy.identity(4))
|
1534 |
+
>>> ball.place([320, 320], 320)
|
1535 |
+
>>> ball.down([500, 250])
|
1536 |
+
>>> ball.drag([475, 275])
|
1537 |
+
>>> R = ball.matrix()
|
1538 |
+
>>> numpy.allclose(numpy.sum(R), 3.90583455)
|
1539 |
+
True
|
1540 |
+
>>> ball = Arcball(initial=[1, 0, 0, 0])
|
1541 |
+
>>> ball.place([320, 320], 320)
|
1542 |
+
>>> ball.setaxes([1, 1, 0], [-1, 1, 0])
|
1543 |
+
>>> ball.constrain = True
|
1544 |
+
>>> ball.down([400, 200])
|
1545 |
+
>>> ball.drag([200, 400])
|
1546 |
+
>>> R = ball.matrix()
|
1547 |
+
>>> numpy.allclose(numpy.sum(R), 0.2055924)
|
1548 |
+
True
|
1549 |
+
>>> ball.next()
|
1550 |
+
|
1551 |
+
"""
|
1552 |
+
|
1553 |
+
def __init__(self, initial=None):
|
1554 |
+
"""Initialize virtual trackball control.
|
1555 |
+
|
1556 |
+
initial : quaternion or rotation matrix
|
1557 |
+
|
1558 |
+
"""
|
1559 |
+
self._axis = None
|
1560 |
+
self._axes = None
|
1561 |
+
self._radius = 1.0
|
1562 |
+
self._center = [0.0, 0.0]
|
1563 |
+
self._vdown = numpy.array([0.0, 0.0, 1.0])
|
1564 |
+
self._constrain = False
|
1565 |
+
if initial is None:
|
1566 |
+
self._qdown = numpy.array([1.0, 0.0, 0.0, 0.0])
|
1567 |
+
else:
|
1568 |
+
initial = numpy.array(initial, dtype=numpy.float64)
|
1569 |
+
if initial.shape == (4, 4):
|
1570 |
+
self._qdown = quaternion_from_matrix(initial)
|
1571 |
+
elif initial.shape == (4, ):
|
1572 |
+
initial /= vector_norm(initial)
|
1573 |
+
self._qdown = initial
|
1574 |
+
else:
|
1575 |
+
raise ValueError("initial not a quaternion or matrix")
|
1576 |
+
self._qnow = self._qpre = self._qdown
|
1577 |
+
|
1578 |
+
def place(self, center, radius):
|
1579 |
+
"""Place Arcball, e.g. when window size changes.
|
1580 |
+
|
1581 |
+
center : sequence[2]
|
1582 |
+
Window coordinates of trackball center.
|
1583 |
+
radius : float
|
1584 |
+
Radius of trackball in window coordinates.
|
1585 |
+
|
1586 |
+
"""
|
1587 |
+
self._radius = float(radius)
|
1588 |
+
self._center[0] = center[0]
|
1589 |
+
self._center[1] = center[1]
|
1590 |
+
|
1591 |
+
def setaxes(self, *axes):
|
1592 |
+
"""Set axes to constrain rotations."""
|
1593 |
+
if axes is None:
|
1594 |
+
self._axes = None
|
1595 |
+
else:
|
1596 |
+
self._axes = [unit_vector(axis) for axis in axes]
|
1597 |
+
|
1598 |
+
@property
|
1599 |
+
def constrain(self):
|
1600 |
+
"""Return state of constrain to axis mode."""
|
1601 |
+
return self._constrain
|
1602 |
+
|
1603 |
+
@constrain.setter
|
1604 |
+
def constrain(self, value):
|
1605 |
+
"""Set state of constrain to axis mode."""
|
1606 |
+
self._constrain = bool(value)
|
1607 |
+
|
1608 |
+
def down(self, point):
|
1609 |
+
"""Set initial cursor window coordinates and pick constrain-axis."""
|
1610 |
+
self._vdown = arcball_map_to_sphere(point, self._center, self._radius)
|
1611 |
+
self._qdown = self._qpre = self._qnow
|
1612 |
+
if self._constrain and self._axes is not None:
|
1613 |
+
self._axis = arcball_nearest_axis(self._vdown, self._axes)
|
1614 |
+
self._vdown = arcball_constrain_to_axis(self._vdown, self._axis)
|
1615 |
+
else:
|
1616 |
+
self._axis = None
|
1617 |
+
|
1618 |
+
def drag(self, point):
|
1619 |
+
"""Update current cursor window coordinates."""
|
1620 |
+
vnow = arcball_map_to_sphere(point, self._center, self._radius)
|
1621 |
+
if self._axis is not None:
|
1622 |
+
vnow = arcball_constrain_to_axis(vnow, self._axis)
|
1623 |
+
self._qpre = self._qnow
|
1624 |
+
t = numpy.cross(self._vdown, vnow)
|
1625 |
+
if numpy.dot(t, t) < _EPS:
|
1626 |
+
self._qnow = self._qdown
|
1627 |
+
else:
|
1628 |
+
q = [numpy.dot(self._vdown, vnow), t[0], t[1], t[2]]
|
1629 |
+
self._qnow = quaternion_multiply(q, self._qdown)
|
1630 |
+
|
1631 |
+
def next(self, acceleration=0.0):
|
1632 |
+
"""Continue rotation in direction of last drag."""
|
1633 |
+
q = quaternion_slerp(self._qpre, self._qnow, 2.0+acceleration, False)
|
1634 |
+
self._qpre, self._qnow = self._qnow, q
|
1635 |
+
|
1636 |
+
def matrix(self):
|
1637 |
+
"""Return homogeneous rotation matrix."""
|
1638 |
+
return quaternion_matrix(self._qnow)
|
1639 |
+
|
1640 |
+
|
1641 |
+
def arcball_map_to_sphere(point, center, radius):
|
1642 |
+
"""Return unit sphere coordinates from window coordinates."""
|
1643 |
+
v0 = (point[0] - center[0]) / radius
|
1644 |
+
v1 = (center[1] - point[1]) / radius
|
1645 |
+
n = v0*v0 + v1*v1
|
1646 |
+
if n > 1.0:
|
1647 |
+
# position outside of sphere
|
1648 |
+
n = math.sqrt(n)
|
1649 |
+
return numpy.array([v0/n, v1/n, 0.0])
|
1650 |
+
else:
|
1651 |
+
return numpy.array([v0, v1, math.sqrt(1.0 - n)])
|
1652 |
+
|
1653 |
+
|
1654 |
+
def arcball_constrain_to_axis(point, axis):
|
1655 |
+
"""Return sphere point perpendicular to axis."""
|
1656 |
+
v = numpy.array(point, dtype=numpy.float64, copy=True)
|
1657 |
+
a = numpy.array(axis, dtype=numpy.float64, copy=True)
|
1658 |
+
v -= a * numpy.dot(a, v) # on plane
|
1659 |
+
n = vector_norm(v)
|
1660 |
+
if n > _EPS:
|
1661 |
+
if v[2] < 0.0:
|
1662 |
+
numpy.negative(v, v)
|
1663 |
+
v /= n
|
1664 |
+
return v
|
1665 |
+
if a[2] == 1.0:
|
1666 |
+
return numpy.array([1.0, 0.0, 0.0])
|
1667 |
+
return unit_vector([-a[1], a[0], 0.0])
|
1668 |
+
|
1669 |
+
|
1670 |
+
def arcball_nearest_axis(point, axes):
|
1671 |
+
"""Return axis, which arc is nearest to point."""
|
1672 |
+
point = numpy.array(point, dtype=numpy.float64, copy=False)
|
1673 |
+
nearest = None
|
1674 |
+
mx = -1.0
|
1675 |
+
for axis in axes:
|
1676 |
+
t = numpy.dot(arcball_constrain_to_axis(point, axis), point)
|
1677 |
+
if t > mx:
|
1678 |
+
nearest = axis
|
1679 |
+
mx = t
|
1680 |
+
return nearest
|
1681 |
+
|
1682 |
+
|
1683 |
+
# epsilon for testing whether a number is close to zero
_EPS = numpy.finfo(float).eps * 4.0

# axis sequences for Euler angles
_NEXT_AXIS = [1, 2, 0, 1]

# map axes strings to/from tuples of inner axis, parity, repetition, frame
_AXES2TUPLE = {
    'sxyz': (0, 0, 0, 0), 'sxyx': (0, 0, 1, 0), 'sxzy': (0, 1, 0, 0),
    'sxzx': (0, 1, 1, 0), 'syzx': (1, 0, 0, 0), 'syzy': (1, 0, 1, 0),
    'syxz': (1, 1, 0, 0), 'syxy': (1, 1, 1, 0), 'szxy': (2, 0, 0, 0),
    'szxz': (2, 0, 1, 0), 'szyx': (2, 1, 0, 0), 'szyz': (2, 1, 1, 0),
    'rzyx': (0, 0, 0, 1), 'rxyx': (0, 0, 1, 1), 'ryzx': (0, 1, 0, 1),
    'rxzx': (0, 1, 1, 1), 'rxzy': (1, 0, 0, 1), 'ryzy': (1, 0, 1, 1),
    'rzxy': (1, 1, 0, 1), 'ryxy': (1, 1, 1, 1), 'ryxz': (2, 0, 0, 1),
    'rzxz': (2, 0, 1, 1), 'rxyz': (2, 1, 0, 1), 'rzyz': (2, 1, 1, 1)}

_TUPLE2AXES = dict((v, k) for k, v in _AXES2TUPLE.items())


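# Note (editorial, not in the upstream source): each 4-tuple above encodes
# (inner axis, parity, repetition, frame) as named in the comment before
# _AXES2TUPLE, and the leading 's'/'r' of the key marks a static vs. rotating
# frame.  euler_matrix(1, 2, 3, 'sxyz') therefore equals
# euler_matrix(1, 2, 3, (0, 0, 0, 0)), and euler_from_matrix accepts the same
# axis codes.
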
def vector_norm(data, axis=None, out=None):
|
1704 |
+
"""Return length, i.e. Euclidean norm, of ndarray along axis.
|
1705 |
+
|
1706 |
+
>>> v = numpy.random.random(3)
|
1707 |
+
>>> n = vector_norm(v)
|
1708 |
+
>>> numpy.allclose(n, numpy.linalg.norm(v))
|
1709 |
+
True
|
1710 |
+
>>> v = numpy.random.rand(6, 5, 3)
|
1711 |
+
>>> n = vector_norm(v, axis=-1)
|
1712 |
+
>>> numpy.allclose(n, numpy.sqrt(numpy.sum(v*v, axis=2)))
|
1713 |
+
True
|
1714 |
+
>>> n = vector_norm(v, axis=1)
|
1715 |
+
>>> numpy.allclose(n, numpy.sqrt(numpy.sum(v*v, axis=1)))
|
1716 |
+
True
|
1717 |
+
>>> v = numpy.random.rand(5, 4, 3)
|
1718 |
+
>>> n = numpy.empty((5, 3))
|
1719 |
+
>>> vector_norm(v, axis=1, out=n)
|
1720 |
+
>>> numpy.allclose(n, numpy.sqrt(numpy.sum(v*v, axis=1)))
|
1721 |
+
True
|
1722 |
+
>>> vector_norm([])
|
1723 |
+
0.0
|
1724 |
+
>>> vector_norm([1])
|
1725 |
+
1.0
|
1726 |
+
|
1727 |
+
"""
|
1728 |
+
data = numpy.array(data, dtype=numpy.float64, copy=True)
|
1729 |
+
if out is None:
|
1730 |
+
if data.ndim == 1:
|
1731 |
+
return math.sqrt(numpy.dot(data, data))
|
1732 |
+
data *= data
|
1733 |
+
out = numpy.atleast_1d(numpy.sum(data, axis=axis))
|
1734 |
+
numpy.sqrt(out, out)
|
1735 |
+
return out
|
1736 |
+
else:
|
1737 |
+
data *= data
|
1738 |
+
numpy.sum(data, axis=axis, out=out)
|
1739 |
+
numpy.sqrt(out, out)
|
1740 |
+
|
1741 |
+
|
1742 |
+
def unit_vector(data, axis=None, out=None):
|
1743 |
+
"""Return ndarray normalized by length, i.e. Euclidean norm, along axis.
|
1744 |
+
|
1745 |
+
>>> v0 = numpy.random.random(3)
|
1746 |
+
>>> v1 = unit_vector(v0)
|
1747 |
+
>>> numpy.allclose(v1, v0 / numpy.linalg.norm(v0))
|
1748 |
+
True
|
1749 |
+
>>> v0 = numpy.random.rand(5, 4, 3)
|
1750 |
+
>>> v1 = unit_vector(v0, axis=-1)
|
1751 |
+
>>> v2 = v0 / numpy.expand_dims(numpy.sqrt(numpy.sum(v0*v0, axis=2)), 2)
|
1752 |
+
>>> numpy.allclose(v1, v2)
|
1753 |
+
True
|
1754 |
+
>>> v1 = unit_vector(v0, axis=1)
|
1755 |
+
>>> v2 = v0 / numpy.expand_dims(numpy.sqrt(numpy.sum(v0*v0, axis=1)), 1)
|
1756 |
+
>>> numpy.allclose(v1, v2)
|
1757 |
+
True
|
1758 |
+
>>> v1 = numpy.empty((5, 4, 3))
|
1759 |
+
>>> unit_vector(v0, axis=1, out=v1)
|
1760 |
+
>>> numpy.allclose(v1, v2)
|
1761 |
+
True
|
1762 |
+
>>> list(unit_vector([]))
|
1763 |
+
[]
|
1764 |
+
>>> list(unit_vector([1]))
|
1765 |
+
[1.0]
|
1766 |
+
|
1767 |
+
"""
|
1768 |
+
if out is None:
|
1769 |
+
data = numpy.array(data, dtype=numpy.float64, copy=True)
|
1770 |
+
if data.ndim == 1:
|
1771 |
+
data /= math.sqrt(numpy.dot(data, data))
|
1772 |
+
return data
|
1773 |
+
else:
|
1774 |
+
if out is not data:
|
1775 |
+
out[:] = numpy.array(data, copy=False)
|
1776 |
+
data = out
|
1777 |
+
length = numpy.atleast_1d(numpy.sum(data*data, axis))
|
1778 |
+
numpy.sqrt(length, length)
|
1779 |
+
if axis is not None:
|
1780 |
+
length = numpy.expand_dims(length, axis)
|
1781 |
+
data /= length
|
1782 |
+
if out is None:
|
1783 |
+
return data
|
1784 |
+
|
1785 |
+
|
1786 |
+
def random_vector(size):
|
1787 |
+
"""Return array of random doubles in the half-open interval [0.0, 1.0).
|
1788 |
+
|
1789 |
+
>>> v = random_vector(10000)
|
1790 |
+
>>> numpy.all(v >= 0) and numpy.all(v < 1)
|
1791 |
+
True
|
1792 |
+
>>> v0 = random_vector(10)
|
1793 |
+
>>> v1 = random_vector(10)
|
1794 |
+
>>> numpy.any(v0 == v1)
|
1795 |
+
False
|
1796 |
+
|
1797 |
+
"""
|
1798 |
+
    return numpy.random.random(size)


def vector_product(v0, v1, axis=0):
    """Return vector perpendicular to vectors.

    >>> v = vector_product([2, 0, 0], [0, 3, 0])
    >>> numpy.allclose(v, [0, 0, 6])
    True
    >>> v0 = [[2, 0, 0, 2], [0, 2, 0, 2], [0, 0, 2, 2]]
    >>> v1 = [[3], [0], [0]]
    >>> v = vector_product(v0, v1)
    >>> numpy.allclose(v, [[0, 0, 0, 0], [0, 0, 6, 6], [0, -6, 0, -6]])
    True
    >>> v0 = [[2, 0, 0], [2, 0, 0], [0, 2, 0], [2, 0, 0]]
    >>> v1 = [[0, 3, 0], [0, 0, 3], [0, 0, 3], [3, 3, 3]]
    >>> v = vector_product(v0, v1, axis=1)
    >>> numpy.allclose(v, [[0, 0, 6], [0, -6, 0], [6, 0, 0], [0, -6, 6]])
    True

    """
    return numpy.cross(v0, v1, axis=axis)


def angle_between_vectors(v0, v1, directed=True, axis=0):
    """Return angle between vectors.

    If directed is False, the input vectors are interpreted as undirected axes,
    i.e. the maximum angle is pi/2.

    >>> a = angle_between_vectors([1, -2, 3], [-1, 2, -3])
    >>> numpy.allclose(a, math.pi)
    True
    >>> a = angle_between_vectors([1, -2, 3], [-1, 2, -3], directed=False)
    >>> numpy.allclose(a, 0)
    True
    >>> v0 = [[2, 0, 0, 2], [0, 2, 0, 2], [0, 0, 2, 2]]
    >>> v1 = [[3], [0], [0]]
    >>> a = angle_between_vectors(v0, v1)
    >>> numpy.allclose(a, [0, 1.5708, 1.5708, 0.95532])
    True
    >>> v0 = [[2, 0, 0], [2, 0, 0], [0, 2, 0], [2, 0, 0]]
    >>> v1 = [[0, 3, 0], [0, 0, 3], [0, 0, 3], [3, 3, 3]]
    >>> a = angle_between_vectors(v0, v1, axis=1)
    >>> numpy.allclose(a, [1.5708, 1.5708, 1.5708, 0.95532])
    True

    """
    v0 = numpy.array(v0, dtype=numpy.float64, copy=False)
    v1 = numpy.array(v1, dtype=numpy.float64, copy=False)
    dot = numpy.sum(v0 * v1, axis=axis)
    dot /= vector_norm(v0, axis=axis) * vector_norm(v1, axis=axis)
    dot = numpy.clip(dot, -1.0, 1.0)
    return numpy.arccos(dot if directed else numpy.fabs(dot))


def inverse_matrix(matrix):
    """Return inverse of square transformation matrix.

    >>> M0 = random_rotation_matrix()
    >>> M1 = inverse_matrix(M0.T)
    >>> numpy.allclose(M1, numpy.linalg.inv(M0.T))
    True
    >>> for size in range(1, 7):
    ...     M0 = numpy.random.rand(size, size)
    ...     M1 = inverse_matrix(M0)
    ...     if not numpy.allclose(M1, numpy.linalg.inv(M0)): print(size)

    """
    return numpy.linalg.inv(matrix)


def concatenate_matrices(*matrices):
    """Return concatenation of series of transformation matrices.

    >>> M = numpy.random.rand(16).reshape((4, 4)) - 0.5
    >>> numpy.allclose(M, concatenate_matrices(M))
    True
    >>> numpy.allclose(numpy.dot(M, M.T), concatenate_matrices(M, M.T))
    True

    """
    M = numpy.identity(4)
    for i in matrices:
        M = numpy.dot(M, i)
    return M


def is_same_transform(matrix0, matrix1):
    """Return True if two matrices perform same transformation.

    >>> is_same_transform(numpy.identity(4), numpy.identity(4))
    True
    >>> is_same_transform(numpy.identity(4), random_rotation_matrix())
    False

    """
    matrix0 = numpy.array(matrix0, dtype=numpy.float64, copy=True)
    matrix0 /= matrix0[3, 3]
    matrix1 = numpy.array(matrix1, dtype=numpy.float64, copy=True)
    matrix1 /= matrix1[3, 3]
    return numpy.allclose(matrix0, matrix1)


def is_same_quaternion(q0, q1):
    """Return True if two quaternions are equal."""
    q0 = numpy.array(q0)
    q1 = numpy.array(q1)
    return numpy.allclose(q0, q1) or numpy.allclose(q0, -q1)


def _import_module(name, package=None, warn=True, postfix='_py', ignore='_'):
    """Try import all public attributes from module into global namespace.

    Existing attributes with name clashes are renamed with prefix.
    Attributes starting with underscore are ignored by default.

    Return True on successful import.

    """
    import warnings
    from importlib import import_module
    try:
        if not package:
            module = import_module(name)
        else:
            module = import_module('.' + name, package=package)
    except ImportError as err:
        if warn:
            warnings.warn(str(err))
    else:
        for attr in dir(module):
            if ignore and attr.startswith(ignore):
                continue
            if postfix:
                if attr in globals():
                    globals()[attr + postfix] = globals()[attr]
                elif warn:
                    warnings.warn('no Python implementation of ' + attr)
            globals()[attr] = getattr(module, attr)
        return True


_import_module('_transformations', __package__, warn=False)


if __name__ == '__main__':
    import doctest
    import random  # noqa: used in doctests
    try:
        numpy.set_printoptions(suppress=True, precision=5, legacy='1.13')
    except TypeError:
        numpy.set_printoptions(suppress=True, precision=5)
    doctest.testmod()
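As a quick sanity check of the helpers above, the composition and angle utilities can be exercised directly. A minimal sketch, assuming the vendored file is importable as COTR.transformations.transformations (the import path is an assumption, not part of the commit):

import numpy
from COTR.transformations import transformations as tf

R1 = tf.random_rotation_matrix()
R2 = tf.random_rotation_matrix()
M = tf.concatenate_matrices(R1, R2)
# Composing M with its inverse should give back the identity transform.
assert tf.is_same_transform(tf.concatenate_matrices(M, tf.inverse_matrix(M)), numpy.identity(4))
# With directed=False, opposite vectors are treated as the same undirected axis.
assert numpy.allclose(tf.angle_between_vectors([1, 0, 0], [-1, 0, 0], directed=False), 0)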
third_party/COTR/COTR/utils/constants.py
ADDED
@@ -0,0 +1,3 @@
DEFAULT_PRECISION = 'float32'
MAX_SIZE = 256
VALID_NN_OVERLAPPING_THRESH = 0.1
third_party/COTR/COTR/utils/debug_utils.py
ADDED
@@ -0,0 +1,15 @@
def embed_breakpoint(debug_info='', terminate=True):
    print('\nyou are inside a break point')
    if debug_info:
        print('debug info: {0}'.format(debug_info))
    print('')
    embedding = ('import IPython\n'
                 'import matplotlib.pyplot as plt\n'
                 'IPython.embed()\n'
                 )
    if terminate:
        embedding += (
            'assert 0, \'force termination\'\n'
        )

    return embedding
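Note that embed_breakpoint only builds a code string; the caller is expected to exec it so that the IPython shell opens in the caller's own scope. A minimal usage sketch (the call site below is hypothetical, not from the commit):

from COTR.utils.debug_utils import embed_breakpoint

def suspicious_step(x):
    y = x * 2
    # Drops into IPython with x and y visible, then force-terminates (terminate=True).
    exec(embed_breakpoint('checking y'))
    return y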
third_party/COTR/COTR/utils/utils.py
ADDED
@@ -0,0 +1,271 @@
import random
import smtplib
import ssl
from collections import namedtuple

from COTR.utils import debug_utils

import numpy as np
import torch
import cv2
import matplotlib.pyplot as plt
import PIL


'''
ImagePatch: patch: patch content, np array or None
            x: left bound in original resolution
            y: upper bound in original resolution
            w: width of patch
            h: height of patch
            ow: width of original resolution
            oh: height of original resolution
'''
ImagePatch = namedtuple('ImagePatch', ['patch', 'x', 'y', 'w', 'h', 'ow', 'oh'])
Point3D = namedtuple("Point3D", ["id", "arr_idx", "image_ids"])
Point2D = namedtuple("Point2D", ["id_3d", "xy"])


class CropCamConfig():
    def __init__(self, x, y, w, h, out_w, out_h, orig_w, orig_h):
        '''
        xy: left upper corner
        '''
        # assert x > 0 and x < orig_w
        # assert y > 0 and y < orig_h
        # assert w < orig_w and h < orig_h
        # assert x - w / 2 > 0 and x + w / 2 < orig_w
        # assert y - h / 2 > 0 and y + h / 2 < orig_h
        # assert h / w == out_h / out_w
        self.x = x
        self.y = y
        self.w = w
        self.h = h
        self.out_w = out_w
        self.out_h = out_h
        self.orig_w = orig_w
        self.orig_h = orig_h

    def __str__(self):
        out = f'original image size(h,w): [{self.orig_h}, {self.orig_w}]\n'
        out += f'crop at(x,y): [{self.x}, {self.y}]\n'
        out += f'crop size(h,w): [{self.h}, {self.w}]\n'
        out += f'resize crop to(h,w): [{self.out_h}, {self.out_w}]'
        return out


def fix_randomness(seed=42):
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.manual_seed(seed)
    np.random.seed(seed)


def worker_init_fn(worker_id):
    np.random.seed(np.random.get_state()[1][0] + worker_id)


def float_image_resize(img, shape, interp=PIL.Image.BILINEAR):
    missing_channel = False
    if len(img.shape) == 2:
        missing_channel = True
        img = img[..., None]
    layers = []
    img = img.transpose(2, 0, 1)
    for l in img:
        l = np.array(PIL.Image.fromarray(l).resize(shape[::-1], resample=interp))
        assert l.shape[:2] == shape
        layers.append(l)
    if missing_channel:
        return np.stack(layers, axis=-1)[..., 0]
    else:
        return np.stack(layers, axis=-1)


def is_nan(x):
    """
    get mask of nan values.
    :param x: torch or numpy var.
    :return: a N-D array of bool. True -> nan, False -> ok.
    """
    return x != x


def has_nan(x) -> bool:
    """
    check whether x contains nan.
    :param x: torch or numpy var.
    :return: single bool, True -> x containing nan, False -> ok.
    """
    if x is None:
        return False
    return is_nan(x).any()


def confirm(question='OK to continue?'):
    """
    Ask user to enter Y or N (case-insensitive).
    :return: True if the answer is Y.
    :rtype: bool
    """
    answer = ""
    while answer not in ["y", "n"]:
        answer = input(question + ' [y/n] ').lower()
    return answer == "y"


def print_notification(content_list, notification_type='NOTIFICATION'):
    print('---------------------- {0} ----------------------'.format(notification_type))
    print()
    for content in content_list:
        print(content)
    print()
    print('----------------------------------------------------')


def torch_img_to_np_img(torch_img):
    '''convert a torch image to matplotlib-able numpy image
    torch use Channels x Height x Width
    numpy use Height x Width x Channels
    Arguments:
        torch_img {[type]} -- [description]
    '''
    assert isinstance(torch_img, torch.Tensor), 'cannot process data type: {0}'.format(type(torch_img))
    if len(torch_img.shape) == 4 and (torch_img.shape[1] == 3 or torch_img.shape[1] == 1):
        return np.transpose(torch_img.detach().cpu().numpy(), (0, 2, 3, 1))
    if len(torch_img.shape) == 3 and (torch_img.shape[0] == 3 or torch_img.shape[0] == 1):
        return np.transpose(torch_img.detach().cpu().numpy(), (1, 2, 0))
    elif len(torch_img.shape) == 2:
        return torch_img.detach().cpu().numpy()
    else:
        raise ValueError('cannot process this image')


def np_img_to_torch_img(np_img):
    """convert a numpy image to torch image
    numpy use Height x Width x Channels
    torch use Channels x Height x Width

    Arguments:
        np_img {[type]} -- [description]
    """
    assert isinstance(np_img, np.ndarray), 'cannot process data type: {0}'.format(type(np_img))
    if len(np_img.shape) == 4 and (np_img.shape[3] == 3 or np_img.shape[3] == 1):
        return torch.from_numpy(np.transpose(np_img, (0, 3, 1, 2)))
    if len(np_img.shape) == 3 and (np_img.shape[2] == 3 or np_img.shape[2] == 1):
        return torch.from_numpy(np.transpose(np_img, (2, 0, 1)))
    elif len(np_img.shape) == 2:
        return torch.from_numpy(np_img)
    else:
        raise ValueError('cannot process this image with shape: {0}'.format(np_img.shape))


def safe_load_weights(model, saved_weights):
    try:
        model.load_state_dict(saved_weights)
    except RuntimeError:
        try:
            weights = saved_weights
            weights = {k.replace('module.', ''): v for k, v in weights.items()}
            model.load_state_dict(weights)
        except RuntimeError:
            try:
                weights = saved_weights
                weights = {'module.' + k: v for k, v in weights.items()}
                model.load_state_dict(weights)
            except RuntimeError:
                try:
                    pretrained_dict = saved_weights
                    model_dict = model.state_dict()
                    pretrained_dict = {k: v for k, v in pretrained_dict.items() if ((k in model_dict) and (model_dict[k].shape == pretrained_dict[k].shape))}
                    assert len(pretrained_dict) != 0
                    model_dict.update(pretrained_dict)
                    model.load_state_dict(model_dict)
                    non_match_keys = set(model.state_dict().keys()) - set(pretrained_dict.keys())
                    notification = []
                    notification += ['pretrained weights PARTIALLY loaded, following are missing:']
                    notification += [str(non_match_keys)]
                    print_notification(notification, 'WARNING')
                except Exception as e:
                    print(f'pretrained weights loading failed {e}')
                    exit()
    print('weights safely loaded')


def visualize_corrs(img1, img2, corrs, mask=None):
    if mask is None:
        mask = np.ones(len(corrs)).astype(bool)

    scale1 = 1.0
    scale2 = 1.0
    if img1.shape[1] > img2.shape[1]:
        scale2 = img1.shape[1] / img2.shape[1]
        w = img1.shape[1]
    else:
        scale1 = img2.shape[1] / img1.shape[1]
        w = img2.shape[1]
    # Resize if too big
    max_w = 400
    if w > max_w:
        scale1 *= max_w / w
        scale2 *= max_w / w
    img1 = cv2.resize(img1, (0, 0), fx=scale1, fy=scale1)
    img2 = cv2.resize(img2, (0, 0), fx=scale2, fy=scale2)

    x1, x2 = corrs[:, :2], corrs[:, 2:]
    h1, w1 = img1.shape[:2]
    h2, w2 = img2.shape[:2]
    img = np.zeros((h1 + h2, max(w1, w2), 3), dtype=img1.dtype)
    img[:h1, :w1] = img1
    img[h1:, :w2] = img2
    # Move keypoints to coordinates to image coordinates
    x1 = x1 * scale1
    x2 = x2 * scale2
    # recompute the coordinates for the second image
    x2p = x2 + np.array([[0, h1]])
    fig = plt.figure(frameon=False)
    fig = plt.imshow(img)

    cols = [
        [0.0, 0.67, 0.0],
        [0.9, 0.1, 0.1],
    ]
    lw = .5
    alpha = 1

    # Draw outliers
    _x1 = x1[~mask]
    _x2p = x2p[~mask]
    xs = np.stack([_x1[:, 0], _x2p[:, 0]], axis=1).T
    ys = np.stack([_x1[:, 1], _x2p[:, 1]], axis=1).T
    plt.plot(
        xs, ys,
        alpha=alpha,
        linestyle="-",
        linewidth=lw,
        aa=False,
        color=cols[1],
    )

    # Draw Inliers
    _x1 = x1[mask]
    _x2p = x2p[mask]
    xs = np.stack([_x1[:, 0], _x2p[:, 0]], axis=1).T
    ys = np.stack([_x1[:, 1], _x2p[:, 1]], axis=1).T
    plt.plot(
        xs, ys,
        alpha=alpha,
        linestyle="-",
        linewidth=lw,
        aa=False,
        color=cols[0],
    )
    plt.scatter(xs, ys)

    fig.axes.get_xaxis().set_visible(False)
    fig.axes.get_yaxis().set_visible(False)
    ax = plt.gca()
    ax.set_axis_off()
    plt.show()
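Two of these helpers are used throughout the demos below. A minimal, self-contained sketch of their behaviour (the toy model and arrays are assumptions for illustration, not part of the commit):

import numpy as np
import torch
from COTR.utils import utils

# safe_load_weights tolerates a 'module.' (DataParallel) prefix mismatch in the checkpoint keys.
model = torch.nn.Linear(4, 2)
wrapped = {'module.' + k: v for k, v in model.state_dict().items()}
utils.safe_load_weights(model, wrapped)  # prints 'weights safely loaded'

# HWC numpy image <-> CHW torch tensor round trip.
np_img = np.zeros((480, 640, 3), dtype=np.float32)
t_img = utils.np_img_to_torch_img(np_img)   # shape (3, 480, 640)
back = utils.torch_img_to_np_img(t_img)     # shape (480, 640, 3)
assert back.shape == np_img.shape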
third_party/COTR/LICENSE
ADDED
@@ -0,0 +1,201 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
third_party/COTR/demo_face.py
ADDED
@@ -0,0 +1,69 @@
'''
COTR demo for human face
We use an off-the-shelf face landmarks detector: https://github.com/1adrianb/face-alignment
'''
import argparse
import os
import time

import cv2
import numpy as np
import torch
import imageio
import matplotlib.pyplot as plt

from COTR.utils import utils, debug_utils
from COTR.models import build_model
from COTR.options.options import *
from COTR.options.options_utils import *
from COTR.inference.inference_helper import triangulate_corr
from COTR.inference.sparse_engine import SparseEngine

utils.fix_randomness(0)
torch.set_grad_enabled(False)


def main(opt):
    model = build_model(opt)
    model = model.cuda()
    weights = torch.load(opt.load_weights_path, map_location='cpu')['model_state_dict']
    utils.safe_load_weights(model, weights)
    model = model.eval()

    img_a = imageio.imread('./sample_data/imgs/face_1.png', pilmode='RGB')
    img_b = imageio.imread('./sample_data/imgs/face_2.png', pilmode='RGB')
    queries = np.load('./sample_data/face_landmarks.npy')[0]

    engine = SparseEngine(model, 32, mode='stretching')
    corrs = engine.cotr_corr_multiscale(img_a, img_b, np.linspace(0.5, 0.0625, 4), 1, queries_a=queries, force=False)

    f, axarr = plt.subplots(1, 2)
    axarr[0].imshow(img_a)
    axarr[0].scatter(*queries.T, s=1)
    axarr[0].title.set_text('Reference Face')
    axarr[0].axis('off')
    axarr[1].imshow(img_b)
    axarr[1].scatter(*corrs[:, 2:].T, s=1)
    axarr[1].title.set_text('Target Face')
    axarr[1].axis('off')
    plt.show()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    set_COTR_arguments(parser)
    parser.add_argument('--out_dir', type=str, default=general_config['out'], help='out directory')
    parser.add_argument('--load_weights', type=str, default=None, help='load a pretrained set of weights, you need to provide the model id')

    opt = parser.parse_args()
    opt.command = ' '.join(sys.argv)

    layer_2_channels = {'layer1': 256,
                        'layer2': 512,
                        'layer3': 1024,
                        'layer4': 2048, }
    opt.dim_feedforward = layer_2_channels[opt.layer]
    if opt.load_weights:
        opt.load_weights_path = os.path.join(opt.out_dir, opt.load_weights, 'checkpoint.pth.tar')
    print_opt(opt)
    main(opt)
third_party/COTR/demo_guided_matching.py
ADDED
@@ -0,0 +1,85 @@
'''
Feature-free COTR guided matching for keypoints.
We use DISK(https://github.com/cvlab-epfl/disk) keypoints location.
We apply RANSAC + F matrix to further prune outliers.
Note: This script doesn't use descriptors.
'''
import argparse
import os
import time

import cv2
import numpy as np
import torch
import imageio
from scipy.spatial import distance_matrix

from COTR.utils import utils, debug_utils
from COTR.models import build_model
from COTR.options.options import *
from COTR.options.options_utils import *
from COTR.inference.sparse_engine import SparseEngine, FasterSparseEngine

utils.fix_randomness(0)
torch.set_grad_enabled(False)


def main(opt):
    model = build_model(opt)
    model = model.cuda()
    weights = torch.load(opt.load_weights_path)['model_state_dict']
    utils.safe_load_weights(model, weights)
    model = model.eval()

    img_a = imageio.imread('./sample_data/imgs/21526113_4379776807.jpg')
    img_b = imageio.imread('./sample_data/imgs/21126421_4537535153.jpg')
    kp_a = np.load('./sample_data/21526113_4379776807.jpg.disk.kpts.npy')
    kp_b = np.load('./sample_data/21126421_4537535153.jpg.disk.kpts.npy')

    if opt.faster_infer:
        engine = FasterSparseEngine(model, 32, mode='tile')
    else:
        engine = SparseEngine(model, 32, mode='tile')
    t0 = time.time()
    corrs_a_b = engine.cotr_corr_multiscale(img_a, img_b, np.linspace(0.5, 0.0625, 4), 1, max_corrs=kp_a.shape[0], queries_a=kp_a, force=True)
    corrs_b_a = engine.cotr_corr_multiscale(img_b, img_a, np.linspace(0.5, 0.0625, 4), 1, max_corrs=kp_b.shape[0], queries_a=kp_b, force=True)
    t1 = time.time()
    print(f'COTR spent {t1-t0} seconds.')
    inds_a_b = np.argmin(distance_matrix(corrs_a_b[:, 2:], kp_b), axis=1)
    matched_a_b = np.stack([np.arange(kp_a.shape[0]), inds_a_b]).T
    inds_b_a = np.argmin(distance_matrix(corrs_b_a[:, 2:], kp_a), axis=1)
    matched_b_a = np.stack([np.arange(kp_b.shape[0]), inds_b_a]).T

    good = 0
    final_matches = []
    for m_ab in matched_a_b:
        for m_ba in matched_b_a:
            if (m_ab == m_ba[::-1]).all():
                good += 1
                final_matches.append(m_ab)
                break
    final_matches = np.array(final_matches)
    final_corrs = np.concatenate([kp_a[final_matches[:, 0]], kp_b[final_matches[:, 1]]], axis=1)
    _, mask = cv2.findFundamentalMat(final_corrs[:, :2], final_corrs[:, 2:], cv2.FM_RANSAC, ransacReprojThreshold=5, confidence=0.999999)
    utils.visualize_corrs(img_a, img_b, final_corrs[np.where(mask[:, 0])])


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    set_COTR_arguments(parser)
    parser.add_argument('--out_dir', type=str, default=general_config['out'], help='out directory')
    parser.add_argument('--load_weights', type=str, default=None, help='load a pretrained set of weights, you need to provide the model id')
    parser.add_argument('--faster_infer', type=str2bool, default=False, help='use faster inference')

    opt = parser.parse_args()
    opt.command = ' '.join(sys.argv)

    layer_2_channels = {'layer1': 256,
                        'layer2': 512,
                        'layer3': 1024,
                        'layer4': 2048, }
    opt.dim_feedforward = layer_2_channels[opt.layer]
    if opt.load_weights:
        opt.load_weights_path = os.path.join(opt.out_dir, opt.load_weights, 'checkpoint.pth.tar')
    print_opt(opt)
    main(opt)
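The double loop above keeps only cycle-consistent matches (the a-to-b and b-to-a nearest neighbours must agree). A sketch of the same filter written with numpy indexing, under the same variable names; the equivalence is my reading of the loop, not something stated in the commit:

import numpy as np

def mutual_matches(inds_a_b, inds_b_a):
    # inds_a_b[i]: index in kp_b matched to kp_a[i]; inds_b_a[j]: index in kp_a matched to kp_b[j].
    idx_a = np.arange(len(inds_a_b))
    keep = inds_b_a[inds_a_b] == idx_a  # does b's best match point back to a?
    return np.stack([idx_a[keep], inds_a_b[keep]], axis=1)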
third_party/COTR/demo_homography.py
ADDED
@@ -0,0 +1,84 @@
'''
COTR demo for homography estimation
'''
import argparse
import os
import time

import cv2
import numpy as np
import torch
import imageio
import matplotlib.pyplot as plt

from COTR.utils import utils, debug_utils
from COTR.models import build_model
from COTR.options.options import *
from COTR.options.options_utils import *
from COTR.inference.inference_helper import triangulate_corr
from COTR.inference.sparse_engine import SparseEngine

utils.fix_randomness(0)
torch.set_grad_enabled(False)


def main(opt):
    model = build_model(opt)
    model = model.cuda()
    weights = torch.load(opt.load_weights_path, map_location='cpu')['model_state_dict']
    utils.safe_load_weights(model, weights)
    model = model.eval()

    img_a = imageio.imread('./sample_data/imgs/paint_1.JPG', pilmode='RGB')
    img_b = imageio.imread('./sample_data/imgs/paint_2.jpg', pilmode='RGB')
    rep_img = imageio.imread('./sample_data/imgs/Meisje_met_de_parel.jpg', pilmode='RGB')
    rep_mask = np.ones(rep_img.shape[:2])
    lu_corner = [932, 1025]
    ru_corner = [2469, 901]
    lb_corner = [908, 2927]
    rb_corner = [2436, 3080]
    queries = np.array([lu_corner, ru_corner, lb_corner, rb_corner]).astype(np.float32)
    rep_coord = np.array([[0, 0], [rep_img.shape[1], 0], [0, rep_img.shape[0]], [rep_img.shape[1], rep_img.shape[0]]]).astype(np.float32)

    engine = SparseEngine(model, 32, mode='stretching')
    corrs = engine.cotr_corr_multiscale(img_a, img_b, np.linspace(0.5, 0.0625, 4), 1, queries_a=queries, force=True)

    T = cv2.getPerspectiveTransform(rep_coord, corrs[:, 2:].astype(np.float32))
    vmask = cv2.warpPerspective(rep_mask, T, (img_b.shape[1], img_b.shape[0])) > 0
    warped = cv2.warpPerspective(rep_img, T, (img_b.shape[1], img_b.shape[0]))
    out = warped * vmask[..., None] + img_b * (~vmask[..., None])

    f, axarr = plt.subplots(1, 4)
    axarr[0].imshow(rep_img)
    axarr[0].title.set_text('Virtual Paint')
    axarr[0].axis('off')
    axarr[1].imshow(img_a)
    axarr[1].title.set_text('Annotated Frame')
    axarr[1].axis('off')
    axarr[2].imshow(img_b)
    axarr[2].title.set_text('Target Frame')
    axarr[2].axis('off')
    axarr[3].imshow(out)
    axarr[3].title.set_text('Overlay')
    axarr[3].axis('off')
    plt.show()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    set_COTR_arguments(parser)
    parser.add_argument('--out_dir', type=str, default=general_config['out'], help='out directory')
    parser.add_argument('--load_weights', type=str, default=None, help='load a pretrained set of weights, you need to provide the model id')

    opt = parser.parse_args()
    opt.command = ' '.join(sys.argv)

    layer_2_channels = {'layer1': 256,
                        'layer2': 512,
                        'layer3': 1024,
                        'layer4': 2048, }
    opt.dim_feedforward = layer_2_channels[opt.layer]
    if opt.load_weights:
        opt.load_weights_path = os.path.join(opt.out_dir, opt.load_weights, 'checkpoint.pth.tar')
    print_opt(opt)
    main(opt)
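This demo queries exactly four corner points, so cv2.getPerspectiveTransform (the exact 4-point solve) is enough. With more, possibly noisy, correspondences one would typically fit the homography robustly instead; a small sketch with synthetic points (the RANSAC variant is my suggestion, not part of the commit):

import cv2
import numpy as np

src = (np.random.rand(50, 2) * 100).astype(np.float32)
H_true = np.array([[1.0, 0.02, 5.0], [0.01, 1.0, -3.0], [0.0, 0.0, 1.0]])
dst = cv2.perspectiveTransform(src.reshape(-1, 1, 2), H_true).reshape(-1, 2)

# Robust fit: RANSAC tolerates outliers that an exact 4-point solve cannot.
H_est, inliers = cv2.findHomography(src, dst, cv2.RANSAC, ransacReprojThreshold=3.0)
assert np.allclose(H_est / H_est[2, 2], H_true, atol=1e-3)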
third_party/COTR/demo_reconstruction.py
ADDED
@@ -0,0 +1,92 @@
'''
COTR two view reconstruction with known extrinsic/intrinsic demo
'''
import argparse
import os
import time

import numpy as np
import torch
import imageio
import open3d as o3d

from COTR.utils import utils, debug_utils
from COTR.models import build_model
from COTR.options.options import *
from COTR.options.options_utils import *
from COTR.inference.sparse_engine import SparseEngine, FasterSparseEngine
from COTR.projector import pcd_projector

utils.fix_randomness(0)
torch.set_grad_enabled(False)


def triangulate_rays_to_pcd(center_a, dir_a, center_b, dir_b):
    A = center_a
    a = dir_a / np.linalg.norm(dir_a, axis=1, keepdims=True)
    B = center_b
    b = dir_b / np.linalg.norm(dir_b, axis=1, keepdims=True)
    c = B - A
    D = A + a * ((-np.sum(a * b, axis=1) * np.sum(b * c, axis=1) + np.sum(a * c, axis=1) * np.sum(b * b, axis=1)) / (np.sum(a * a, axis=1) * np.sum(b * b, axis=1) - np.sum(a * b, axis=1) * np.sum(a * b, axis=1)))[..., None]
    return D


def main(opt):
    model = build_model(opt)
    model = model.cuda()
    weights = torch.load(opt.load_weights_path, map_location='cpu')['model_state_dict']
    utils.safe_load_weights(model, weights)
    model = model.eval()

    img_a = imageio.imread('./sample_data/imgs/img_0.jpg', pilmode='RGB')
    img_b = imageio.imread('./sample_data/imgs/img_1.jpg', pilmode='RGB')

    if opt.faster_infer:
        engine = FasterSparseEngine(model, 32, mode='tile')
    else:
        engine = SparseEngine(model, 32, mode='tile')
    t0 = time.time()
    corrs = engine.cotr_corr_multiscale_with_cycle_consistency(img_a, img_b, np.linspace(0.5, 0.0625, 4), 1, max_corrs=opt.max_corrs, queries_a=None)
    t1 = time.time()
    print(f'spent {t1-t0} seconds for {opt.max_corrs} correspondences.')

    camera_a = np.load('./sample_data/camera_0.npy', allow_pickle=True).item()
    camera_b = np.load('./sample_data/camera_1.npy', allow_pickle=True).item()
    center_a = camera_a['cam_center']
    center_b = camera_b['cam_center']
    rays_a = pcd_projector.PointCloudProjector.pcd_2d_to_pcd_3d_np(corrs[:, :2], np.ones([corrs.shape[0], 1]) * 2, camera_a['intrinsic'], motion=camera_a['c2w'])
    rays_b = pcd_projector.PointCloudProjector.pcd_2d_to_pcd_3d_np(corrs[:, 2:], np.ones([corrs.shape[0], 1]) * 2, camera_b['intrinsic'], motion=camera_b['c2w'])
    dir_a = rays_a - center_a
    dir_b = rays_b - center_b
    center_a = np.array([center_a] * corrs.shape[0])
    center_b = np.array([center_b] * corrs.shape[0])
    points = triangulate_rays_to_pcd(center_a, dir_a, center_b, dir_b)
    colors = (img_a[tuple(np.floor(corrs[:, :2]).astype(int)[:, ::-1].T)] / 255 + img_b[tuple(np.floor(corrs[:, 2:]).astype(int)[:, ::-1].T)] / 255) / 2
    colors = np.array(colors)

    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    pcd.colors = o3d.utility.Vector3dVector(colors)
    o3d.visualization.draw_geometries([pcd])


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    set_COTR_arguments(parser)
    parser.add_argument('--out_dir', type=str, default=general_config['out'], help='out directory')
    parser.add_argument('--load_weights', type=str, default=None, help='load a pretrained set of weights, you need to provide the model id')
    parser.add_argument('--max_corrs', type=int, default=2048, help='number of correspondences')
    parser.add_argument('--faster_infer', type=str2bool, default=False, help='use faster inference')

    opt = parser.parse_args()
    opt.command = ' '.join(sys.argv)

    layer_2_channels = {'layer1': 256,
                        'layer2': 512,
                        'layer3': 1024,
                        'layer4': 2048, }
    opt.dim_feedforward = layer_2_channels[opt.layer]
    if opt.load_weights:
        opt.load_weights_path = os.path.join(opt.out_dir, opt.load_weights, 'checkpoint.pth.tar')
    print_opt(opt)
    main(opt)
third_party/COTR/demo_single_pair.py
ADDED
@@ -0,0 +1,66 @@
'''
COTR demo for a single image pair
'''
import argparse
import os
import time

import cv2
import numpy as np
import torch
import imageio
import matplotlib.pyplot as plt

from COTR.utils import utils, debug_utils
from COTR.models import build_model
from COTR.options.options import *
from COTR.options.options_utils import *
from COTR.inference.inference_helper import triangulate_corr
from COTR.inference.sparse_engine import SparseEngine

utils.fix_randomness(0)
torch.set_grad_enabled(False)


def main(opt):
    model = build_model(opt)
    model = model.cuda()
    weights = torch.load(opt.load_weights_path, map_location='cpu')['model_state_dict']
    utils.safe_load_weights(model, weights)
    model = model.eval()

    img_a = imageio.imread('./sample_data/imgs/cathedral_1.jpg', pilmode='RGB')
    img_b = imageio.imread('./sample_data/imgs/cathedral_2.jpg', pilmode='RGB')

    engine = SparseEngine(model, 32, mode='tile')
    t0 = time.time()
    corrs = engine.cotr_corr_multiscale_with_cycle_consistency(img_a, img_b, np.linspace(0.5, 0.0625, 4), 1, max_corrs=opt.max_corrs, queries_a=None)
    t1 = time.time()

    utils.visualize_corrs(img_a, img_b, corrs)
    print(f'spent {t1-t0} seconds for {opt.max_corrs} correspondences.')
    dense = triangulate_corr(corrs, img_a.shape, img_b.shape)
    warped = cv2.remap(img_b, dense[..., 0].astype(np.float32), dense[..., 1].astype(np.float32), interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT)
    plt.imshow(warped / 255 * 0.5 + img_a / 255 * 0.5)
    plt.show()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    set_COTR_arguments(parser)
    parser.add_argument('--out_dir', type=str, default=general_config['out'], help='out directory')
    parser.add_argument('--load_weights', type=str, default=None, help='load a pretrained set of weights, you need to provide the model id')
    parser.add_argument('--max_corrs', type=int, default=100, help='number of correspondences')

    opt = parser.parse_args()
    opt.command = ' '.join(sys.argv)

    layer_2_channels = {'layer1': 256,
                        'layer2': 512,
                        'layer3': 1024,
                        'layer4': 2048, }
    opt.dim_feedforward = layer_2_channels[opt.layer]
    if opt.load_weights:
        opt.load_weights_path = os.path.join(opt.out_dir, opt.load_weights, 'checkpoint.pth.tar')
    print_opt(opt)
    main(opt)
third_party/COTR/demo_wbs.py
ADDED
@@ -0,0 +1,71 @@
'''
Manually passing scale to COTR, skip the scale difference estimation.
'''
import argparse
import os
import time

import cv2
import numpy as np
import torch
import imageio
from scipy.spatial import distance_matrix
import matplotlib.pyplot as plt

from COTR.utils import utils, debug_utils
from COTR.models import build_model
from COTR.options.options import *
from COTR.options.options_utils import *
from COTR.inference.sparse_engine import SparseEngine

utils.fix_randomness(0)
torch.set_grad_enabled(False)


def main(opt):
    model = build_model(opt)
    model = model.cuda()
    weights = torch.load(opt.load_weights_path)['model_state_dict']
    utils.safe_load_weights(model, weights)
    model = model.eval()

    img_a = imageio.imread('./sample_data/imgs/petrzin_01.png')
    img_b = imageio.imread('./sample_data/imgs/petrzin_02.png')
    img_a_area = 1.0
    img_b_area = 1.0
    gt_corrs = np.loadtxt('./sample_data/petrzin_pts.txt')
    kp_a = gt_corrs[:, :2]
    kp_b = gt_corrs[:, 2:]

    engine = SparseEngine(model, 32, mode='tile')
    t0 = time.time()
    corrs = engine.cotr_corr_multiscale(img_a, img_b, np.linspace(0.75, 0.1, 4), 1, max_corrs=kp_a.shape[0], queries_a=kp_a, force=True, areas=[img_a_area, img_b_area])
    t1 = time.time()
    print(f'COTR spent {t1-t0} seconds.')

    utils.visualize_corrs(img_a, img_b, corrs)
    plt.imshow(img_b)
    plt.scatter(kp_b[:, 0], kp_b[:, 1])
    plt.scatter(corrs[:, 2], corrs[:, 3])
    plt.plot(np.stack([kp_b[:, 0], corrs[:, 2]], axis=1).T, np.stack([kp_b[:, 1], corrs[:, 3]], axis=1).T, color=[1, 0, 0])
    plt.show()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    set_COTR_arguments(parser)
    parser.add_argument('--out_dir', type=str, default=general_config['out'], help='out directory')
    parser.add_argument('--load_weights', type=str, default=None, help='load a pretrained set of weights, you need to provide the model id')

    opt = parser.parse_args()
    opt.command = ' '.join(sys.argv)

    layer_2_channels = {'layer1': 256,
                        'layer2': 512,
                        'layer3': 1024,
                        'layer4': 2048, }
    opt.dim_feedforward = layer_2_channels[opt.layer]
    if opt.load_weights:
        opt.load_weights_path = os.path.join(opt.out_dir, opt.load_weights, 'checkpoint.pth.tar')
    print_opt(opt)
    main(opt)
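A quick numerical check of triangulate_rays_to_pcd from demo_reconstruction.py above: two rays that genuinely intersect should triangulate to their intersection point. A sketch only; it assumes the function has been imported or copied into scope:

import numpy as np

# Hypothetical setup: both camera rays point exactly at the same 3D target.
target = np.array([[1.0, 2.0, 3.0]])
center_a = np.array([[0.0, 0.0, 0.0]])
center_b = np.array([[5.0, 0.0, 0.0]])
dir_a = target - center_a
dir_b = target - center_b

point = triangulate_rays_to_pcd(center_a, dir_a, center_b, dir_b)
assert np.allclose(point, target)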
third_party/COTR/environment.yml
ADDED
@@ -0,0 +1,104 @@
name: cotr_env
channels:
  - pytorch
  - defaults
dependencies:
  - _libgcc_mutex=0.1=main
  - backcall=0.2.0=pyhd3eb1b0_0
  - blas=1.0=mkl
  - bzip2=1.0.8=h7b6447c_0
  - ca-certificates=2021.4.13=h06a4308_1
  - cairo=1.16.0=hf32fb01_1
  - certifi=2020.12.5=py37h06a4308_0
  - cudatoolkit=10.2.89=hfd86e86_1
  - cycler=0.10.0=py37_0
  - dbus=1.13.18=hb2f20db_0
  - decorator=5.0.6=pyhd3eb1b0_0
  - expat=2.3.0=h2531618_2
  - ffmpeg=4.0=hcdf2ecd_0
  - fontconfig=2.13.1=h6c09931_0
  - freeglut=3.0.0=hf484d3e_5
  - freetype=2.10.4=h5ab3b9f_0
  - glib=2.68.1=h36276a3_0
  - graphite2=1.3.14=h23475e2_0
  - gst-plugins-base=1.14.0=h8213a91_2
  - gstreamer=1.14.0=h28cd5cc_2
  - harfbuzz=1.8.8=hffaf4a1_0
  - hdf5=1.10.2=hba1933b_1
  - icu=58.2=he6710b0_3
  - imageio=2.9.0=pyhd3eb1b0_0
  - intel-openmp=2021.2.0=h06a4308_610
  - ipython=7.22.0=py37hb070fc8_0
  - ipython_genutils=0.2.0=pyhd3eb1b0_1
  - jasper=2.0.14=h07fcdf6_1
  - jedi=0.17.0=py37_0
  - jpeg=9b=h024ee3a_2
  - kiwisolver=1.3.1=py37h2531618_0
  - lcms2=2.12=h3be6417_0
  - ld_impl_linux-64=2.33.1=h53a641e_7
  - libffi=3.3=he6710b0_2
  - libgcc-ng=9.1.0=hdf63c60_0
  - libgfortran-ng=7.3.0=hdf63c60_0
  - libglu=9.0.0=hf484d3e_1
  - libopencv=3.4.2=hb342d67_1
  - libopus=1.3.1=h7b6447c_0
  - libpng=1.6.37=hbc83047_0
  - libstdcxx-ng=9.1.0=hdf63c60_0
  - libtiff=4.1.0=h2733197_1
  - libuuid=1.0.3=h1bed415_2
  - libuv=1.40.0=h7b6447c_0
  - libvpx=1.7.0=h439df22_0
  - libxcb=1.14=h7b6447c_0
  - libxml2=2.9.10=hb55368b_3
  - lz4-c=1.9.3=h2531618_0
  - matplotlib=3.3.4=py37h06a4308_0
  - matplotlib-base=3.3.4=py37h62a2d02_0
  - mkl=2020.2=256
  - mkl-service=2.3.0=py37he8ac12f_0
  - mkl_fft=1.3.0=py37h54f3939_0
  - mkl_random=1.1.1=py37h0573a6f_0
  - ncurses=6.2=he6710b0_1
  - ninja=1.10.2=hff7bd54_1
  - numpy=1.19.2=py37h54aff64_0
  - numpy-base=1.19.2=py37hfa32c7d_0
  - olefile=0.46=py37_0
  - opencv=3.4.2=py37h6fd60c2_1
  - openssl=1.1.1k=h27cfd23_0
  - parso=0.8.2=pyhd3eb1b0_0
  - pcre=8.44=he6710b0_0
  - pexpect=4.8.0=pyhd3eb1b0_3
  - pickleshare=0.7.5=pyhd3eb1b0_1003
  - pillow=8.2.0=py37he98fc37_0
  - pip=21.0.1=py37h06a4308_0
  - pixman=0.40.0=h7b6447c_0
  - prompt-toolkit=3.0.17=pyh06a4308_0
  - ptyprocess=0.7.0=pyhd3eb1b0_2
  - py-opencv=3.4.2=py37hb342d67_1
  - pygments=2.8.1=pyhd3eb1b0_0
  - pyparsing=2.4.7=pyhd3eb1b0_0
  - pyqt=5.9.2=py37h05f1152_2
  - python=3.7.10=hdb3f193_0
  - python-dateutil=2.8.1=pyhd3eb1b0_0
  - pytorch=1.7.1=py3.7_cuda10.2.89_cudnn7.6.5_0
  - qt=5.9.7=h5867ecd_1
  - readline=8.1=h27cfd23_0
  - scipy=1.2.1=py37h7c811a0_0
  - setuptools=52.0.0=py37h06a4308_0
  - sip=4.19.8=py37hf484d3e_0
  - six=1.15.0=py37h06a4308_0
  - sqlite=3.35.4=hdfb4753_0
  - tk=8.6.10=hbc83047_0
  - torchaudio=0.7.2=py37
  - torchvision=0.8.2=py37_cu102
  - tornado=6.1=py37h27cfd23_0
  - tqdm=4.59.0=pyhd3eb1b0_1
  - traitlets=5.0.5=pyhd3eb1b0_0
  - typing_extensions=3.7.4.3=pyha847dfd_0
  - vispy=0.5.3=py37hee6b756_0
  - wcwidth=0.2.5=py_0
  - wheel=0.36.2=pyhd3eb1b0_0
  - xz=5.2.5=h7b6447c_0
  - zlib=1.2.11=h7b6447c_3
  - zstd=1.4.9=haebb681_0
  - pip:
    - tables==3.6.1
third_party/COTR/out/.DS_Store
ADDED
Binary file (6.15 kB)
third_party/COTR/out/.placeholder
ADDED
File without changes
third_party/COTR/out/default/checkpoint.pth.tar
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:abfa1183408dc566535146b41508ed02084d5f5d1a150f5c188ee479463d6d5c
size 219363688